Gotcha. The trick is to put everything in one dictionary. You can use nested dictionaries with the SNs as the top level key.
# Create our sample datasets. In this, I used the dates as strings for display purposes. It shouldn't make any difference for however you get the datasets in.
headers1 = ['SN','Model','Sequence','Highlight']
data1 = [['1N04060REK0207198','4060R','149,519',0],
['1N04060RCK0207199','4060R','149,523',0],
['1N04060RAK0207200','4060R','149,539',0]
]
headers2 = ['SN', 'WHSDate']
data2 = [['1N04060REK0207198', 'Mon Oct 14 00:00:00 CDT 2019'],
['1N04060RCK0207199', 'Fri Oct 04 00:00:00 CDT 2019'],
['1N04060RAK0207200', 'Fri Oct 11 00:00:00 CDT 2019'],
['1N04060RCK0207212', 'Tue Sep 03 00:00:00 CDT 2019'],
['1N04038RKK0204150', 'Wed Oct 09 00:00:00 CDT 2019']
]
dataset1 = system.dataset.toPyDataSet(system.dataset.toDataSet(headers1, data1))
dataset2 = system.dataset.toPyDataSet(system.dataset.toDataSet(headers2, data2))
#################################################################################
# Get columnNames from the underlying datasets
columnNames1 = dataset1.getUnderlyingDataset().getColumnNames()
columnNames2 = dataset2.getUnderlyingDataset().getColumnNames()
# Create dictionary. Since the SN is common between the two dataset, we can use it
# as a key name, with the rest of the data as a nested dictionary.
dictOut = {}
for row in dataset1:
for keyName, col in zip(columnNames1, row):
if keyName == 'SN':
dictOut[col] = {}
if keyName != 'SN':
dictOut[row['SN']][keyName] = col
# Add in the second dataset. If the SN of the row matches one on the SN keys
# in the dictionary, then add the remaining data.
for row in dataset2:
if row['SN'] in dictOut.keys():
for keyName, col in zip(columnNames2, row):
if keyName != 'SN':
dictOut[row['SN']][keyName] = col
# The dictionary now looks like this:
# {
# u'1N04060RCK0207199': {u'Highlight': 0, u'Sequence': u'149,523', u'Model': u'4060R', u'WHSDate': u'Fri Oct 04 00:00:00 CDT 2019'},
# u'1N04060REK0207198': {u'Highlight': 0, u'Sequence': u'149,519', u'Model': u'4060R', u'WHSDate': u'Mon Oct 14 00:00:00 CDT 2019'},
# u'1N04060RAK0207200': {u'Highlight': 0, u'Sequence': u'149,539', u'Model': u'4060R', u'WHSDate': u'Fri Oct 11 00:00:00 CDT 2019'}
# }
# Create a dataset out of this new dictionary:
# Merge the two sets of column names
newHeaders = columnNames1 + [colName for colName in columnNames2 if colName not in columnNames1]
dataOut = []
for SNKey in dictOut.keys():
newRow = []
for header in newHeaders:
if header == 'SN':
newRow.append(SNKey)
else:
newRow.append(dictOut[SNKey][header])
dataOut.append(newRow)
datasetOut = system.dataset.toDataSet(newHeaders, dataOut)
Results:
*** dataset1 ***
row | SN Model Sequence Highlight
------------------------------------------------
0 | 1N04060REK0207198 4060R 149,519 0
1 | 1N04060RCK0207199 4060R 149,523 0
2 | 1N04060RAK0207200 4060R 149,539 0
*** dataset2 ***
row | SN WHSDate
----------------------------------------------------
0 | 1N04060REK0207198 Mon Oct 14 00:00:00 CDT 2019
1 | 1N04060RCK0207199 Fri Oct 04 00:00:00 CDT 2019
2 | 1N04060RAK0207200 Fri Oct 11 00:00:00 CDT 2019
3 | 1N04060RCK0207212 Tue Sep 03 00:00:00 CDT 2019
4 | 1N04038RKK0204150 Wed Oct 09 00:00:00 CDT 2019
*** datasetOut ***
row | SN Model Sequence Highlight WHSDate
-----------------------------------------------------------------------------
0 | 1N04060RCK0207199 4060R 149,523 0 Fri Oct 04 00:00:00 CDT 2019
1 | 1N04060REK0207198 4060R 149,519 0 Mon Oct 14 00:00:00 CDT 2019
2 | 1N04060RAK0207200 4060R 149,539 0 Fri Oct 11 00:00:00 CDT 2019