Not sure if this one is quite ready for primetime. I started this a few months ago, and just got around to having something workable when there are duplacte column names.
def combine(dataList, commonCol = 't_stamp'):
from collections import OrderedDict, Counter
''' Combine multiple datasets based on a common column
dataList: list of datasets
commonCol: column name common to all datasets. Default is 't_stamp'
'''
# Convert all datsets to BasicDataset, if needed
for i, data in enumerate(dataList):
if 'com.inductiveautomation.ignition.common.BasicDataset' not in str(type(data)):
dataList[i] = system.dataset.toDataSet(data)
# Create default value dictionary containing all column names
# with None values
blankValueDict = OrderedDict()
# List to use with Counter to use fur a suffix, if needed
counterList = []
# Empty list of dictioaries to map incoming column names to new names in case of duplicates
headerLookups = []
# Iterate through the dataset headers, looking for duplicate column names
# and create a dict representing a blank row of values of the new dataset.
for data in dataList:
colNames = list(data.getColumnNames())
counterList.extend(colNames)
# count all the columns to find duplicates
counter = Counter(counterList)
# Dict to create map from columnName to new name
colDict = {}
for col in colNames:
if col != commonCol:
if counter[col] > 1:
# Duplicate column format string
formatString = '{}_{}'
else:
formatString = '{}'
#Add mapping to dicts
blankValueDict[formatString.format(col, counter[col]-1)] = None
colDict[col] = formatString.format(col, counter[col]-1)
headerLookups.append(colDict)
# Process the data
dataDict = OrderedDict()
for data, headerLookup in zip(dataList, headerLookups):
colNames = list(data.getColumnNames())
duplicateCols = []
for i in xrange(data.rowCount):
commonColValue = data.getValueAt(i, commonCol)
if commonColValue not in dataDict.keys():
dataDict[commonColValue] = blankValueDict.copy()
for col in colNames:
if col != commonCol:
dataDict[commonColValue][headerLookup[col]] = data.getValueAt(i, col)
# Create combined dataset
headers = [commonCol] + blankValueDict.keys()
data = []
# Sort by the common key
for key in sorted(dataDict.keys()):
newRow=[]
newRow.append(key)
for col in headers[1:]:
newRow.append(dataDict[key][col])
data.append(newRow)
return system.dataset.toDataSet(headers, data)
Usage example
##########################################
# Sample datasets
h1 = ['t_stamp', 'Col1', 'Col2']
d1 = [[1, 2, 3],
[4, 5, 6],
[7, 8, 9]]
ds1 = system.dataset.toDataSet(h1, d1)
h2 = ['t_stamp', 'Col1', 'Col99']
d2 = [[1, 11, 12],
[3, 13, 14],
[5, 15, 16],
[7, 17, 18]]
ds2 = system.dataset.toDataSet(h2, d2)
##########################################
dsOut = util.dataset.combine([ds1, ds2])
util.dataset.printDataSet(dsOut)
print '\n'
dsOut = util.dataset.combine([ds1, ds2], 'Col1')
util.dataset.printDataSet(dsOut)
Output
row | t_stamp | Col1 | Col2 | Col1_1 | Col99
--------------------------------------------
0 | 1 | 2 | 3 | 11 | 12
1 | 3 | None | None | 13 | 14
2 | 4 | 5 | 6 | None | None
3 | 5 | None | None | 15 | 16
4 | 7 | 8 | 9 | 17 | 18
row | Col1 | t_stamp | Col2 | t_stamp_1 | Col99
-----------------------------------------------
0 | 2 | 1 | 3 | None | None
1 | 5 | 4 | 6 | None | None
2 | 8 | 7 | 9 | None | None
3 | 11 | None | None | 1 | 12
4 | 13 | None | None | 3 | 14
5 | 15 | None | None | 5 | 16
6 | 17 | None | None | 7 | 18