Find duplicates in a dataset and sum their values

I’m sure there’s a more clever solution out there, but try this:

headers = ["City", "Population", "Timezone", "GMTOffset"]
data = []
data.append(["New York", 8363710, "EST", -5])
data.append(["Los Angeles", 3833995, "PST", -8])
data.append(["Chicago", 2853114, "CST", -6])
data.append(["Houston", 2242193, "CST", -6])
data.append(["Phoenix", 1567924, "MST", -7])
data.append(["New York", 532, "EST", -5])

cities = system.dataset.toDataSet(headers, data)

d = {}

for row in system.dataset.toPyDataSet(cities):
	city = row[0]
	
	if (city in d):
		population = d[city].pop(0)
		population = population + row[1]
		d[city].insert(0, population)
	else:
		d[city] = [row[1], row[2], row[3]]

summed_data = [[key, d[key][0], d[key][1], d[key][2]] for key in d]
summed_cities = system.dataset.toDataSet(headers, data)

print summed_data
1 Like