Okay, I've definitely spent way too much time on this now, but it's interesting, dammit.
A fairer imperative translation of the nested comprehensions performs much closer to the comprehension version (the majority of the difference before was slow string indexing/replacement operations); in fact, it's sometimes slightly faster:
For 100 users
Comprehension
mean 207955.52ns
stdDev 10582.36ns
Imperative
mean 213307.58ns
stdDev 9908.74ns
Pascal
mean 241134.32ns
stdDev 7834.83ns
Filtering by role
Comprehension
mean 132700.64ns
stdDev 3535.45ns
Imperative
mean 114241.66ns
stdDev 22767.67ns
Pascal
mean 134536.15ns
stdDev 2964.58ns
For 1000 users
Comprehension
mean 2068981.27ns
stdDev 54455.94ns
Imperative
mean 2137922.57ns
stdDev 95904.05ns
Pascal
mean 2509863.96ns
stdDev 211672.25ns
Filtering by role
Comprehension
mean 1048834.59ns
stdDev 82326.93ns
Imperative
mean 1058578.15ns
stdDev 94194.53ns
Pascal
mean 1094843.70ns
stdDev 92648.91ns
For 10000 users
Comprehension
mean 21904184.91ns
stdDev 2662372.10ns
Imperative
mean 19787442.22ns
stdDev 644613.36ns
Pascal
mean 22563649.73ns
stdDev 749047.54ns
Filtering by role
Comprehension
mean 9131573.53ns
stdDev 425773.98ns
Imperative
mean 9057692.98ns
stdDev 399956.14ns
Pascal
mean 8859192.44ns
stdDev 158626.05ns
Summary
def retrieveContactList(users, UserRole=None, ContactType="email"):
    """Collect contact values of one type, optionally restricted to a role.

    Implemented as one nested list comprehension. The outer clause walks
    ``users`` and keeps a user when ``UserRole`` is None or is contained in
    that user's ``roles``; the inner clause walks the kept user's
    ``contactInfo`` and keeps entries whose ``contactType`` equals
    ``ContactType``.

    Returns a list holding the ``value`` of every matching entry, in
    iteration order.
    """
    return [
        entry.value
        for account in users
        if UserRole is None or UserRole in account.roles
        for entry in account.contactInfo
        if entry.contactType == ContactType
    ]
def retrieveContactList2(userList, UserRole=None, ContactType="email"):
    """Imperative (explicit-loop) collection of contact values.

    Walks ``userList``; a user is skipped when ``UserRole`` is given and is
    not contained in that user's ``roles``. For each remaining user, every
    ``contactInfo`` entry whose ``contactType`` equals ``ContactType``
    contributes its ``value`` to the result.

    Returns the collected values as a list, in iteration order.
    """
    collected = []
    for account in userList:
        # Guard clause: a role filter was requested and this user lacks it.
        if UserRole is not None and UserRole not in account.roles:
            continue
        for entry in account.contactInfo:
            if entry.contactType == ContactType:
                collected.append(entry.value)
    return collected
def list_contacts(user_list, user_role=None, contact_type='email'):
    """Two-pass collection of contact values.

    First builds an intermediate list of the users from ``user_list`` whose
    ``roles`` contain ``user_role`` (every user when ``user_role`` is None;
    the membership test is evaluated before the None check). Then flattens
    those users' ``contactInfo`` entries, keeping the ``value`` of each
    entry whose ``contactType`` equals ``contact_type``.

    Returns the kept values as a list, in iteration order.
    """
    eligible = [
        member
        for member in user_list
        if user_role in member.roles or user_role is None
    ]
    return [
        info.value
        for member in eligible
        for info in member.contactInfo
        if info.contactType == contact_type
    ]
from java.lang import System
from functools import partial
import random
from com.inductiveautomation.ignition.common.user import ContactInfo
def tukeys_fences_filter(data, k=1.5):
    """Drop outliers from ``data`` using Tukey's fences.

    The quartiles are taken as the elements at positions ``int(n * 0.25)``
    and ``int(n * 0.75)`` of the sorted samples (no interpolation). Values
    outside ``[q1 - k * iqr, q3 + k * iqr]`` are discarded, where ``iqr`` is
    ``q3 - q1``.

    Args:
        data: Iterable of numeric samples. It is not modified.
        k: Fence multiplier applied to the interquartile range.

    Returns:
        A new list, sorted ascending, of the samples lying within the
        fences (bounds inclusive). Empty input yields an empty list.
    """
    # Sort a copy: sorting in place would silently reorder the caller's list.
    ordered = sorted(data)
    if not ordered:
        # Without samples there are no quartiles to index.
        return []

    n = len(ordered)
    q1 = ordered[int(n * 0.25)]
    q3 = ordered[int(n * 0.75)]

    # Interquartile range and the fences derived from it.
    iqr = q3 - q1
    lower_fence = q1 - k * iqr
    upper_fence = q3 + k * iqr

    return [x for x in ordered if lower_fence <= x <= upper_fence]
def timeIt(name, fn, loops=1000):
def timer():
mark = System.nanoTime()
fn()
return System.nanoTime() - mark
toAverage = tukeys_fences_filter([float(timer()) for _ in xrange(loops)])
print '\t', name
print '\tmean {:.2f}ns'.format(system.math.mean(toAverage))
print '\tstdDev {:.2f}ns'.format(system.math.standardDeviation(toAverage))
class FakeUser(object):
    """Randomly generated stand-in for a user, for benchmarking.

    Each instance carries the two attributes the contact-list functions
    read: ``roles`` (a list of role names) and ``contactInfo`` (a list of
    ``ContactInfo`` objects).
    """

    # Pools the random generators draw from.
    possible_roles = ["Administrator", "Operator", "Supervisor", "Technician"]
    possible_contact_infos = ["email", "sms", "phone"]

    def __init__(self):
        # randrange(len(possible_roles)) excludes its upper bound, so a user
        # gets fewer roles than there are role names. Each role is drawn
        # independently, so the list may contain repeats.
        role_count = random.randrange(len(self.possible_roles))
        self.roles = [self.create_role() for _ in xrange(role_count)]

        # randrange(5) yields between 0 and 4 contact entries.
        contact_count = random.randrange(5)
        self.contactInfo = [self.create_contact_info() for _ in xrange(contact_count)]

    def create_role(self):
        """Return one role name chosen at random from ``possible_roles``."""
        return random.choice(self.possible_roles)

    def create_contact_info(self):
        """Return a ``ContactInfo`` with a random type and a random value.

        The type is chosen from ``possible_contact_infos``; the value is 20
        distinct lowercase letters joined into one string.
        """
        kind = random.choice(self.possible_contact_infos)
        address = ''.join(random.sample("abcdefghijklmnopqrstuvwxyz", 20))
        return ContactInfo(kind, address)
for n in [100, 1000, 10000]:
users = [FakeUser() for _ in xrange(n)]
print 'For', n, 'users'
timeIt("Comprehension", partial(retrieveContactList, users))
timeIt("Imperative", partial(retrieveContactList2, users))
timeIt("Pascal", partial(list_contacts, users))
print ' Filtering by role'
timeIt("Comprehension", partial(retrieveContactList, users, "Operator"))
timeIt("Imperative", partial(retrieveContactList2, users, "Operator"))
timeIt("Pascal", partial(list_contacts, users, "Operator"))
print
print
I reserve the right to have done something fundamentally wrong somewhere.
Either way, maybe someone will find some of the code in there reusable for something else.