Reputation: 59
I am trying to compare one list to many lists and generate a csv file with values aligned. itertools.zip_longest does a good job but because f the need to align the output, I thought I'd construct my own version. This would help be understand generators. If there is a better approach, please let me know.
Because the first list is the one I am comparing the rest of the lists to, I thought I would use args[0] to iterate over it and compare the others to it. Because I wanted to manually run next(it) only once its value has been found, I created a cache to compare to. I believe this is where I am having an issue. I should be creating more rows that my results are showing.
def main():
a = ['apple','banana','pear']
b = ['apple','orange','orange','pear']
c = ['banana','cucumber']
d = ['1 apple','2 cherries']
zipped_data = [','.join(x) for x in zip_longest_list(a,b,c,d,)]
def zip_longest_list(*args, fillvalue=''):
iterators = [iter(it) for it in args]
num_active = len(iterators)
# I created a cache to compare lists with
cache = [{'value': '', 'isLoaded': False} for i in range(num_active)]
data = []
# check if args are valid
if not num_active:
return
# Because the first list is the one I am comparing the rest of the lists to
# I thought I would use args[0] to iterate over it and compare the others to it
# iterate over the list to compare to
for i in args[0]:
values = []
for j, it in enumerate(iterators):
value = ''
# Because I wanted to manualy run next(it) only once its value has been found
# I created a cache
# load cache
try:
if cache[i]['isLoaded'] == False:
value = next(it)
cache['value'] = value
cache[i]['isLoaded'] = True
# check if list is empty
except StopIteration:
num_active -= 1
if not num_active:
return
iterators[i] = repeat(fillvalue)
value = fillvalue
# I believe this is where I am having an issue
# I should be creating more rows that my results are showing
if cache[i]['isLoaded'] == True:
if i == cache[i]['value']:
new_row = []
[new_row.append(x['value']) for x in cache]
row.append(str(','.join([x for x in new_row])))
cache[i]['isLoaded'] = False
else:
continue
data.append(values)
for i in data:
yield i
# local copy of itertools.repeat
def repeat(object, times=None):
if times is None:
while True:
yield object
else:
for i in range(times):
yield object
if __name__ == '__main__':
main()
expected output
[',,,1 apple']
[',,,2 cherries']
['apple,apple,,']
['banana,,banana,']
[',,cucumber,']
[',orange,,']
[',orange,,']
['pear,pear,,']
actual output
['apple,,,','apple,apple,,']
['banana,apple,banana,1 apple','banana,orange,banana,1 apple']
['pear,orange,banana,1 apple']
Much appreciated
Upvotes: 0
Views: 308
Reputation: 42133
I think I may have figured out how you get your desired output. It looks like the old "file match" processes that were used to sort on magnetic tapes:
def fileMatch(*content, fillValue=None):
Done = []
iterators = [ iter(c) for c in content ]
values = [ next(i,Done) for i in iterators ]
while not all(v is Done for v in values):
matchValue = min(v for v in values if v is not Done)
matched = [ v is not Done and v == matchValue for v in values ]
yield tuple ( v if isMatch else fillValue
for v,isMatch in zip(values,matched) )
values = [ next(i,Done) if isMatch else v
for v,isMatch,i in zip(values,matched,iterators) ]
for t in fileMatch(a,b,c,d,fillValue=""): print(t)
('', '', '', '1 apple')
('', '', '', '2 cherries')
('apple', 'apple', '', '')
('banana', '', 'banana', '')
('', '', 'cucumber', '')
('', 'orange', '', '')
('', 'orange', '', '')
('pear', 'pear', '', '')
Upvotes: 2
Reputation: 27629
A few more solutions...
My original:
def zip_longest_list(*args, fillvalue=''):
its = list(map(iter, args))
dummy = object()
front = [next(it, dummy) for it in its]
while not all(x is dummy for x in front):
x = min(x for x in front if x is not dummy)
result = []
for i, y in enumerate(front):
if y is not dummy and y == x:
result.append(y)
front[i] = next(its[i], dummy)
else:
result.append(fillvalue)
yield tuple(result)
Trying to be short:
def zip_longest_list(*args, fillvalue=''):
args = [a[::-1] for a in args]
while any(args):
x = min(a[-1] for a in args if a)
yield tuple(a.pop() if x in a[-1:] else fillvalue for a in args)
Using heapq.merge
trying to be efficient for finding the min (but then realized we're spending O(len(args)) for every yielded tuple anyway):
from heapq import merge
from itertools import groupby
def zip_longest_list(*args, fillvalue=''):
def gen(i, a):
for k, g in groupby(a):
for j, v in enumerate(g):
yield v, j, i
merged = merge(*(gen(*e) for e in enumerate(args)))
for (v, _), g in groupby(merged, lambda t: t[:2]):
s = {t[2] for t in g}
yield tuple(v if i in s else fillvalue
for i in range(len(args)))
Using peekable iterators:
from more_itertools import peekable
def zip_longest_list(*args, fillvalue=''):
its = list(map(peekable, args))
while any(its):
x = min(it.peek() for it in its if it)
yield tuple(next(it) if it and it.peek() == x else fillvalue
for it in its)
Upvotes: 1