Reputation: 53
I have an ordered list
L= [330.56, 330.6,330.65,330.7, .....]
and I want to group it by a certain tolerance -+, but I can have duplicates, so I need to iterate over every element ( see if there are other elements within element-tol and element +tol) and then delete if there is a complete sublist
The code I'm using look like this but there are no duplicates and only check next elements
def mw_grouper(iterable):
group = []
for item in iterable:
if not group or item - group[0] <= 0.05:
group.append(item)
else:
yield group
group = [item]
if group:
yield group
What I get is more of, I need to also check the previous elements
R = [[330.56, 330.6], [330.65], [330.7]]
The output I want
R = [[330.56, 330.6],[330.56, 330.6, 330.65], [330.6,330.65,330.7], [330.7, 330.65]]
and then delete the sublists
F = [[330.56, 330.6, 330.65], [330.6,330.65,330.7]]
Upvotes: 0
Views: 90
Reputation: 3409
Ok, this should work, though I didn't test it intensively, so there might be unforeseen errors.
group
), but it would probably have been simpler (though more memory intensive in case of huge lists) to implement a list for each element of the iterable.I may have overthought this and overlooked a simpler implementation, but that's the best I'm able to produce at this time ;)
def group_by_tolerance(my_iterable, tolerance):
active_index = 0 # The index (in group) of the next element "whose" group has to be yielded
group = []
last_released_group = []
for number in my_iterable:
if not group:
group = [number]
continue
elif number - group[active_index] <= tolerance:
group.append(number)
continue
while group and number - group[active_index] > tolerance:
# check that this group is not a subgroup of the next one
if active_index >= len(group)-1 or group[active_index+1] - group[0] > tolerance:
# check that this group is not a subgroup of the previous one
if len(last_released_group) < len(group) or group != last_released_group[-len(group)]:
last_released_group = group.copy()
yield last_released_group
active_index += 1
if active_index >= len(group):
active_index = 0
group = []
continue
while group[active_index] - group[0] > tolerance:
group.pop(0)
active_index -= 1
group.append(number)
if group:
yield group
L= [330.56, 330.6,330.65,330.7]
print(list(group_by_tolerance(L,0.01)))
# [[330.56], [330.6], [330.65], [330.7]]
print(list(group_by_tolerance(L,0.051)))
# [[330.56, 330.6, 330.65], [330.6, 330.65, 330.7]]
print(list(group_by_tolerance(L,0.1)))
# [[330.56, 330.6, 330.65, 330.7]]
Here's the code tweaked to return (index, value) instead of value only; you can then treat the output to get the values only, or the indices only:
def group_by_tolerance(my_iterable, tol):
active_index = 0
group = []
last_released_group = []
for i,number in enumerate(my_iterable):
if not group:
group = [(i,number)]
continue
elif number - group[active_index][1] <= tol:
group.append((i,number))
continue
while group and number - group[active_index][1] > tol:
# check that this group is not a subgroup of the next one
if active_index >= len(group)-1 or group[active_index+1][1] - group[0][1] > tol:
# check that this group is not a subgroup of the previous one
if len(last_released_group) < len(group) or group != last_released_group[-len(group)]:
last_released_group = group.copy()
yield last_released_group
active_index += 1
if active_index >= len(group):
active_index = 0
group = []
continue
while group[active_index][1] - group[0][1] > tol:
group.pop(0)
active_index -= 1
group.append((i,number))
if group:
yield group
print(list(group_by_tolerance(L,0.051)))
# [[(0, 330.56), (1, 330.6), (2, 330.65)], [(1, 330.6), (2, 330.65), (3, 330.7)]]
Upvotes: 1