Reputation: 23
I have to process a huge number of tuples made by k integers, each ranging from 1 to Max_k.
Each Max can be different. I need to skip the tuples where an element has reached is max value, in that case keeping only the tuple with "1" in the remaining position. The max is enforced by design, so it cannot be that some item is > of its max For example, if the max of the second element of a triple is 4, i need to keep (1,4,1) but skip (1,4,2) , (1,4,3) ... (2,4,1) etc.
I am pretty sure I am missing a much faster way to do that. My typical scenario is tuples with 16 to 20 elements, with maxes in the 50-70 mark. What would be the recommended approach ?
In Python, as a toy example with hardcoded Maxes (5,4,2), is the following:
from itertools import *
def filter_logic(y):
if y[0]==5:
if y[1] > 1 or y[2] >1:
return True
if y[1]==4:
if y[0] > 1 or y[2] >1:
return True
if y[2]==2:
if y[0] > 1 or y[1] >1:
return True
return False
def tuples_all(max_list):
my_iterables = []
for limit in max_list:
my_iterables.append(range(1, limit+1))
return product(*my_iterables)
def tuples_filtered(max_list):
return filterfalse(filter_logic, tuples_all(max_list))
max_list = [5,4,2]
print("Original list")
for res in tuples_all(max_list):
print(res)
print("After filtering")
for fil in tuples_filtered(max_list):
print(fil)
Output of the filtered tuples:
After filtering
(1, 1, 1)
(1, 1, 2)
(1, 2, 1)
(1, 3, 1)
(1, 4, 1)
(2, 1, 1)
(2, 2, 1)
(2, 3, 1)
(3, 1, 1)
(3, 2, 1)
(3, 3, 1)
(4, 1, 1)
(4, 2, 1)
(4, 3, 1)
(5, 1, 1)
Upvotes: 2
Views: 90
Reputation: 10465
Since you commented that order between tuples isn't important, we can simply produce the tuples with max value and then the tuples without max value:
from itertools import *
def tuples_direct(max_list):
n = len(max_list)
# Special case
if 1 in max_list:
yield (1,) * n
return
# Tuples with a max.
for i, m in enumerate(max_list):
yield (1,) * i + (m,) + (1,) * (n-i-1)
# Tuples without a max.
yield from product(*(range(1, m) for m in max_list))
max_list = [5,4,2]
for tup in tuples_direct(max_list):
print(tup)
Output (Attempt This Online!):
(5, 1, 1)
(1, 4, 1)
(1, 1, 2)
(1, 1, 1)
(1, 2, 1)
(1, 3, 1)
(2, 1, 1)
(2, 2, 1)
(2, 3, 1)
(3, 1, 1)
(3, 2, 1)
(3, 3, 1)
(4, 1, 1)
(4, 2, 1)
(4, 3, 1)
Upvotes: 0
Reputation: 4927
Assumptions about the rules for discarding a tuple:
A
>=
the max value for its index, AND
>= 1
B
>=
the max value for their respective indicessuch that if either rule A or rule B is hit, the tuple is tossed.
If this ruleset describes your problem, the solution is quite managable. From my basic testing, it's relatively performant even in native python - but I've also included a numpy
solution which should hypothetically be orders of magnitude faster if you have so much data data that native python is noticeably slow.
import itertools
import numpy as np
def should_discard(values, max_values):
any_max_val = False
any_gt_one_val = False
for val, max_val in zip(values, max_values): # zip truncates automatically
#print(f"Val: {val} < {max_val}: {max_val >= val}") # Manual inspection
if val >= max_val:
if any_max_val:
any_gt_one_val = True
any_max_val = True
elif val > 1:
any_gt_one_val = True
if any_max_val and any_gt_one_val:
return True
return False
def should_discard_numpy(values, max_values):
values = np.array(values)
max_values = np.array(max_values[:len(values)]) # Truncate max_values
ge_max_arr = values >= max_values
if np.sum(ge_max_arr) >= 2:
return True
any_ge_max = np.any(ge_max_arr)
any_gt_one = np.any((values > 1) & ~ge_max_arr) # A.2 - compare for >1 but exclude the val(s) that hit A.1
if any_ge_max and any_gt_one:
return True
return False
def filter_tuples(func, tuples, max_values):
return itertools.filterfalse(lambda t: func(t, max_values), tuples)
vals = [
(1, 4, 1), # Keep
(1, 4, 2), # Discard - rule A
(2, 4, 1), # Discard - rule A
(0, 4, 1), # Keep
(1, 3, 3), # Discard - rule A
(2, 1, 3), # Discard - rule A OR rule B
]
max_vals = (2, 4, 3)
vals2 = [
(1, 2, 1, 5, 3, 1, 6, 1, 0, 1, 1, 1, 1, 2, 0, 1, 0, 0, 0, 7), # Discard - rule A
(2, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1), # Discard - rule B
(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5), # Keep
(1, 1, 0, 4, 3, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1), # Keep
(1, 1, 0, 4, 3, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1), # Discard - rule A
]
max_vals2 = (2, 2, 2, 5, 4, 1, 6, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 7)
print("Native python:", list(filter_tuples(should_discard, vals, max_vals)))
print("Numpy :", list(filter_tuples(should_discard_numpy, vals, max_vals)))
print("Native python:", list(filter_tuples(should_discard, vals2, max_vals2)))
print("Numpy :", list(filter_tuples(should_discard_numpy, vals2, max_vals2)))
Upvotes: -1