Reputation: 8788
Say I have a list L. How can I get an iterator over all partitions of L into K groups?
Example: L = [ 2,3,5,7,11, 13], K = 3
List of all possible partitions of 3 groups:
[ [ 2 ], [ 3, 5], [ 7,11,13] ]
[ [ 2,3,5 ], [ 7, 11], [ 13] ]
[ [ 3, 11 ], [ 5, 7], [ 2, 13] ]
[ [ 3 ], [ 11 ], [ 5, 7, 2, 13] ]
etc...
=== UPDATE ===
I was working on a solution which seems to be working, so I will just copy paste it
# -*- coding: utf-8 -*-
import itertools
# return ( list1 - list0 )
def l1_sub_l0(l1, l0):
    """Multiset difference: return [l1 - l0, l0 - l1].

    Each element of l0 cancels at most one matching element of l1.
    The first returned list holds what remains of l1; the second holds
    the elements of l0 that had no match in l1.
    """
    copy_l1 = list(l1)
    copy_l0 = list(l0)
    for xx in l0:
        # remove one occurrence from each copy when a match exists
        if xx in copy_l1:
            copy_l1.remove(xx)
            copy_l0.remove(xx)
    return [copy_l1, copy_l0]
#
def gen_group_len(n, k):
    """Yield every multiset of k positive group sizes summing to n.

    Sizes come out as sorted tuples, each exactly once, e.g.
    gen_group_len(5, 3) -> (1, 1, 3), (1, 2, 2).
    """
    seen = set()  # size multisets already yielded (avoid doubles)
    # One group can hold at most n - k + 1 elements once every other group
    # gets its mandatory element.  The original xrange(1, n - 1) bound
    # produced nothing for n == 2, k == 2 even though (1, 1) is valid.
    for t in itertools.combinations_with_replacement(range(1, n - k + 2), k - 1):
        last_n = n - sum(t)
        # the k-th group must be non-empty too
        if last_n >= 1:
            res = tuple(sorted(t + (last_n,)))
            if res not in seen:
                seen.add(res)
                yield res
# group_len = (1, 1, 3)
def gen(group_len, my_list):
    """Yield every partition of my_list whose block sizes match group_len.

    group_len is a sorted tuple of sizes, e.g. (1, 1, 3); each partition
    is produced as a tuple of tuples.  When two consecutive sizes are
    equal, a stop list keeps the same unordered pair of blocks from being
    emitted twice.
    """
    if len(group_len) == 1:
        # one group left: it takes every remaining element
        yield (tuple(my_list),)
        return
    emitted = []  # unordered {first block, second block} pairs already seen
    for head in itertools.combinations(my_list, group_len[0]):
        remainder = l1_sub_l0(my_list, head)[0]
        for tail in gen(group_len[1:], remainder):
            pair = set((head, tail[0]))
            if pair not in emitted:
                yield (head,) + tail
            # only equal-sized neighbours can produce swapped duplicates
            if group_len[1] == group_len[0]:
                emitted.append(pair)
#
# Demo driver: enumerate all size configurations, then all partitions of
# each.  Print statements converted to Python 3 print() calls for
# consistency with the Python-3-only answers elsewhere on this page.
my_list = [3, 5, 7, 11, 13]
n = len(my_list)
k = 3

group_len_list = list(gen_group_len(n, k))
print("for %i elements, %i configurations of group sizes" % (n, len(group_len_list)))
print(group_len_list)

for group_len in group_len_list:
    print("group sizes", group_len)
    for x in gen(group_len, my_list):
        print(x)
    print("===")
Output:
for 5 elements, 2 configurations of group sizes
[(1, 1, 3), (1, 2, 2)]
group sizes (1, 1, 3)
((3,), (5,), (7, 11, 13))
((3,), (7,), (5, 11, 13))
((3,), (11,), (5, 7, 13))
((3,), (13,), (5, 7, 11))
((5,), (7,), (3, 11, 13))
((5,), (11,), (3, 7, 13))
((5,), (13,), (3, 7, 11))
((7,), (11,), (3, 5, 13))
((7,), (13,), (3, 5, 11))
((11,), (13,), (3, 5, 7))
===
group sizes (1, 2, 2)
((3,), (5, 7), (11, 13))
((3,), (5, 11), (7, 13))
((3,), (5, 13), (7, 11))
((5,), (3, 7), (11, 13))
((5,), (3, 11), (7, 13))
((5,), (3, 13), (7, 11))
((7,), (3, 5), (11, 13))
((7,), (3, 11), (5, 13))
((7,), (3, 13), (5, 11))
((11,), (3, 5), (7, 13))
((11,), (3, 7), (5, 13))
((11,), (3, 13), (5, 7))
((13,), (3, 5), (7, 11))
((13,), (3, 7), (5, 11))
((13,), (3, 11), (5, 7))
===
Upvotes: 7
Views: 3734
Reputation: 3643
Edited: As noted by @moose, the following only determines partitions where contiguous indices are in the same cluster. Performing this partitioning over all permutations will give the sought answer.
Itertools is very useful for this sort of combinatoric listing. First, we consider your task as the equivalent problem of selecting all sets of k-1
distinct split points in the array. This is solved by itertools.combinations, which returns combinations without replacement of a particular size from a given iterable, and the values it returns are in the order that they are found in the original iterable.
Your problem is thus solved by the following:
import itertools
def neclusters(l, K):
    """Yield partitions of l into K non-empty runs of consecutive elements."""
    # choosing K - 1 cut positions between adjacent elements fixes a partition
    for cuts in itertools.combinations(range(1, len(l)), K - 1):
        bounds = [0, *cuts, len(l)]
        yield [l[lo:hi] for lo, hi in zip(bounds, bounds[1:])]
numpy's split function is designed to make these sorts of partitions given split offsets, so here's an alternative that generates lists of numpy arrays:
import itertools
import numpy as np  # np.split was used but never imported in the original snippet


def neclusters(l, K):
    """Yield partitions of l into K contiguous blocks, as numpy arrays."""
    for splits in itertools.combinations(range(len(l) - 1), K - 1):
        # np.split wants absolute cut indices: the chosen gaps shifted by one
        yield np.split(l, 1 + np.array(splits))
Upvotes: 1
Reputation: 445
A reasonably efficient way is to pivot on the first element in each recursion to force uniqueness, and simply go through combinations of all increasing sizes up to the point it would give empty subsets.
def kpartitions(l, k):
    """Yield all partitions of list l into exactly k non-empty blocks.

    Pinning the first element into the first block forces a canonical
    ordering, so every set partition is produced exactly once.
    """
    import itertools
    if k == 1:
        # single block possible: everything together
        yield [l]
        return
    rest = set(range(1, len(l)))
    # the pivot block may take between 1 and len(l) - k + 1 elements
    for size in range(1, len(l) - k + 2):
        for extra in itertools.combinations(rest, size - 1):
            leftover = [l[j] for j in rest - set(extra)]
            for tail in kpartitions(leftover, k - 1):
                yield [[l[0], *(l[j] for j in extra)], *tail]
def stirlingsecond(n, k):
    """Stirling number of the second kind S(n, k), by inclusion-exclusion."""
    import math
    total = 0
    for i in range(k + 1):
        sign = -1 if i % 2 else 1
        total += sign * math.comb(k, i) * (k - i) ** n
    # the alternating sum is k! * S(n, k); the division is exact
    return total // math.factorial(k)
# sanity checks: the number of partitions of an n-element list into k
# non-empty blocks equals the Stirling number of the second kind S(n, k)
assert len(list(kpartitions([3,5,7,11,13], 3))) == stirlingsecond(5, 3)
assert len(list(kpartitions([2,3,5,7,11,13], 3))) == stirlingsecond(6, 3)
This is quite efficient though it does a little extra work to find the elements not in the combinations, as itertools.combinations is convenient, though writing a combination function which yields both the combination and those elements not in it might give a constant time improvement.
Upvotes: 0
Reputation: 44545
Filter partitions of size k
using more_itertools.partitions
(note the trailing "s"):
Given
import itertools as it
import more_itertools as mit  # third-party: pip install more_itertools
# sample input: partition this list into k groups
iterable = [2, 3, 5, 7, 11]
k = 3
Demo
# cut every permutation at every possible set of split points,
# keeping only the cuts that produce exactly k blocks
res = [p for perm in it.permutations(iterable) for p in mit.partitions(perm) if len(p) == k]
len(res)
# 720
res
# [[[2], [3], [5, 7, 11]],
# [[2], [3, 5], [7, 11]],
# [[2], [3, 5, 7], [11]],
# [[2, 3], [5], [7, 11]],
# [[2, 3], [5, 7], [11]],
# [[2, 3, 5], [7], [11]],
# ...
# [[3], [2], [5, 7, 11]],
# [[3], [2, 5], [7, 11]],
# [[3], [2, 5, 7], [11]],
# [[3, 2], [5], [7, 11]],
# [[3, 2], [5, 7], [11]],
# [[3, 2, 5], [7], [11]],
# [[3], [2], [5, 11, 7]],
# ...
# ]
This version gives partitions of a permuted input. Partitions containing the same groups in a different order may be included, e.g. [[3,], [5,], [7, 11, 13]] and [[7, 11, 13], [3,], [5,]]
.
Note: more_itertools
is a third-party package. Install via > pip install more_itertools
.
Upvotes: 3
Reputation: 3643
A simple alternative view of this problem is the assignment of one of the three cluster labels to each element.
import itertools
def neclusters(l, k):
    """Yield every assignment of the elements of l to k ordered clusters.

    Each of the k**len(l) label vectors produces one partition; empty
    clusters are included.
    """
    for assignment in itertools.product(range(k), repeat=len(l)):
        groups = [[] for _ in range(k)]
        for element, label in zip(l, assignment):
            groups[label].append(element)
        yield groups
as with @val's answer, this can be wrapped to remove partitions with empty clusters.
Upvotes: 2
Reputation: 8689
This works, although it is probably super inefficient (I sort them all to avoid double-counting):
def clusters(l, K):
    """Recursively yield all K-tuples of (possibly empty) clusters covering l.

    Each sub-partition is sorted before the head element is inserted, and
    sorted duplicates are skipped, so every multiset of clusters appears
    exactly once.
    """
    if l:
        prev = None
        for t in clusters(l[1:], K):
            tup = sorted(t)
            # skip sub-partitions already seen in sorted form
            if tup != prev:
                prev = tup
                # place the head element into each of the K clusters in turn
                for i in range(K):  # range() replaces Python-2-only xrange()
                    yield tup[:i] + [[l[0]] + tup[i],] + tup[i+1:]
    else:
        # base case: K empty clusters
        yield [[] for _ in range(K)]
It also returns empty clusters, so you would probably want to wrap this in order to get only the non-empty ones:
def neclusters(l, K):
    """Like clusters(), but drop any partition containing an empty cluster."""
    for candidate in clusters(l, K):
        if all(x for x in candidate):
            yield candidate
Counting just to check:
def kamongn(n, k):
    """Binomial coefficient C(n, k): multiply n-k+1..n, then divide by k!."""
    res = 1
    for x in range(n - k, n):  # range() replaces Python-2-only xrange()
        res *= x + 1
    # the running product is divisible at every step, so floor division
    # keeps the result an int (Python 2's `/` was integer division here)
    for x in range(k):
        res //= x + 1
    return res
def Stirling(n, k):
    """Stirling number of the second kind S(n, k), by inclusion-exclusion."""
    res = 0
    for j in range(k + 1):  # range() replaces Python-2-only xrange()
        res += (-1) ** (k - j) * kamongn(k, j) * j ** n
    # divide the alternating sum by k!; each step is exact, so floor
    # division preserves the integer result under Python 3
    for x in range(k):
        res //= x + 1
    return res
>>> sum(1 for _ in neclusters([2,3,5,7,11,13], K=3)) == Stirling(len([2,3,5,7,11,13]), k=3)
True
It works !
The output:
>>> clust = neclusters([2,3,5,7,11,13], K=3)
>>> [clust.next() for _ in xrange(5)]
[[[2, 3, 5, 7], [11], [13]], [[3, 5, 7], [2, 11], [13]], [[3, 5, 7], [11], [2, 13]], [[2, 3, 11], [5, 7], [13]], [[3, 11], [2, 5, 7], [13]]]
Upvotes: 3