pmichna
pmichna

Reputation: 4888

How to generate all subsets of a given size?

Given some number n, and a subset size, I want to get all possible subsets of the specified size of the set {1, ..., n}.

Expected result for for n = 5 and subsetSize = 4:

{{1,2,3,4}, {1,2,3,5}, {1,3,4,5}, {1,2,4,5}, {2,3,4,5}}

(that would be a List<List<int>>)

That means I need to get (subsetSize choose n) subsets (Newton's symbol).

Any idea for an algorithm that could find me such a list of lists of integers? I'm implementing it in C#, if that matters.

Upvotes: 6

Views: 5722

Answers (5)

David Laeremans
David Laeremans

Reputation: 1

I created following IEnumerable extension methods to get all subsets of size n in an array with optionally a predicate.

public static class ExtensionMethods
{
    public static IEnumerable<IEnumerable<T>> GetSubSets<T>(this IEnumerable<T> list, int size, Predicate<T> filter)
    {
        return GetSubSetsInternal(list.ToList().FindAll(filter), size);
    }
    public static IEnumerable<IEnumerable<T>> GetSubSets<T>(this IEnumerable<T> list, int size)
    {
        return GetSubSetsInternal(list, size);
    }
    private static IEnumerable<IEnumerable<T>> GetSubSetsInternal<T>(this IEnumerable<T> list, int size)
    {
        var vFinalList = new List<IList<T>>();
        var c = size;

        var vList = list.ToList();

        var vListSize = list.Count();

        if (c <= 0 || size > vListSize) return vFinalList;

        c -= 1;

        var indexs = new int[size];
        var vFirstSubList = new List<T>();

        for (var j = 0; j <= (size - 1); j++)
        {
            indexs[j] = j;
            vFirstSubList.Add(vList[j]);
        }


        vFinalList.Add(vFirstSubList);

        while (indexs[0] !=(vListSize - size) && vFinalList.Count < 2147483647)
        {
            if (indexs[c] < c + (vListSize - size))
            {
                indexs[c]++;

                var vSubList = new List<T>();
                foreach (var j in indexs) vSubList.Add(vList[j]);
                vFinalList.Add(vSubList);
            }
            else
            {
                do
                {
                    c -= 1;
                } while (indexs[c] == c + (vListSize - size));
                indexs[c] += 1;
                for (var j = c + 1; j <= (size - 1); j++) indexs[j] = indexs[j - 1] + 1;
                var vSubList = new List<T>();
                foreach (var j in indexs) vSubList.Add(vList[j]);
                vFinalList.Add(vSubList);
                c = size - 1;
            }
        }
        return vFinalList;
    }
}

Usage: for example determining naked sets in sudoku.

internal class SolverStepNakedSet : SolverStepSetBase
{

    protected override bool SolveIt(SolutionManager solutionManager, int size)
    {
        foreach (var vSol in (from vec in solutionManager.Sudoku.Vectors
                              from vSet in vec.GetSubSets(size, c => Enumerable.Range(2, size - 1).Contains(c.MyValue.OpenCandidates.Count()))
                              let vValues = vSet.SelectMany(c => c.MyValue.OpenCandidates).Select(cc=> cc.Value).Distinct().ToList()
                              where vValues.Count() == size
                              let vLstCellsValues = (from vValue in vValues
                                                     let vLstCells = vec.FindAll(c => !vSet.Contains(c) && c.MyValue.OpenCandidates.Any(cc=>cc.Value==vValue))
                                                     where vLstCells.Count() > 0
                                                     select new SolutionValueElement(vValue, vLstCells))
                              where vLstCellsValues.Count() > 0
                              select new SolutionSet( Step, new SolutionValue(ValueState.Removal, vLstCellsValues), new Set(vSet.ToList(),vValues))))
        {

            if (solutionManager.Execute(vSol)) return true;
        }
        return false;
    }
}

Upvotes: 0

Jesus is Lord
Jesus is Lord

Reputation: 15399

I needed this today (my previous solution was too inefficient). It's faster than a powerset approach. It returns an IEnumerable, so this should be relatively memory efficient, too, if you don't need them all in memory at once.

Here's what I came up with:

public class Subsets
{
    public static IEnumerable<IList<int>> Compute(int countOfSet, int countOfSubset)
    {
        if (countOfSet < 0)
        {
            throw new ArgumentException(@"countOfSet must be at least 0");
        }

        if (countOfSubset < 0)
        {
            throw new ArgumentException(@"countOfSet must be at least 0");
        }

        if (countOfSubset > countOfSet)
        {
            throw new ArgumentException(@"countOfSubset must less than or equal to countOfSet");
        }

        //var allChoices = Enumerable.Range(0, countOfSet);
        //var result = GenerateOld(countOfSubset, allChoices, Enumerable.Empty<int>()).ToArray();
        var result = Generate(countOfSet, countOfSubset);

        return result;
    }

    public static IEnumerable<IList<int>> Generate(int countOfSet, int countOfSubset)
    {
        // If countOfSet is 5 and countofSubset is 3, maxCounterValue will be 234
        var maxCounterValue = Enumerable.Range(0, countOfSet)
            .Reverse()
            .Take(countOfSubset)
            .Reverse()
            .ToArray();

        var counters = new int[countOfSubset];

        // Initialize counters to 0,1,2,3,...
        foreach (var i in Enumerable.Range(0, countOfSubset))
        {
            counters[i] = i;
        }

        while (true)
        {
            yield return counters.ToArray();

            if (counters.SequenceEqual(maxCounterValue))
            {
                break;
            }

            var currentCounter = countOfSubset - 1;

            counters[currentCounter]++;

            // In case there was overflow, increment previous counters.
            while (counters[currentCounter] > maxCounterValue[currentCounter])
            {
                currentCounter--;
                counters[currentCounter]++;
            }
            // "Reset" counters on the way back up.
            while (currentCounter < countOfSubset - 1)
            {
                counters[currentCounter + 1] = counters[currentCounter] + 1;
                currentCounter++;
            }
        }
    }

Here are my tests (NUnit 3):

using NUnit.Framework;
using System.Linq;

//[Timeout(1000)]
public class SubsetsTests
{
    [Test]
    public void Compute0_1()
    {
        var act = new TestDelegate(() => Subsets.Compute(0, 1).ToArray());
        Assert.That(act, Throws.Exception);
    }

    [Test]
    public void Compute_1_0()
    {
        var act = new TestDelegate(() => Subsets.Compute(-1, 0).ToArray());
        Assert.That(act, Throws.Exception);
    }

    [Test]
    public void Compute1__1()
    {
        var act = new TestDelegate(() => Subsets.Compute(1, -1).ToArray());
        Assert.That(act, Throws.Exception);
    }

    [Test]
    public void Compute0_0()
    {
        var result = Subsets.Compute(0, 0).ToArray();
        Assert.That(result.Length, Is.EqualTo(1));
        Assert.That(result[0], Is.EquivalentTo(new int[0]));
    }

    [Test]
    public void Compute1_1()
    {
        var result = Subsets.Compute(1, 1).ToArray();
        Assert.That(result.Length, Is.EqualTo(1));
        Assert.That(result[0], Is.EquivalentTo(new[] { 0 }));
    }

    [Test]
    public void Compute1_0()
    {
        var result = Subsets.Compute(1, 0).ToArray();
        Assert.That(result.Length, Is.EqualTo(1));
        Assert.That(result[0], Is.EquivalentTo(new int[0]));
    }

    [Test]
    public void Compute2_2()
    {
        var result = Subsets.Compute(2, 2).ToArray();
        Assert.That(result.Length, Is.EqualTo(1));
        Assert.That(result[0], Is.EquivalentTo(new[] { 0, 1 }));
    }

    [Test]
    public void Compute2_1()
    {
        var result = Subsets.Compute(2, 1).ToArray();
        Assert.That(result.Length, Is.EqualTo(2));
        Assert.That(result[0], Is.EquivalentTo(new[] { 0 }));
        Assert.That(result[1], Is.EquivalentTo(new[] { 1 }));
    }

    [Test]
    public void Compute2_0()
    {
        var result = Subsets.Compute(2, 0).ToArray();
        Assert.That(result.Length, Is.EqualTo(1));
        Assert.That(result[0], Is.EquivalentTo(new int[0]));
    }

    [Test]
    public void Compute3_3()
    {
        var result = Subsets.Compute(3, 3).ToArray();
        Assert.That(result.Length, Is.EqualTo(1));
        Assert.That(result[0], Is.EquivalentTo(new[] { 0, 1, 2 }));
    }

    [Test]
    public void Compute3_2()
    {
        var result = Subsets.Compute(3, 2).ToArray();
        Assert.That(result.Length, Is.EqualTo(3));
        Assert.That(result[0], Is.EquivalentTo(new[] { 0, 1 }));
        Assert.That(result[1], Is.EquivalentTo(new[] { 0, 2 }));
        Assert.That(result[2], Is.EquivalentTo(new[] { 1, 2 }));
    }

    [Test]
    public void Compute3_1()
    {
        var result = Subsets.Compute(3, 1).ToArray();
        Assert.That(result.Length, Is.EqualTo(3));
        Assert.That(result[0], Is.EquivalentTo(new[] { 0 }));
        Assert.That(result[1], Is.EquivalentTo(new[] { 1 }));
        Assert.That(result[2], Is.EquivalentTo(new[] { 2 }));
    }

    [Test]
    public void Compute3_0()
    {
        var result = Subsets.Compute(2, 0).ToArray();
        Assert.That(result.Length, Is.EqualTo(1));
        Assert.That(result[0], Is.EquivalentTo(new int[0]));
    }

    [Test]
    public void Compute4_4()
    {
        var result = Subsets.Compute(4, 4).ToArray();
        Assert.That(result.Length, Is.EqualTo(1));
        Assert.That(result[0], Is.EquivalentTo(new[] { 0, 1, 2, 3 }));
    }

    [Test]
    public void Compute4_3()
    {
        var result = Subsets.Compute(4, 3).ToArray();
        Assert.That(result.Length, Is.EqualTo(4));
        Assert.That(result[0], Is.EquivalentTo(new[] { 0, 1, 2 }));
        Assert.That(result[1], Is.EquivalentTo(new[] { 0, 1, 3 }));
        Assert.That(result[2], Is.EquivalentTo(new[] { 0, 2, 3 }));
        Assert.That(result[3], Is.EquivalentTo(new[] { 1, 2, 3 }));
    }

    [Test]
    public void Compute4_2()
    {
        var result = Subsets.Compute(4, 2).ToArray();
        Assert.That(result.Length, Is.EqualTo(6));
        Assert.That(result[0], Is.EquivalentTo(new[] { 0, 1 }));
        Assert.That(result[1], Is.EquivalentTo(new[] { 0, 2 }));
        Assert.That(result[2], Is.EquivalentTo(new[] { 0, 3 }));
        Assert.That(result[3], Is.EquivalentTo(new[] { 1, 2 }));
        Assert.That(result[4], Is.EquivalentTo(new[] { 1, 3 }));
        Assert.That(result[5], Is.EquivalentTo(new[] { 2, 3 }));
    }

    [Test]
    public void Compute4_1()
    {
        var result = Subsets.Compute(4, 1).ToArray();
        Assert.That(result.Length, Is.EqualTo(4));
        Assert.That(result[0], Is.EquivalentTo(new[] { 0 }));
        Assert.That(result[1], Is.EquivalentTo(new[] { 1 }));
        Assert.That(result[2], Is.EquivalentTo(new[] { 2 }));
        Assert.That(result[3], Is.EquivalentTo(new[] { 3 }));
    }

    [Test]
    public void Compute5_5()
    {
        var result = Subsets.Compute(5, 5).ToArray();
        Assert.That(result.Length, Is.EqualTo(1));
        Assert.That(result[0], Is.EquivalentTo(new[] { 0, 1, 2, 3, 4 }));
    }

    [Test]
    public void Compute5_4()
    {
        var result = Subsets.Compute(5, 4).ToArray();
        Assert.That(result.Length, Is.EqualTo(5));
        Assert.That(result[0], Is.EquivalentTo(new[] { 0, 1, 2, 3 }));
        Assert.That(result[1], Is.EquivalentTo(new[] { 0, 1, 2, 4 }));
        Assert.That(result[2], Is.EquivalentTo(new[] { 0, 1, 3, 4 }));
        Assert.That(result[3], Is.EquivalentTo(new[] { 0, 2, 3, 4 }));
        Assert.That(result[4], Is.EquivalentTo(new[] { 1, 2, 3, 4 }));
    }

    [Test]
    public void Compute5_3()
    {
        var result = Subsets.Compute(5, 3).ToArray();
        Assert.That(result.Length, Is.EqualTo(10));
        var i = 0;
        Assert.That(result[i++], Is.EquivalentTo(new[] { 0, 1, 2 }));
        Assert.That(result[i++], Is.EquivalentTo(new[] { 0, 1, 3 }));
        Assert.That(result[i++], Is.EquivalentTo(new[] { 0, 1, 4 }));
        Assert.That(result[i++], Is.EquivalentTo(new[] { 0, 2, 3 }));
        Assert.That(result[i++], Is.EquivalentTo(new[] { 0, 2, 4 }));
        Assert.That(result[i++], Is.EquivalentTo(new[] { 0, 3, 4 }));
        Assert.That(result[i++], Is.EquivalentTo(new[] { 1, 2, 3 }));
        Assert.That(result[i++], Is.EquivalentTo(new[] { 1, 2, 4 }));
        Assert.That(result[i++], Is.EquivalentTo(new[] { 1, 3, 4 }));
        Assert.That(result[i++], Is.EquivalentTo(new[] { 2, 3, 4 }));
    }

    [Test]
    public void Compute5_2()
    {
        var result = Subsets.Compute(5, 2).ToArray();
        Assert.That(result.Length, Is.EqualTo(10));
        var i = 0;
        Assert.That(result[i++], Is.EquivalentTo(new[] { 0, 1 }));
        Assert.That(result[i++], Is.EquivalentTo(new[] { 0, 2 }));
        Assert.That(result[i++], Is.EquivalentTo(new[] { 0, 3 }));
        Assert.That(result[i++], Is.EquivalentTo(new[] { 0, 4 }));
        Assert.That(result[i++], Is.EquivalentTo(new[] { 1, 2 }));
        Assert.That(result[i++], Is.EquivalentTo(new[] { 1, 3 }));
        Assert.That(result[i++], Is.EquivalentTo(new[] { 1, 4 }));
        Assert.That(result[i++], Is.EquivalentTo(new[] { 2, 3 }));
        Assert.That(result[i++], Is.EquivalentTo(new[] { 2, 4 }));
        Assert.That(result[i++], Is.EquivalentTo(new[] { 3, 4 }));
    }

    [Test]
    public void Compute5_1()
    {
        var result = Subsets.Compute(5, 1).ToArray();
        Assert.That(result.Length, Is.EqualTo(5));
        var i = 0;
        Assert.That(result[i++], Is.EquivalentTo(new[] { 0 }));
        Assert.That(result[i++], Is.EquivalentTo(new[] { 1 }));
        Assert.That(result[i++], Is.EquivalentTo(new[] { 2 }));
        Assert.That(result[i++], Is.EquivalentTo(new[] { 3 }));
        Assert.That(result[i++], Is.EquivalentTo(new[] { 4 }));
    }
}

Upvotes: 0

Boris Simeonov
Boris Simeonov

Reputation: 21

You can use the bit masking alg. This logic checks for the count of the elements in the subset, the sum of each unique subset and sorts the result. This one is raw and it can be optimised for better exec. speed and the unneeded conditions can be removed.

class Program
{
    static void Main(string[] args)
    {
        int number, cntOfElements;
        int.TryParse(Console.ReadLine(), out number);
        int.TryParse(Console.ReadLine(), out cntOfElements);
        int[] intArray = Regex.Split(Console.ReadLine(), @"\s+").Select(int.Parse).Distinct().ToArray();
        bool hasMatchingSubs = false;
        //The amount of possible combinations are 2 of power - the count of integers in the array
        //if they (the numbers) are 3 the combinations with the zero are 8 (in binary 0000 1000)
        //so we have 3 numbers and the bits before the setted bit of the 8 are the same count
        //this means that we can use them to get all the unique combinations for that amount of numbers 
        //7 combinations without the zero (in binary 0000 0111).
        int combos = (int)Math.Pow(2, intArray.Length);
        List<List<int>> result = new List<List<int>>();

        //we cicle around all the possible combinations
        for (int mask = 1; mask < combos; mask++)
        {
            int sum = 0;
            List<int> list = new List<int>();
            //In the second for construction when the corresponding bit is set wi add this value
            //to the sum for this combination and when the bit is 0 we skip the adding
            for (int idx = 0; idx < intArray.Length; idx++)
            {
                if ((mask >> idx & 1) == 1)
                {
                    sum += intArray[(intArray.Length - 1) - idx];
                    list.Add(intArray[(intArray.Length - 1) - idx]);
                }
            }

            //We are checking the sum for this combination before the next one
            if (sum == number && list.Count() == cntOfElements)
            {
                hasMatchingSubs = true;
                result.Add(list);
            }
        }

        if (!hasMatchingSubs)
        {
            Console.WriteLine("No matching subsets.");
        }
        else
        {
            result.ForEach(p => p.Sort()); //Sorting all elements in the inner lists
            result = result.OrderBy(a => a.Count).ThenBy(b => b.First()).ToList();  //ordering by the count of items in each list
                                                                                    //and than by the value of the sirst integer
            result.ForEach(p => Console.WriteLine("{0} = {1}", string.Join(" + ", p), number));
        }
    }
}

Upvotes: 1

PiotrWolkowski
PiotrWolkowski

Reputation: 8782

What you are looking for are combinations without repetition. Below a link that should be a good starting point:

http://www.codeproject.com/Articles/26050/Permutations-Combinations-and-Variations-using-C-G

You need also a recursive implementation. Otherwise you won't be able to specify dynamically the number of items in each set. Here you can find some further explanations:

http://erwinmayer.com/2010/12/14/recursive-algorithm-to-generate-all-combinations-of-elements-in-arrays/

Upvotes: 0

pero
pero

Reputation: 4259

internal class Program
{
    private static IEnumerable<IEnumerable<int>> Subsets(int n, int subsetSize)
    {
        IEnumerable<int> sequence = Enumerable.Range(1, n);

        // generate list of sequences containing only 1 element e.g. {1}, {2}, ...
        var oneElemSequences = sequence.Select(x => new[] { x }).ToList();

        // generate List of int sequences
        var result = new List<List<int>>();
        // add initial empty set
        result.Add(new List<int>());

        // generate powerset, but skip sequences that are too long
        foreach (var oneElemSequence in oneElemSequences)
        {
            int length = result.Count;

            for (int i = 0; i < length; i++)
            {
                if (result[i].Count >= subsetSize)
                    continue;

                result.Add(result[i].Concat(oneElemSequence).ToList());
            }
        }

        return result.Where(x => x.Count == subsetSize);
    }

    private static void OutputSubset(int n, IEnumerable<IEnumerable<int>> subsets)
    {
        Console.WriteLine("n: {0}", n);
        foreach (var subset in subsets)
        {
            Console.WriteLine("\t{0}", string.Join(" ", subset.Select(x => x.ToString())));
        }
    }

    private static void Main()
    {
        for (int n = 1; n < 500; n++)
        {
            var subsets = Subsets(n, subsetSize: 4);
            OutputSubset(n, subsets);
        }
    }
}

Output:

n: 1
n: 2
n: 3
n: 4
        1 2 3 4
n: 5
        1 2 3 4
        1 2 3 5
        1 2 4 5
        1 3 4 5
        2 3 4 5
n: 6
        1 2 3 4
        1 2 3 5
        1 2 4 5
        1 3 4 5
        2 3 4 5
        1 2 3 6
        1 2 4 6
        1 3 4 6
        2 3 4 6
        1 2 5 6
        1 3 5 6
        2 3 5 6
        1 4 5 6
        2 4 5 6
        3 4 5 6
n: 7
        1 2 3 4
        1 2 3 5
        1 2 4 5
        1 3 4 5
        2 3 4 5
        1 2 3 6
        ...

Upvotes: 2

Related Questions