Seif
Seif

Reputation: 82

Select rows where sum of a column equals a fixed value in R

I would like to get one (or all) possible combination of rows where sum of quantity column equals 20

here an example :

structure(list(id = 1:10, quantity = c(11L, 1L, 4L, 12L, 19L, 10L, 3L, 13L, 16L, 14L)), class ="data.frame", row.names = c(NA,-10L))

id quantity
1   11          
2   1           
3   4           
4   12          
5   19          
6   10          
7   3           
8   13          
9   16          
10  14  

desired output (one possible set) :

id quantity
3   4           
7   3           
8   13

or

id quantity
 2  1           
 5  19          

Upvotes: 2

Views: 886

Answers (2)

ThomasIsCoding
ThomasIsCoding

Reputation: 101538

Here is another base R solution by defining a recursive function subsetSum (I guess this would be faster since it avoids checking through all combinations)

subsetSum <- function(v, target, r = c()) {
    if (sum(r) == target) {
        return(list(r))
    }
    unlist(lapply(seq_along(v), function(k) subsetSum(v[-(1:k)], target, c(r, v[k]))), recursive = FALSE)
}

Then, when running

target <- 20
lst <- subsetSum(setNames(df$quantity, seq(nrow(df))), target)
res <- Map(function(v) df[as.integer(names(v)), ], lst)

you will get

> res
[[1]]
  id quantity
2  2        1
3  3        4
4  4       12
7  7        3

[[2]]
  id quantity
2  2        1
5  5       19

[[3]]
  id quantity
2  2        1
7  7        3
9  9       16

[[4]]
  id quantity
3  3        4
7  7        3
8  8       13

[[5]]
  id quantity
3  3        4
9  9       16

If you want only one of the subset sum which reaches the given value, you can try subsetsum from package adagio

library(adagio)
target <- 20
res <- df[subsetsum(df$quantity,target)$inds,]

which gives

> res
  id quantity
2  2        1
5  5       19

Upvotes: 3

GKi
GKi

Reputation: 39667

In case the combination are ok:

target <- 20
lapply(seq_len(sum(cumsum(sort(x$quantity)) <= target)), function(n) {
  y <- combn(x$quantity, n)
  y[,colSums(y) == target]
})
#[[1]]
#integer(0)
#
#[[2]]
#     [,1] [,2]
#[1,]    1    4
#[2,]   19   16
#
#[[3]]
#     [,1] [,2]
#[1,]    1    4
#[2,]    3    3
#[3,]   16   13
#
#[[4]]
#[1]  1  4 12  3

...and to get the row:

lapply(seq_len(sum(cumsum(sort(x$quantity)) <= target)), function(n) {
  y <- combn(x$quantity, n)
  y <- y[,colSums(y) == target, drop = FALSE]
  if(length(y) > 0) {apply(y, 2, match, x$quantity)}
})
#[[1]]
#NULL
#
#[[2]]
#     [,1] [,2]
#[1,]    2    3
#[2,]    5    9
#
#[[3]]
#     [,1] [,2]
#[1,]    2    3
#[2,]    7    7
#[3,]    9    8
#
#[[4]]
#     [,1]
#[1,]    2
#[2,]    3
#[3,]    4
#[4,]    7

... and somehow like the expected output:

lapply(seq_len(sum(cumsum(sort(x$quantity)) <= target)), function(n) {
  y <- combn(x$quantity, n)
  y <- y[,colSums(y) == target, drop = FALSE]
  if(length(y) > 0) {apply(y, 2, function(i) {x[match(i, x$quantity),]})}
})
#[[1]]
#NULL
#
#[[2]]
#[[2]][[1]]
#  id quantity
#2  2        1
#5  5       19
#
#[[2]][[2]]
#  id quantity
#3  3        4
#9  9       16
#
#
#[[3]]
#[[3]][[1]]
#  id quantity
#2  2        1
#7  7        3
#9  9       16
#
#[[3]][[2]]
#  id quantity
#3  3        4
#7  7        3
#8  8       13
#
#
#[[4]]
#[[4]][[1]]
#  id quantity
#2  2        1
#3  3        4
#4  4       12
#7  7        3

Data:

x <- structure(list(id = 1:10, quantity = c(11L, 1L, 4L, 12L, 19L, 10L, 3L, 13L, 16L
  , 14L)), class ="data.frame", row.names = c(NA,-10L))

Upvotes: 4

Related Questions