Jay khan
Jay khan

Reputation: 745

Get sum of unique rows in table function in R

Suppose I have data which looks like this

Id Name Price sales Profit Month Category Mode Supplier
1    A     2     5      8     1        X    K     John
1    A     2     6      9     2        X    K     John
1    A     2     5      8     3        X    K     John
2    B     2     4      6     1        X    L      Sam
2    B     2     3      4     2        X    L      Sam
2    B     2     5      7     3        X    L      Sam
3    C     2     5     11     1        X    M     John
3    C     2     5     11     2        X    L     John
3    C     2     5     11     3        X    K     John
4    D     2     8     10     1        Y    M     John
4    D     2     8     10     2        Y    K     John
4    D     2     5      7     3        Y    K     John
5    E     2     5      9     1        Y    M      Sam
5    E     2     5      9     2        Y    L      Sam
5    E     2     5      9     3        Y    M      Sam
6    F     2     4      7     1        Z    M     Kyle
6    F     2     5      8     2        Z    L     Kyle
6    F     2     5      8     3        Z    M     Kyle

if I apply table function, it will just combines are the rows and result will be

    K L M
  X 4 4 1
  Y 2 1 3
  Z 0 1 2

Now what if I want not the sum of all rows but only sum of those rows with Unique Id so it looks like

    K L M
  X 2 2 1
  Y 1 1 2
  Z 0 1 1

Thanks

Upvotes: 2

Views: 96

Answers (2)

akrun
akrun

Reputation: 887981

We can try

library(data.table)
dcast(unique(setDT(df1[c('Category', 'Mode', 'Id')])), 
            Category~Mode, value.var='Id', length)
#   Category K L M
#1:        X 2 2 1
#2:        Y 1 1 2
#3:        Z 0 1 1

Or with dplyr

library(dplyr)
df1 %>% 
  distinct(Id, Category, Mode) %>% 
  group_by(Category, Mode) %>%
  tally() %>% 
  spread(Mode, n, fill=0)
#   Category     K     L     M
#      (chr) (dbl) (dbl) (dbl)
#1        X     2     2     1
#2        Y     1     1     2
#3        Z     0     1     1

Or as @David Arenburg suggested, a variant of the above is

df1 %>% 
   distinct(Id, Category, Mode) %>% 
   select(Category, Mode) %>%
   table()

Upvotes: 2

user3710546
user3710546

Reputation:

If df is your data.frame:

# Subset original data.frame to keep columns of interest
df1 <- df[,c("Id", "Category", "Mode")]
# Remove duplicated rows
df1 <- df1[!duplicated(df1),]
# Create table
with(df1, table(Category, Mode))
#         Mode
# Category K L M
#        X 2 2 1
#        Y 1 1 2
#        Z 0 1 1

Or in one line using unique

table(unique(df[c("Id", "Category", "Mode")])[-1])

df <- structure(list(Id = c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 
4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L), Name = structure(c(1L, 1L, 1L, 
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L), .Label = c("A", 
"B", "C", "D", "E", "F"), class = "factor"), Price = c(2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
), sales = c(5L, 6L, 5L, 4L, 3L, 5L, 5L, 5L, 5L, 8L, 8L, 5L, 
5L, 5L, 5L, 4L, 5L, 5L), Profit = c(8L, 9L, 8L, 6L, 4L, 7L, 11L, 
11L, 11L, 10L, 10L, 7L, 9L, 9L, 9L, 7L, 8L, 8L), Month = c(1L, 
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 
3L), Category = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L), .Label = c("X", "Y", "Z"
), class = "factor"), Mode = structure(c(1L, 1L, 1L, 2L, 2L, 
2L, 3L, 2L, 1L, 3L, 1L, 1L, 3L, 2L, 3L, 3L, 2L, 3L), .Label = c("K", 
"L", "M"), class = "factor"), Supplier = structure(c(1L, 1L, 
1L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 2L
), .Label = c("John", "Kyle", "Sam"), class = "factor")), .Names = c("Id", 
"Name", "Price", "sales", "Profit", "Month", "Category", "Mode", 
"Supplier"), class = "data.frame", row.names = c(NA, -18L))

Upvotes: 6

Related Questions