
Reputation: 101

How to find the proportion based on count

I have a data frame like the one below. I am trying to sort it alphabetically by node and count how often each node occurs and how often each activity occurs within its node. Then I want to calculate the proportion of each activity as count(activity) / count(corresponding node).

# Example data: one row per observed (node, activity) pair.
node <- c("A", "B", "A", "A", "A", "A", "B", "B")
activity <- c("e", "c", "d", "a", "o", "a", "c", "e")
df1 <- data.frame(node = node, activity = activity)

I have tried this code:

# The asker's attempt, kept verbatim; it does not run as written.
# NOTE(review): `ndf` is not defined anywhere in the code shown (the data
#   frame is `df1`), and `avtivity` does not match the `activity` column.
# NOTE(review): `summaries` is presumably meant to be dplyr's `summarise()`,
#   and `order()` returns an index vector rather than a sorted data frame,
#   so `arrange()` is probably what was intended -- confirm with the asker.
df1 %>%
  group_by(node) %>%
  order(ndf$node, decreasing = F) %>%
  summaries("count1" = count(node), "count2" = count(avtivity), "Proportion" = count2/count1)

The desired output is:

# Desired result: one row per (node, activity) pair, with the per-node row
# count, the per-pair row count, and their ratio written as a fraction.
node <- c("A", "A", "A", "A", "B", "B")
countnode <- c(5, 5, 5, 5, 3, 3)
activity <- c("e", "d", "a", "o", "c", "e")
countac <- c(1, 1, 2, 1, 2, 1)
t <- c("1/5", "1/5", "2/5", "1/5", "2/3", "1/3")
df2 <- data.frame(node, countnode, activity, countac, t)

Upvotes: 2

Views: 95

Answers (5)


Reputation: 1364

Using data.table

library(data.table)

# `df1` is created with data.frame(), so it must be converted before the
# data.table `[i, j, by]` syntax can be used on it.
# Step 1: count rows per (node, activity); `keyby` also sorts by those keys.
df3 <- as.data.table(df1)[, .(CountAc = .N), keyby = .(node, activity)]

# Step 2: add, by reference, the per-node total and each activity's share.
df3[, `:=`(CountNode = sum(CountAc), t = CountAc / sum(CountAc)), by = node]

> df3
   node activity CountAc CountNode         t
1:    A        a       2         5 0.4000000
2:    A        d       1         5 0.2000000
3:    A        e       1         5 0.2000000
4:    A        o       1         5 0.2000000
5:    B        c       2         3 0.6666667
6:    B        e       1         3 0.3333333

Upvotes: 0


Reputation: 39647

In base you can use table and ave:

# Cross-tabulate node x activity; data.frame() turns the table into long
# format with one row per combination and the count in the third column.
x <- data.frame(table(df1))
# Drop combinations that never occur in the data.
x <- x[x[, 3] > 0, ]
names(x)[3] <- "countac"
# Per-node total, repeated on every row of that node.
x$countnode <- ave(x$countac, x$node, FUN = sum)
# Proportion written as a fraction string, e.g. "2/5".
x$t <- paste(x$countac, x$countnode, sep = "/")
#x$t <- x$countac / x$countnode #In case you want the value
# Sort rows by node, then activity, and put the columns in the desired order.
x[do.call(order, x[1:2]), c(1, 4, 2, 3, 5)]
#  node countnode activity countac   t
#1    A         5        a       2 2/5
#5    A         5        d       1 1/5
#7    A         5        e       1 1/5
#9    A         5        o       1 1/5
#4    B         3        c       2 2/3
#8    B         3        e       1 1/3

Upvotes: 1


Reputation: 39595

Maybe try this. You can use group_by() to obtain the counts and then compute the proportion. Here is the code:

# Per-row counts and proportion; the original rows (8) are all kept.
newdf <- df1 %>%
  arrange(node) %>%
  # Number of rows for each node.
  group_by(node) %>%
  mutate(countnode = n()) %>%
  ungroup() %>%
  # Number of rows for each (node, activity) pair and its share of the node.
  group_by(node, activity) %>%
  mutate(countac = n(), t = countac / countnode)


# A tibble: 8 x 5
# Groups:   node, activity [6]
  node  activity countnode countac     t
  <fct> <fct>        <int>   <int> <dbl>
1 A     e                5       1 0.2  
2 A     d                5       1 0.2  
3 A     a                5       2 0.4  
4 A     o                5       1 0.2  
5 A     a                5       2 0.4  
6 B     c                3       2 0.667
7 B     c                3       2 0.667
8 B     e                3       1 0.333

Upvotes: 0

Karthik S
Karthik S

Reputation: 11584

Does this work:

# Count rows per (node, activity), attach the per-node row count, then
# compute the proportion. `t` is computed only after the join so that it
# uses the joined `countnode` column rather than any same-named variable
# that happens to exist in the calling environment.
df1 %>%
  count(node, activity, name = "countac") %>%
  left_join(
    df1 %>% group_by(node) %>% mutate(countnode = n()) %>% ungroup()
  ) %>%
  # The join yields one row per original row; collapse the duplicates.
  distinct() %>%
  mutate(t = countac / countnode) %>%
  select(node, countnode, activity, countac, t)
Joining, by = c("node", "activity")
  node countnode activity countac         t
1    A         5        a       2 0.4000000
2    A         5        d       1 0.2000000
3    A         5        e       1 0.2000000
4    A         5        o       1 0.2000000
5    B         3        c       2 0.6666667
6    B         3        e       1 0.3333333

Upvotes: 0


Reputation: 26218

I think you want this:

# Keep every original row and add the node size plus the activity's share.
df1 %>%
  arrange(node) %>%
  # `countac` here holds the number of rows in each node.
  group_by(node) %>%
  mutate(countac = n()) %>%
  # Within each (node, activity) pair, n() is the pair's row count.
  group_by(node, activity) %>%
  mutate(t = n() / countac) %>%
  ungroup()

# A tibble: 8 x 4
  node  activity countac     t
  <chr> <chr>      <int> <dbl>
1 A     e              5 0.2  
2 A     d              5 0.2  
3 A     a              5 0.4  
4 A     o              5 0.2  
5 A     a              5 0.4  
6 B     c              3 0.667
7 B     c              3 0.667
8 B     e              3 0.333

Upvotes: 0

Related Questions