Reputation: 101
I have a data frame like the one below. I am trying to sort it alphabetically by node and to find the count of each node and of each activity within that node. Then I want to calculate the proportion of each activity as count(activity) / count(corresponding node).
# Example input: one row per observed (node, activity) pair.
node <- c("A", "B", "A", "A", "A", "A", "B", "B")
activity <- c("e", "c", "d", "a", "o", "a", "c", "e")
df1 <- data.frame(node = node, activity = activity)
I have tried this code:
# Attempt quoted from the question, kept verbatim; the review notes flag the visible problems.
df1 %>%
group_by(node) %>%
# NOTE(review): `ndf` is not defined in the code shown here (the data frame built above is `df1`),
# and piping into order() hands the grouped data frame to order() as its first argument.
order(ndf$node, decreasing = F) %>%
# NOTE(review): `summaries` looks like a typo for dplyr's summarise(), and `avtivity`
# does not match the `activity` column created above.
summaries("count1" = count(node), "count2" = count(avtivity), "Proportion" = count2/count1)
The desired output is:
# Desired result: one row per distinct (node, activity) pair.
# Node "A" contributes four pairs and node "B" contributes two.
node <- rep(c("A", "B"), times = c(4, 2))
countnode <- rep(c(5, 3), times = c(4, 2))
activity <- c("e", "d", "a", "o", "c", "e")
countac <- c(1, 1, 2, 1, 2, 1)
# Proportion written as the text fraction "countac/countnode".
t <- paste(countac, countnode, sep = "/")
df2 <- data.frame(node, countnode, activity, countac, t)
Upvotes: 2
Views: 95
Reputation: 1364
Using data.table
# data.table approach (needs the data.table package attached).
# setDT() converts df1 to a data.table by reference.
setDT(df1)
# Step 1: count rows per (node, activity); keyby also sorts by those columns.
# Step 2: per node, add the node total and each activity's share of it.
df3 <- df1[
  , .(CountAc = .N), keyby = .(node, activity)
][
  , `:=`(CountNode = sum(CountAc), t = CountAc / sum(CountAc)), by = node
]
> df3
node activity CountAc CountNode t
1: A a 2 5 0.4000000
2: A d 1 5 0.2000000
3: A e 1 5 0.2000000
4: A o 1 5 0.2000000
5: B c 2 3 0.6666667
6: B e 1 3 0.3333333
Upvotes: 0
Reputation: 39647
In base R you can use `table` and `ave`:
# Cross-tabulate node x activity; responseName names the count column directly.
x <- as.data.frame(table(df1), responseName = "countac")
# table() also lists combinations that never occur; drop those zero counts.
x <- x[x$countac > 0, ]
# Per-node total, repeated on every row belonging to that node.
x$countnode <- ave(x$countac, x$node, FUN = sum)
# paste() is vectorised, so no row-wise apply() over the data frame is needed.
x$t <- paste(x$countac, x$countnode, sep = "/")
#x$t <- x$countac / x$countnode #In case you want the value
# Sort by node, then activity, and select columns by name rather than by position.
x[order(x$node, x$activity), c("node", "countnode", "activity", "countac", "t")]
# node countnode activity countac t
#1 A 5 a 2 2/5
#5 A 5 d 1 1/5
#7 A 5 e 1 1/5
#9 A 5 o 1 1/5
#4 B 3 c 2 2/3
#8 B 3 e 1 1/3
Upvotes: 1
Reputation: 39595
Maybe try this. You can use group_by() to obtain the counts and then compute the proportion. Here is the code:
library(dplyr)
# Sort by node, attach the per-node row count, then the per-(node, activity)
# row count and their ratio. Every input row is kept, and the result stays
# grouped by node and activity.
newdf <- df1 %>%
  arrange(node) %>%
  group_by(node) %>%
  mutate(countnode = n()) %>%
  # group_by() replaces the existing grouping, so no ungroup() is needed first.
  group_by(node, activity) %>%
  mutate(
    countac = n(),
    t = countac / countnode
  )
Output:
# A tibble: 8 x 5
# Groups: node, activity [6]
node activity countnode countac t
<fct> <fct> <int> <int> <dbl>
1 A e 5 1 0.2
2 A d 5 1 0.2
3 A a 5 2 0.4
4 A o 5 1 0.2
5 A a 5 2 0.4
6 B c 3 2 0.667
7 B c 3 2 0.667
8 B e 3 1 0.333
Upvotes: 0
Reputation: 11584
Does this work?
library(dplyr)
# Count each (node, activity) pair, join in the per-node row count, then
# compute the proportion.
df1 %>%
  count(node, activity, name = "countac") %>%
  # Natural join on the shared columns (node, activity); df1 has repeated
  # pairs, so the join yields duplicate rows that distinct() removes.
  left_join(
    df1 %>% group_by(node) %>% mutate(countnode = n()) %>% ungroup()
  ) %>%
  distinct() %>%
  # countnode must already be a column here; computing t before the join
  # would make mutate() look for a `countnode` object outside the data.
  mutate(t = countac / countnode) %>%
  select(node, countnode, activity, countac, t)
Joining, by = c("node", "activity")
node countnode activity countac t
1 A 5 a 2 0.4000000
2 A 5 d 1 0.2000000
3 A 5 e 1 0.2000000
4 A 5 o 1 0.2000000
5 B 3 c 2 0.6666667
6 B 3 e 1 0.3333333
Upvotes: 0
Reputation: 26218
I think you want this:
# Sort by node, then add the per-node row count and each activity's share of it.
# Note: `countac` here holds the per-node row count (the question calls it `countnode`).
df1 %>%
  arrange(node) %>%
  add_count(node, name = "countac") %>%
  group_by(node, activity) %>%
  mutate(t = n() / countac) %>%
  ungroup()
# A tibble: 8 x 4
node activity countac t
<chr> <chr> <int> <dbl>
1 A e 5 0.2
2 A d 5 0.2
3 A a 5 0.4
4 A o 5 0.2
5 A a 5 0.4
6 B c 3 0.667
7 B c 3 0.667
8 B e 3 0.333
Upvotes: 0