Reputation: 83
I have a dataset df
> df
date group x
1 197302 A 0.53
2 197303 A 0.60
3 197304 A 0.57
4 197302 B 0.68
5 197303 B 0.71
6 197304 B 0.65
7 197302 C 0.16
8 197303 C 0.25
9 197304 C 0.22
10 197302 D 0.31
11 197303 D 0.39
12 197304 D 0.36
I want to create a new column 'x.total' where some of the x-values are summed based on two conditions:
By following these conditions, the results should end up looking like this:
date group x x.total
1 197302 A 0.53 1.21
2 197303 A 0.60 1.31
3 197304 A 0.57 1.22
4 197302 B 0.68 1.21
5 197303 B 0.71 1.31
6 197304 B 0.65 1.22
7 197302 C 0.16 0.47
8 197303 C 0.25 0.64
9 197304 C 0.22 0.58
10 197302 D 0.31 0.47
11 197303 D 0.39 0.64
12 197304 D 0.36 0.58
Does anyone know how I can do that?
Upvotes: 0
Views: 58
Reputation: 39667
You can use sum
in ave
for the groups date
and group %in% c("A", "B")
:
df$x.total <- ave(df$x, df$date, df$group %in% c("A", "B"), FUN=sum)
df
# date group x x.total
#1 197302 A 0.53 1.21
#2 197303 A 0.60 1.31
#3 197304 A 0.57 1.22
#4 197302 B 0.68 1.21
#5 197303 B 0.71 1.31
#6 197304 B 0.65 1.22
#7 197302 C 0.16 0.47
#8 197303 C 0.25 0.64
#9 197304 C 0.22 0.58
#10 197302 D 0.31 0.47
#11 197303 D 0.39 0.64
#12 197304 D 0.36 0.58
Upvotes: 0
Reputation: 2419
library(data.table)
df <- fread("
date group x
197302 A 0.53
197303 A 0.60
197304 A 0.57
197302 B 0.68
197303 B 0.71
197304 B 0.65
197302 C 0.16
197303 C 0.25
197304 C 0.22
197302 D 0.31
197303 D 0.39
197304 D 0.36"
)
group1 <- c("A","B")
group2 <- c("C","D")
df[, x.sum := fcase(
group %chin% group1, sum(.SD[group %chin% group1,x]),
group %chin% group2, sum(.SD[group %chin% group2,x])
),
by=date][]
Upvotes: 0
Reputation: 26218
This will also do
library(tidyverse)
df <- read.table(text = "date group x
1 197302 A 0.53
2 197303 A 0.60
3 197304 A 0.57
4 197302 B 0.68
5 197303 B 0.71
6 197304 B 0.65
7 197302 C 0.16
8 197303 C 0.25
9 197304 C 0.22
10 197302 D 0.31
11 197303 D 0.39
12 197304 D 0.36")
df %>% group_by(date, group %in% c("A", "B")) %>%
mutate(Total = sum(x))
# A tibble: 12 x 5
# Groups: date, group %in% c("A", "B") [6]
date group x `group %in% c("A", "B")` Total
<int> <chr> <dbl> <lgl> <dbl>
1 197302 A 0.53 TRUE 1.21
2 197303 A 0.6 TRUE 1.31
3 197304 A 0.570 TRUE 1.22
4 197302 B 0.68 TRUE 1.21
5 197303 B 0.71 TRUE 1.31
6 197304 B 0.65 TRUE 1.22
7 197302 C 0.16 FALSE 0.47
8 197303 C 0.25 FALSE 0.64
9 197304 C 0.22 FALSE 0.580
10 197302 D 0.31 FALSE 0.47
11 197303 D 0.39 FALSE 0.64
12 197304 D 0.36 FALSE 0.580
Upvotes: 0
Reputation: 1
Here is one way to do it, by using tidyverse and doing a bit of a refactoring of the groups
library(tidyverse)
df <- tibble::tribble(
~date, ~group, ~num,
197302L, "A", 0.53,
197303L, "A", 0.6,
197304L, "A", 0.57,
197302L, "B", 0.68,
197303L, "B", 0.71,
197304L, "B", 0.65,
197302L, "C", 0.16,
197303L, "C", 0.25,
197304L, "C", 0.22,
197302L, "D", 0.31,
197303L, "D", 0.39,
197304L, "D", 0.36
)
df %>%
mutate(group_2 = ifelse(group %in% c("A","B"),"AB",
ifelse(group %in% c("C","D"),"CD","other"))) %>%
group_by(date,group_2) %>%
mutate(sum = sum(num))
Upvotes: 0
Reputation: 17648
you can try a tidyverse
library(tidyverse)
read.table(text = "date group x
1 197302 A 0.53
2 197303 A 0.60
3 197304 A 0.57
4 197302 B 0.68
5 197303 B 0.71
6 197304 B 0.65
7 197302 C 0.16
8 197303 C 0.25
9 197304 C 0.22
10 197302 D 0.31
11 197303 D 0.39
12 197304 D 0.36") %>%
mutate(gr = fct_collapse(group, AB = c("A", "B"), CD = c("C", "D"))) %>%
group_by(date, gr) %>%
mutate(total = sum(x))
# A tibble: 12 x 5
# Groups: date, gr [6]
date group x gr total
<int> <chr> <dbl> <fct> <dbl>
1 197302 A 0.53 AB 1.21
2 197303 A 0.6 AB 1.31
3 197304 A 0.570 AB 1.22
4 197302 B 0.68 AB 1.21
5 197303 B 0.71 AB 1.31
6 197304 B 0.65 AB 1.22
7 197302 C 0.16 CD 0.47
8 197303 C 0.25 CD 0.64
9 197304 C 0.22 CD 0.580
10 197302 D 0.31 CD 0.47
11 197303 D 0.39 CD 0.64
12 197304 D 0.36 CD 0.580
Upvotes: 1