Reputation: 67
I have a df like that
df = data.frame (user = c('u1', 'u1', 'u1', 'u2', 'u2'),
entity = c('e1','e2','e3','e3','e4'),
area = c('a1','a1','a2','a2','a1'),
sex=c('M','M','M','F','F'))
and i need to obtain a df like that
df2<- data.frame (area = c('a1', 'a2'),
male = c(1,1),
female = c(1,1),
total=c(2,2))
Count the number of women and men by area
Upvotes: 1
Views: 46
Reputation: 79164
Update:
I am still not quite sure. I am using the idea of Yuriy Saraykin with distinct
(credits to him +1):
library(dplyr)
library(tidyr)
df %>%
distinct(user, area, sex) %>%
group_by(area, sex) %>%
summarise(value =n()) %>%
pivot_wider(
names_from = sex,
values_from = value
) %>%
mutate(total = sum(F, M)) %>%
rename(female=F, male=M)
area female male total
<chr> <int> <int> <int>
1 a1 1 1 2
2 a2 1 1 2
First answer: Not correct! One way could be:
library(dplyr)
library(tidyr)
df %>%
group_by(area, sex) %>%
summarise(value =n()) %>%
pivot_wider(
names_from = sex,
values_from = value
) %>%
mutate(total = F+M) %>%
rename(female=F, male=M)
area female male total
<chr> <int> <int> <int>
1 a1 1 2 3
2 a2 1 1 2
Upvotes: 0
Reputation: 8880
df = data.frame (user = c('u1', 'u1', 'u1', 'u2', 'u2'),
entity = c('e1','e2','e3','e3','e4'),
area = c('a1','a1','a2','a2','a1'),
sex=c('M','M','M','F','F'))
library(tidyverse)
df %>%
distinct(user, area, sex) %>%
mutate(sex = ifelse(sex == "M", "male", "female")) %>%
pivot_wider(
id_cols = area,
names_from = sex,
values_from = sex,
values_fill = 0,
values_fn = length
) %>%
mutate(Total = rowSums(across(male:female)))
#> # A tibble: 2 x 4
#> area male female Total
#> <chr> <int> <int> <dbl>
#> 1 a1 1 1 2
#> 2 a2 1 1 2
Created on 2022-01-25 by the reprex package (v2.0.1)
Upvotes: 2