Reputation: 2225
I am working on data set that has multiple cumulative field cols
and multiple daily field cols
and by using pivot_longer
looking to convert them into:
cumulative field col
anddaily field col
Snapshot of Data
"Updated.On","State","First.Dose.Administered","Second.Dose.Administered","Daily_First_dose","Daily_Second_dose"
2021-07-09,"India",297184419,69894633,2071121,1141754
2021-07-09,"Andaman and Nicobar Islands",164079,51646,795,6336
2021-07-09,"Andhra Pradesh",13372990,3379660,25571,11168
2021-07-09,"Arunachal Pradesh",583021,108393,8423,3430
2021-07-09,"Assam",6462249,1328646,52913,7681
2021-07-09,"Bihar",15741778,2638686,262299,55007
2021-07-09,"Chandigarh",483886,110610,4351,3491
2021-07-09,"Chhattisgarh",7438953,1819709,64193,42342
2021-07-09,"Dadra and Nagar Haveli and Daman and Diu",458057,57874,7122,1745
2021-07-09,"Delhi",6761634,2049318,95352,35848
Transformation of cols to:
First.Dose.Administered, Second.Dose.Administered
cols to
names_to = "Dose_type"
, values_to = "Dose_Administered"
Daily_First_dose, Daily_Second_dose
cols to names_to = "Dose_type_daily"
, values_to = "Dose_Administered_daily"
Desired Data frame Example:
Updated.On State Dose_type Dose_Administered Dose_type_daily Dose_Administered_daily
2021-07-09 India First.Dose.Administered 297184419 Daily_First_dose 2071121
2021-07-09 India Second.Dose.Administered 69894633 Daily_Second_dose 1141754
df can be downloaded from:
library(tidyverse)
library(lubridate)
file_url1 <- "https://raw.githubusercontent.com/johnsnow09/covid19-df_stack-code/main/df_vaccination_june.csv"
df_vaccination <- read.csv(url(file_url1))
df_vaccination <- df_vaccination %>%
mutate(Updated.On = as.Date(Updated.On))
Attempt: I tried below code but not sure how to create two different values_to columns?
df_vaccination %>%
pivot_longer(cols = c(First.Dose.Administered:Second.Dose.Administered,
Daily_First_dose:Daily_Second_dose),
names_to = c("Dose_type","Dose_type_daily"),
values_to = c("Dose_Administered","Dose_Administered_daily"))
Upvotes: 2
Views: 244
Reputation: 78927
Update: 2 row solution Here is another way to get the two row solution:
Create a dataframe with lonng format of First.Dose.Administered
and Second.Dose.Administered
mutate
to long format of Daily_First_dose
and Daily_Second_dose
library(dplyr)
# create a dataframe with First.Dose.Administered, Second.Dose.Administered
df_vaccination_1 <- df_vaccination %>%
select(First.Dose.Administered, Second.Dose.Administered) %>%
pivot_longer(
cols = c(First.Dose.Administered, Second.Dose.Administered),
names_to = "Dose_type",
values_to = "Dose_Administered"
)
# resulting dataframe using columns of df_vaccination_1
df_vaccination_result <- df_vaccination %>%
select(-First.Dose.Administered, -Second.Dose.Administered) %>%
pivot_longer(
cols = c(Daily_First_dose, Daily_Second_dose),
names_to = "Dose_type_daily",
values_to = "Dose_Administered_daily"
) %>%
mutate(First.Dose.Administered = df_vaccination_1$Dose_type,
Second.Dose.Administered = df_vaccination_1$Dose_Administered)
Output:
A tibble: 1,776 x 6
Updated.On State Dose_type_daily Dose_Administere~ First.Dose.Admin~ Second.Dose.Admi~
<date> <chr> <chr> <int> <chr> <int>
1 2021-07-09 India Daily_First_do~ 2071121 First.Dose.Admin~ 297184419
2 2021-07-09 India Daily_Second_d~ 1141754 Second.Dose.Admi~ 69894633
3 2021-07-09 Andaman ~ Daily_First_do~ 795 First.Dose.Admin~ 164079
4 2021-07-09 Andaman ~ Daily_Second_d~ 6336 Second.Dose.Admi~ 51646
5 2021-07-09 Andhra P~ Daily_First_do~ 25571 First.Dose.Admin~ 13372990
6 2021-07-09 Andhra P~ Daily_Second_d~ 11168 Second.Dose.Admi~ 3379660
7 2021-07-09 Arunacha~ Daily_First_do~ 8423 First.Dose.Admin~ 583021
8 2021-07-09 Arunacha~ Daily_Second_d~ 3430 Second.Dose.Admi~ 108393
9 2021-07-09 Assam Daily_First_do~ 52913 First.Dose.Admin~ 6462249
10 2021-07-09 Assam Daily_Second_d~ 7681 Second.Dose.Admi~ 1328646
# ... with 1,766 more rows
First answer:
Is this what you are looking for?
df_vaccination %>%
pivot_longer(
cols = c(First.Dose.Administered, Second.Dose.Administered),
names_to = "Dose_type",
values_to = "Dose_Administered"
) %>%
pivot_longer(
cols = c(Daily_First_dose, Daily_Second_dose),
names_to = "Dose_type_daily",
values_to = "Dose_Administered_daily"
)
Output:
Updated.On State Dose_type Dose_Administer~ Dose_type_daily Dose_Administered~
<date> <chr> <chr> <int> <chr> <int>
1 2021-07-09 India First.Dose.Ad~ 297184419 Daily_First_do~ 2071121
2 2021-07-09 India First.Dose.Ad~ 297184419 Daily_Second_d~ 1141754
3 2021-07-09 India Second.Dose.A~ 69894633 Daily_First_do~ 2071121
4 2021-07-09 India Second.Dose.A~ 69894633 Daily_Second_d~ 1141754
5 2021-07-09 Andaman and~ First.Dose.Ad~ 164079 Daily_First_do~ 795
6 2021-07-09 Andaman and~ First.Dose.Ad~ 164079 Daily_Second_d~ 6336
7 2021-07-09 Andaman and~ Second.Dose.A~ 51646 Daily_First_do~ 795
8 2021-07-09 Andaman and~ Second.Dose.A~ 51646 Daily_Second_d~ 6336
9 2021-07-09 Andhra Prad~ First.Dose.Ad~ 13372990 Daily_First_do~ 25571
10 2021-07-09 Andhra Prad~ First.Dose.Ad~ 13372990 Daily_Second_d~ 11168
# ... with 3,542 more rows
Upvotes: 2
Reputation: 388907
I think your final output should have nrow(df_vaccination) * 2
rows.
Here is one non-scalable solution but should work for this data.
df <- df_vaccination
res <- data.frame(Updated.on = rep(df$Updated.On, 2),
State = rep(df$State, 2),
Dose_type = rep(names(df)[3:4], each = nrow(df)),
Dose_Administered = unlist(df[3:4]),
Dose_type_daily = rep(names(df)[5:6], each = nrow(df)),
Dose_Administered_daily = unlist(df[5:6]), row.names = NULL)
res
# Updated.on State Dose_type Dose_Administered
#1 2021-07-09 India First.Dose.Administered 297184419
#2 2021-07-09 Andaman and Nicobar Islands First.Dose.Administered 164079
#3 2021-07-09 Andhra Pradesh First.Dose.Administered 13372990
#4 2021-07-09 Arunachal Pradesh First.Dose.Administered 583021
#5 2021-07-09 Assam First.Dose.Administered 6462249
#6 2021-07-09 Bihar First.Dose.Administered 15741778
# Dose_type_daily Dose_Administered_daily
#1 Daily_First_dose 2071121
#2 Daily_First_dose 795
#3 Daily_First_dose 25571
#4 Daily_First_dose 8423
#5 Daily_First_dose 52913
#6 Daily_First_dose 262299
Upvotes: 1
Reputation: 26218
I think you need pivot_longer
twice with a proper names_pattern
regex
argument.
library(tidyverse)
df_vaccination %>%
pivot_longer(cols = 3:4, names_to = c('dose_adminstered','.value'),
names_pattern = '(.*)\\.(Administered)') %>%
pivot_longer(cols = 3:4, names_to = c('dose_type_daily', '.value'),
names_pattern = '(.*)_(.*)$')
#> # A tibble: 3,552 x 6
#> Updated.On State dose_adminstered Administered dose_type_daily dose
#> <date> <chr> <chr> <int> <chr> <int>
#> 1 2021-07-09 India First.Dose 297184419 Daily_First 2.07e6
#> 2 2021-07-09 India First.Dose 297184419 Daily_Second 1.14e6
#> 3 2021-07-09 India Second.Dose 69894633 Daily_First 2.07e6
#> 4 2021-07-09 India Second.Dose 69894633 Daily_Second 1.14e6
#> 5 2021-07-09 Andaman and ~ First.Dose 164079 Daily_First 7.95e2
#> 6 2021-07-09 Andaman and ~ First.Dose 164079 Daily_Second 6.34e3
#> 7 2021-07-09 Andaman and ~ Second.Dose 51646 Daily_First 7.95e2
#> 8 2021-07-09 Andaman and ~ Second.Dose 51646 Daily_Second 6.34e3
#> 9 2021-07-09 Andhra Prade~ First.Dose 13372990 Daily_First 2.56e4
#> 10 2021-07-09 Andhra Prade~ First.Dose 13372990 Daily_Second 1.12e4
#> # ... with 3,542 more rows
To return only 2 rows per State you may do this workaround
df_vaccination[1:4] %>%
pivot_longer(cols = 3:4, names_to = c('dose_adminstered','.value'),
names_pattern = '(.*)\\.(Administered)') %>%
cbind(df_vaccination[5:6] %>% pivot_longer(everything(), names_to = c('dose_type_daily', '.value'),
names_pattern = '(.*)_(.*)$'))
Updated.On State dose_adminstered Administered dose_type_daily dose
1 2021-07-09 India First.Dose 297184419 Daily_First 2071121
2 2021-07-09 India Second.Dose 69894633 Daily_Second 1141754
3 2021-07-09 Andaman and Nicobar Islands First.Dose 164079 Daily_First 795
4 2021-07-09 Andaman and Nicobar Islands Second.Dose 51646 Daily_Second 6336
5 2021-07-09 Andhra Pradesh First.Dose 13372990 Daily_First 25571
6 2021-07-09 Andhra Pradesh Second.Dose 3379660 Daily_Second 11168
7 2021-07-09 Arunachal Pradesh First.Dose 583021 Daily_First 8423
8 2021-07-09 Arunachal Pradesh Second.Dose 108393 Daily_Second 3430
.
.
.
Upvotes: 2