Reputation: 127
I have two data frame, data1 is about kids' age. For different years, the values are different.
data2 is still the population data for each age, the column name is still year.
What I want is extract the population information into a new dataframe. But for the year 2008/2009/2010, the population data should be from age7-age10. But for 2011/2012/2013, the population data should from age6-age9.
Anyone has any idea?
data2 = data.frame('population by age' = seq(5, 11, by = 1),
'2008' = c(145391,
140621,
136150,
131944,
198933,
182182,
159103
),
'2009' = c(148566,
143943,
139367,
135083,
212196,
196398,
155033
),
'2010' = c(152330,
147261,
142555,
138172,
218701,
161330,
142190
),
'2011' = c(156630,
151387,
146491,
141905,
119397,
116093,
112666
),
'2012' = c(133545,
129737,
126124,
122678,
120213,
116826,
113381
),
'2013' = c(119397,
116093,
112666,
109174,
106871,
103659,
100398))
data1 <- data.frame('2008'= c(7,
8,
9,
10),
'2009' = c(7,
8,
9,
10),
'2010' = c(7,
8,
9,
10),
'2011' = c(6,
7,
8,
9),
'2012' = c(6,
7,
8,
9),
'2013' = c(6,
7,
8,
9)
)
Upvotes: 2
Views: 60
Reputation: 79348
using tidyverse
stack(data1)%>%
mutate(val=data2[cbind(match(values,data2[,1]),as.numeric(ind))])%>%
spread(ind,val)
values X2008 X2009 X2010 X2011 X2012 X2013
1 6 NA NA NA 147261 151387 129737
2 7 7 136150 139367 142555 146491 126124
3 8 8 131944 135083 138172 141905 122678
4 9 9 198933 212196 218701 119397 120213
5 10 10 182182 196398 NA NA NA
Using base R:
xtabs(val~.,cbind(a<-stack(data1),val=data2[cbind(a$values-4,as.numeric(a$ind))]))
ind
values X2008 X2009 X2010 X2011 X2012 X2013
6 0 0 0 147261 151387 129737
7 7 136150 139367 142555 146491 126124
8 8 131944 135083 138172 141905 122678
9 9 198933 212196 218701 119397 120213
10 10 182182 196398 0 0 0
Upvotes: 0
Reputation: 47350
it seems to me you want one of those:
library(tidyverse)
data2 %>%
gather(year,value,-population.by.age) %>%
inner_join(gather(data1,year,population.by.age)) %>%
spread(year,value)
# population.by.age X2008 X2009 X2010 X2011 X2012 X2013
# 1 6 NA NA NA 151387 129737 116093
# 2 7 136150 139367 142555 146491 126124 112666
# 3 8 131944 135083 138172 141905 122678 109174
# 4 9 198933 212196 218701 119397 120213 106871
# 5 10 182182 196398 161330 NA NA NA
data2 %>%
gather(year,value,-population.by.age) %>%
inner_join(gather(data1,year,population.by.age)) %>%
group_by(year) %>%
mutate(population.by.age = letters[row_number()]) %>%
spread(year,value)
# # A tibble: 4 x 7
# population.by.age X2008 X2009 X2010 X2011 X2012 X2013
# <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 a 136150 139367 142555 151387 129737 116093
# 2 b 131944 135083 138172 146491 126124 112666
# 3 c 198933 212196 218701 141905 122678 109174
# 4 d 182182 196398 161330 119397 120213 106871
Here is a base version for the second case, without going through long/wide reformating:
data3 <- data1
data3[] <- Map(function(x,y) y[data2[[1]] %in% x,drop=FALSE],data1,data2[-1])
data3
# X2008 X2009 X2010 X2011 X2012 X2013
# 1 136150 139367 142555 151387 129737 116093
# 2 131944 135083 138172 146491 126124 112666
# 3 198933 212196 218701 141905 122678 109174
# 4 182182 196398 161330 119397 120213 106871
Upvotes: 2
Reputation: 40
It's not very elegant, but you can try this:
aux <- data2 %>%
select(population.by.age,X2008,X2009,X2010) %>%
filter(population.by.age > 6,
population.by.age < 10)
aux2 <- data2 %>%
select(population.by.age,X2011,X2012,X2013) %>%
filter(population.by.age > 5,
population.by.age < 9)
df <- full_join(aux,aux2) %>%
arrange(population.by.age)
Good Luck!
Upvotes: 0
Reputation: 887941
We could gather
the second dataset into 'long' format, filter
based on the conditions and spread
into 'wide'
library(tidyverse)
gather(data2, key, val, X2008:X2013) %>%
filter((population.by.age %in% 7:10 & key %in% paste0("X", 2008:2010))|
(population.by.age %in% 6:9 & key %in% paste0("X", 2011:2013))) %>%
spread(key, val)
#population.by.age X2008 X2009 X2010 X2011 X2012 X2013
#1 6 NA NA NA 151387 129737 116093
#2 7 136150 139367 142555 146491 126124 112666
#3 8 131944 135083 138172 141905 122678 109174
#4 9 198933 212196 218701 119397 120213 106871
#5 10 182182 196398 161330 NA NA NA
Upvotes: 1