Xixi
Xixi

Reputation: 127

How to select value in different rows for each different column in R?

I have two data frame, data1 is about kids' age. For different years, the values are different.

data2 is still the population data for each age, the column name is still year.

What I want is extract the population information into a new dataframe. But for the year 2008/2009/2010, the population data should be from age7-age10. But for 2011/2012/2013, the population data should from age6-age9.

Anyone has any idea?

data2 = data.frame('population by age' = seq(5, 11, by = 1), 
                     '2008' = c(145391,
                                140621,
                                136150,
                                131944,
                                198933,
                                182182,
                                159103
                     ),  
                     '2009' = c(148566,
                                143943,
                                139367,
                                135083,
                                212196,
                                196398,
                                155033
                     ), 
                     '2010' = c(152330,
                                147261,
                                142555,
                                138172,
                                218701,
                                161330,
                                142190
                     ),  
                     '2011' = c(156630,
                                151387,
                                146491,
                                141905,
                                119397,
                                116093,
                                112666
                     ),
                     '2012' = c(133545,
                                129737,
                                126124,
                                122678,
                                120213,
                                116826,
                                113381
                      ),
                     '2013' = c(119397,
                                116093,
                                112666,
                                109174,
                                106871,
                                103659,
                                100398)) 



                 data1 <- data.frame('2008'= c(7,
                                               8,
                                               9,
                                               10),
                                     '2009' = c(7,
                                                8,
                                                9,
                                                10),
                                     '2010' = c(7,
                                                8,
                                                9,
                                                10),
                                     '2011' = c(6,
                                                7,
                                                8,
                                                9),
                      '2012' = c(6,
                                 7,
                                 8,
                                 9),
                      '2013' = c(6,
                                 7,
                                 8,
                                 9)
                      )

Upvotes: 2

Views: 60

Answers (4)

Onyambu
Onyambu

Reputation: 79348

using tidyverse

stack(data1)%>%
   mutate(val=data2[cbind(match(values,data2[,1]),as.numeric(ind))])%>%
   spread(ind,val)

  values X2008  X2009  X2010  X2011  X2012  X2013
1      6    NA     NA     NA 147261 151387 129737
2      7     7 136150 139367 142555 146491 126124
3      8     8 131944 135083 138172 141905 122678
4      9     9 198933 212196 218701 119397 120213
5     10    10 182182 196398     NA     NA     NA

Using base R:

xtabs(val~.,cbind(a<-stack(data1),val=data2[cbind(a$values-4,as.numeric(a$ind))]))
      ind
values  X2008  X2009  X2010  X2011  X2012  X2013
    6       0      0      0 147261 151387 129737
    7       7 136150 139367 142555 146491 126124
    8       8 131944 135083 138172 141905 122678
    9       9 198933 212196 218701 119397 120213
    10     10 182182 196398      0      0      0

Upvotes: 0

moodymudskipper
moodymudskipper

Reputation: 47350

it seems to me you want one of those:

library(tidyverse)

data2 %>%
  gather(year,value,-population.by.age) %>%
  inner_join(gather(data1,year,population.by.age)) %>%
  spread(year,value)

# population.by.age  X2008  X2009  X2010  X2011  X2012  X2013
# 1                 6     NA     NA     NA 151387 129737 116093
# 2                 7 136150 139367 142555 146491 126124 112666
# 3                 8 131944 135083 138172 141905 122678 109174
# 4                 9 198933 212196 218701 119397 120213 106871
# 5                10 182182 196398 161330     NA     NA     NA


data2 %>%
  gather(year,value,-population.by.age) %>%
  inner_join(gather(data1,year,population.by.age)) %>%
  group_by(year) %>%
  mutate(population.by.age = letters[row_number()]) %>%
  spread(year,value)

# # A tibble: 4 x 7
# population.by.age  X2008  X2009  X2010  X2011  X2012  X2013
# <chr>              <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
# 1 a                 136150 139367 142555 151387 129737 116093
# 2 b                 131944 135083 138172 146491 126124 112666
# 3 c                 198933 212196 218701 141905 122678 109174
# 4 d                 182182 196398 161330 119397 120213 106871

Here is a base version for the second case, without going through long/wide reformating:

data3 <- data1
data3[] <- Map(function(x,y) y[data2[[1]] %in% x,drop=FALSE],data1,data2[-1])
data3
#    X2008  X2009  X2010  X2011  X2012  X2013
# 1 136150 139367 142555 151387 129737 116093
# 2 131944 135083 138172 146491 126124 112666
# 3 198933 212196 218701 141905 122678 109174
# 4 182182 196398 161330 119397 120213 106871

Upvotes: 2

Tarssio Barreto
Tarssio Barreto

Reputation: 40

It's not very elegant, but you can try this:

aux <- data2 %>% 
  select(population.by.age,X2008,X2009,X2010) %>% 
  filter(population.by.age > 6, 
         population.by.age < 10)


aux2 <- data2 %>% 
  select(population.by.age,X2011,X2012,X2013) %>% 
  filter(population.by.age > 5, 
         population.by.age < 9)

df <- full_join(aux,aux2) %>% 
  arrange(population.by.age)

Good Luck!

Upvotes: 0

akrun
akrun

Reputation: 887941

We could gather the second dataset into 'long' format, filter based on the conditions and spread into 'wide'

library(tidyverse)
gather(data2, key, val, X2008:X2013) %>% 
     filter((population.by.age %in% 7:10 & key %in% paste0("X", 2008:2010))|
            (population.by.age %in% 6:9 & key %in% paste0("X", 2011:2013))) %>% 
     spread(key, val)
#population.by.age  X2008  X2009  X2010  X2011  X2012  X2013
#1                 6     NA     NA     NA 151387 129737 116093
#2                 7 136150 139367 142555 146491 126124 112666
#3                 8 131944 135083 138172 141905 122678 109174
#4                 9 198933 212196 218701 119397 120213 106871
#5                10 182182 196398 161330     NA     NA     NA

Upvotes: 1

Related Questions