Reputation: 560
I would like some help to make my code a bit leaner.
Here is a fraction of my original dataset:
library(tidyverse)
# Sample data: six households with five person rows each (30 rows in total).
# Columns that are constant within a household are built with rep(); the
# remaining columns list their values one household per line.
dataset <- data.frame(
  UPA = rep(
    c(130033353, 230036930, 230124582, 240039107, 320022393, 330093898),
    each = 5
  ),
  UF = rep(c(13, 23, 23, 24, 32, 33), each = 5),
  V1008 = rep(c(1, 5, 11, 8, 3, 9), each = 5),
  V1014 = rep(2, 30),
  V2008 = c(
    22, 23, 12, 28, 29,
    13, 9, 9, 2, 22,
    18, 5, 8, 16, 16,
    12, 24, 2, 25, 28,
    7, 7, 7, 15, 15,
    6, 6, 18, 14, 14
  ),
  V20081 = c(
    1, 9, 6, 3, 5,
    9, 7, 6, 9, 5,
    2, 6, 1, 5, 9,
    10, 5, 4, 5, 1,
    7, 7, 7, 5, 5,
    8, 8, 4, 8, 8
  ),
  V20082 = c(
    1952, 1964, 1995, 1999, 2009,
    1993, 1998, 2000, 2003, 2011,
    1967, 1990, 1993, 1996, 2001,
    1947, 1996, 1998, 1997, 2012,
    2010, 2010, 2010, 2011, 2011,
    1981, 1981, 1984, 2006, 2006
  ),
  V2003 = c(
    1, 2, 6, 8, 10,
    2, 9, 10, 12, 15,
    2, 3, 5, 7, 11,
    2, 6, 7, 7, 9,
    3, 3, 3, 4, 4,
    1, 1, 2, 4, 4
  ),
  V2007 = c(
    1, 2, 2, 1, 2,
    2, 1, 1, 1, 1,
    2, 1, 2, 1, 1,
    2, 2, 1, 2, 2,
    1, 1, 1, 1, 1,
    2, 2, 1, 2, 2
  ),
  # Only rows 1, 26, 27 and 28 carry a value for n_p.
  n_p = c(1, rep(NA, 24), 1, 2, 1, NA, NA),
  id_dom = rep(c(2499L, 10962L, 12618L, 13673L, 25945L, 28145L), each = 5)
)
I want to update the value of the `p201` variable according to the value of `n_p`, in an iterative way. `n_p` can range from 1 to 5 (although not in `dataset`; I apologize for not being able to take a sample that contains all possible cases). In the first wave, I set `p201` equal to `100*(n_p-1) + V2003` if `n_p == 1` and to `NA` otherwise. After that, I go from 2 to 5 doing the same, considering only rows in which `p201` is still missing.
Here is the code:
# Build `p201` wave by wave (n_p = 1, ..., 5), then derive `idind`.
#
# Wave k keeps the `p201` of rows whose `n_p` belongs to an earlier wave and
# assigns 100 * (n_p - 1) + V2003 to rows with `n_p == k` that are still
# missing `p201` and whose V2008 / V20081 / V20082 are not 99 / 99 / 9999;
# every other row is reset to NA. Between waves, fill() carries the assigned
# values down within each group.
final_df <- dataset %>%
  group_by(UF, UPA, V1008, V1014, V2007,
           V2008, V20081, V20082, V2003) %>%
  # Wave 1
  mutate(p201 = ifelse(n_p == 1 & V2008 != 99 &
                         V20081 != 99 & V20082 != 9999,
                       100 * (n_p - 1) + V2003, NA)) %>%
  fill(p201, .direction = "down") %>%
  # Wave 2
  mutate(p201 = ifelse(n_p == 1,
                       p201,
                       ifelse(n_p == 2 & is.na(p201) & V2008 != 99 &
                                V20081 != 99 & V20082 != 9999,
                              100 * (n_p - 1) + V2003,
                              NA))) %>%
  fill(p201, .direction = "down") %>%
  # Wave 3
  mutate(p201 = ifelse(n_p %in% 1:2,
                       p201,
                       ifelse(n_p == 3 & is.na(p201) & V2008 != 99 &
                                V20081 != 99 & V20082 != 9999,
                              100 * (n_p - 1) + V2003,
                              NA))) %>%
  fill(p201, .direction = "down") %>%
  # Wave 4
  mutate(p201 = ifelse(n_p %in% 1:3,
                       p201,
                       ifelse(n_p == 4 & is.na(p201) & V2008 != 99 &
                                V20081 != 99 & V20082 != 9999,
                              100 * (n_p - 1) + V2003,
                              NA))) %>%
  fill(p201, .direction = "down") %>%
  # Wave 5 (no fill() afterwards)
  mutate(p201 = ifelse(n_p %in% 1:4,
                       p201,
                       ifelse(n_p == 5 & is.na(p201) & V2008 != 99 &
                                V20081 != 99 & V20082 != 9999,
                              100 * (n_p - 1) + V2003,
                              NA))) %>%
  ungroup() %>%
  # across() replaces the superseded mutate_at().
  mutate(across(all_of(c("UF", "UPA", "V1008", "p201")), as.character)) %>%
  # `idind` concatenates the key columns; it stays NA where `p201` is missing.
  mutate(idind = ifelse(is.na(p201),
                        NA,
                        paste0(V1014, UF, UPA, V1008, p201)))
I clearly could use something like a `for` loop (or maybe something even better, using `map`?) to make the code a bit leaner, but I don't know how to insert a loop inside a pipe flow using tidyverse's syntax.
Can somebody help? My desired output is exactly the one resulting in `final_df`, but with cleaner code.
PS: Please do not mind that the result will generate a lot of NAs in `p201` - the whole dataset is more complex and this probably wouldn't be true with it.
EDIT: I've figured out a solution by adapting Limey's answer - I didn't know about recursive functions.
Here it goes:
# Recursively assign `p201` for interview waves `interview` .. `int_final`.
#
# For the current wave the function first carries existing `p201` values down
# within each group, then:
#   * keeps `p201` on rows whose `n_p` belongs to an earlier wave;
#   * assigns 100 * (n_p - 1) + V2003 on rows with `n_p == interview` that are
#     still missing `p201` and whose V2008 / V20081 / V20082 are not
#     99 / 99 / 9999;
#   * resets every other row to NA.
# It then calls itself for the next wave until `int_final` has been processed.
#
# Arguments:
#   data      Data frame holding `p201`, `n_p` and the grouping columns.
#   interview First wave to process (default 2).
#   int_final Last wave to process (default 5).
#
# Returns the data frame, still grouped by the columns listed in group_by().
loop <- function(data,
                 interview = 2,
                 int_final = 5) {
  # The recursion stops only when `interview == int_final`; starting past the
  # final wave would therefore never terminate.
  if (interview > int_final) {
    stop("`interview` must not be greater than `int_final`.", call. = FALSE)
  }

  data <- data %>%
    group_by(UF, UPA, V1008, V1014, V2007,
             V2008, V20081, V20082, V2003) %>%
    fill(p201, .direction = "down") %>%
    mutate(p201 = ifelse(
      # seq_len() is empty when `interview` is 1, whereas `1:(interview - 1)`
      # would expand to c(1, 0).
      n_p %in% seq_len(interview - 1),
      p201,
      ifelse(
        n_p == interview & is.na(p201) &
          V2008 != 99 &
          V20081 != 99 & V20082 != 9999,
        100 * (n_p - 1) + V2003,
        NA
      )
    ))

  if (interview == int_final) {
    return(data)
  }
  loop(data, interview + 1, int_final)
}
# Wave 1 is assigned here; loop() then handles the remaining waves with its
# default arguments. The result is ungrouped before `idind` is built.
final_dataset <- dataset %>%
  group_by(UF, UPA, V1008, V1014, V2007,
           V2008, V20081, V20082, V2003) %>%
  mutate(p201 = ifelse(n_p == 1 & V2008 != 99 &
                         V20081 != 99 & V20082 != 9999,
                       100 * (n_p - 1) + V2003, NA)) %>%
  loop() %>%
  ungroup() %>%
  # across() replaces the superseded mutate_at().
  mutate(across(all_of(c("UF", "UPA", "V1008", "p201")), as.character)) %>%
  # `idind` concatenates the key columns; it stays NA where `p201` is missing.
  mutate(idind = ifelse(is.na(p201),
                        NA,
                        paste0(V1014, UF, UPA, V1008, p201)))
Upvotes: 1
Views: 166
Reputation: 12461
Hmmm. Sounds like a bit of recursive programming might do the trick. I have no idea why you're trying to do this and you haven't provided a desired outcome, so I can't check my results, but something like this might work.
[Untested code]
# Recursively compute `p201` for depths `currentDepth` .. `maxDepth`.
#
# Each pass sets `p201` to 100 * (n_p - 1) + V2003 on rows whose `n_p` is in
# 1..currentDepth and whose V2008 / V20081 / V20082 are not 99 / 99 / 9999
# (NA elsewhere), then carries the values down within each group.
#
# NOTE(review): the mutate() below does not read the previous `p201`, so each
# pass overwrites the result of the one before it.
#
# Arguments:
#   data         Data frame holding `n_p` and the grouping columns.
#   currentDepth Depth processed by this call (default 1).
#   maxDepth     Last depth to process (default 5).
#
# Returns the data frame, still grouped by the columns listed in group_by().
doIt <- function(data, currentDepth = 1, maxDepth = 5) {
  # The recursion stops only when `currentDepth == maxDepth`; starting past
  # `maxDepth` would therefore never terminate.
  if (currentDepth > maxDepth) {
    stop("`currentDepth` must not be greater than `maxDepth`.", call. = FALSE)
  }

  data <- data %>%
    group_by(UF, UPA, V1008, V1014, V2007,
             V2008, V20081, V20082, V2003) %>%
    mutate(p201 = ifelse(
      # seq_len() is empty for a depth of 0, whereas `1:currentDepth` would
      # expand to c(1, 0).
      n_p %in% seq_len(currentDepth) &
        V2008 != 99 & V20081 != 99 & V20082 != 9999,
      100 * (n_p - 1) + V2003,
      NA
    )) %>%
    fill(p201, .direction = "down")

  if (currentDepth == maxDepth) {
    return(data)
  }
  doIt(data, currentDepth + 1, maxDepth)
}
# doIt() returns a data frame that is still grouped; drop the grouping so that
# later verbs operate on the whole table.
final_dataset <- dataset %>%
  doIt() %>%
  ungroup()
I've tried to generalise your `ifelse`. The function either calls itself again (if the current depth < `maxDepth`) or returns its own result.
Upvotes: 1