Anna Bokun
Anna Bokun

Reputation: 55

Move rows to be on the same height?

[demographic birth history data] (demographic birth histories)[1] The image above shows the first ~20 rows of my df.

The goal is to move the b3_01 - b3_10 rows to be on the same height as rows that have a number in the v011 column. For example, caseid #4 is the mom and case ids #5 and 6 are her kids. I want both of the 1297s to be next to the 973.

I'm stumped!

structure(list(caseid = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20), v008 = c(1417, 1417, 1417, 1417, 1417, 1417, 1417, 1417, 1417, 1417, 1417, 1417, 1417, 1417, 1417, 1417, 1417, 1417, 1417, 1417), v011 = c(1081, NA, NA, 973, NA, NA, NA, 709, NA, NA, NA, 1045, NA, NA, NA, 877, NA, NA, NA, 685), b3_01 = c(NA, NA, NA, NA, 1297, NA, NA, NA, 1189, NA, NA, NA, NA, 1405, NA, NA, NA, NA, 1297, NA), b3_02 = c(NA, NA, NA, NA, NA, 1297, NA, NA, NA, NA, NA, NA, 1393, NA, NA, NA, NA, 1225, NA, NA), b3_03 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1189, NA, NA, NA), b3_04 = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), b3_05 = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), b3_06 = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), b3_07 = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), b3_08 = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), b3_09 = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), b3_10 = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_)), row.names = c(NA, -20L), class = c("tbl_df", "tbl", "data.frame"))

So far I tried to bump up the b3_01-b3_10 values, by they are not always the same distance apart (e.g. b3_01 is not always 1 below v011).

## Not the solution, but nice try  
      hello4 <- hello4 %>% 
        mutate_at(c("b3_01"), funs(lead), n = 1)

      hello4 <- hello4 %>% 
        mutate_at(c("b3_02"), funs(lead), n = 2)

      hello4 <- hello4 %>% 
        mutate_at(c("b3_03"), funs(lead), n = 3)

      etc.

Upvotes: 0

Views: 59

Answers (2)

bouncyball
bouncyball

Reputation: 10761

We can use the fill function from the tidyr package. First, we fill in missing values for v011, then we fill in the remaining columns after grouping by v011. Filling in the missing values for v011 is necessary, because it is the variable we group by. If you need to retain its missingness, you could create a copy of the column (v011_copy).

library(dplyr)
library(tidyverse)

d %>%
    fill(v011, .direction = "down") %>%
    group_by(v011) %>%
    fill(-caseid, -v008, -v011, .direction = "up")

#    caseid  v008  v011 b3_01 b3_02 b3_03
#     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#  1      1  1417  1081    NA    NA    NA
#  2      2  1417  1081    NA    NA    NA
#  3      3  1417  1081    NA    NA    NA
#  4      4  1417   973  1297  1297    NA
#  5      5  1417   973  1297  1297    NA
#  6      6  1417   973    NA  1297    NA
#  7      7  1417   973    NA    NA    NA
#  8      8  1417   709  1189    NA    NA
#  9      9  1417   709  1189    NA    NA
# 10     10  1417   709    NA    NA    NA
# 11     11  1417   709    NA    NA    NA
# ... 

Upvotes: 1

akrun
akrun

Reputation: 887231

We can use na.locf from zoo

library(data.table)
library(zoo)
nm1 <- setdiff(names(d), c("caseid", "v008", "v011"))
setDT(d)[, v011 := na.locf0(v011)]
d[,  (nm1) :=  lapply(.SD, na.locf0, fromLast = TRUE), 
         v011, .SDcols = nm1]
d[, 1:6]
#    caseid v008 v011 b3_01 b3_02 b3_03
# 1:      1 1417 1081    NA    NA    NA
# 2:      2 1417 1081    NA    NA    NA
# 3:      3 1417 1081    NA    NA    NA
# 4:      4 1417  973  1297  1297    NA
# 5:      5 1417  973  1297  1297    NA
# 6:      6 1417  973    NA  1297    NA
# 7:      7 1417  973    NA    NA    NA
# 8:      8 1417  709  1189    NA    NA
# 9:      9 1417  709  1189    NA    NA
#10:     10 1417  709    NA    NA    NA
#11:     11 1417  709    NA    NA    NA
#12:     12 1417 1045  1405  1393    NA
#13:     13 1417 1045  1405  1393    NA
#14:     14 1417 1045  1405    NA    NA
#15:     15 1417 1045    NA    NA    NA
#16:     16 1417  877  1297  1225  1189
#17:     17 1417  877  1297  1225  1189
#18:     18 1417  877  1297  1225    NA
#19:     19 1417  877  1297    NA    NA
#20:     20 1417  685    NA    NA    NA

Upvotes: 1

Related Questions