VR28
VR28

Reputation: 214

complete sequence column names and fill R

I have large data similar to the following:

week_0<-c(5,0,1,0,8,1)
week_4<-c(1,0,1,0,1,1)
week_8<-c(1,0,6,0,0,0)
week_9<-c(2,4,1,7,8,1)
week_10<-c(2,4,1,7,8,1)
Participant<-c("Lion","Cat","Dog","Snake","Tiger","Mouse")
test_data<-data.frame(Participant,week_0,week_4,week_8,week_9,week_10)

 > test_data
 Participant week_0 week_4 week_8 week_9 week_10
1        Lion      5      1      1      2       2
2         Cat      0      0      0      4       4
3         Dog      1      1      6      1       1
4       Snake      0      0      0      7       7
5       Tiger      8      1      0      8       8
6       Mouse      1      1      0      1       1

I want to fill out the gap between the columnnames numbers. The end result that I'm looking for is:

test_data
      Participant week_0 week_1 week_2 week_3 week_4 week_5 week_6 week_7 week_8 week_9 week_10
1        Lion      5      5      5      5      1      1      1      1      1      2       2
2         Cat      0      0      0      0      0      0      0      0      0      4       4
3         Dog      1      1      1      1      1      1      1      1      6      1       1
4       Snake      0      0      0      0      0      0      0      0      0      7       7
5       Tiger      8      8      8      8      1      1      1      1      0      8       8
6       Mouse      1      1      1      1      1      1      1      1      0      1       1

I have looked at the Fill function in r, but I can't get the result that I want. Any suggestions on how to do this?

Upvotes: 4

Views: 144

Answers (2)

akrun
akrun

Reputation: 886938

Using base R - extract the numeric suffix part from the 'week' column names, then get a sequence between the min/max values ('i2'), replicate the columns based on matching the indexes and rename the column names with i2

i1 <- as.integer(sub("week_", "", names(test_data)[-1]))
i2 <- Reduce(`:`, as.list(range(i1)))
test_data <- cbind(test_data[1], test_data[-1][cumsum(!is.na(match(i2, i1)))])
names(test_data)[-1] <- paste0("week_", i2)

-output

> test_data
  Participant week_0 week_1 week_2 week_3 week_4 week_5 week_6 week_7 week_8 week_9 week_10
1        Lion      5      5      5      5      1      1      1      1      1      2       2
2         Cat      0      0      0      0      0      0      0      0      0      4       4
3         Dog      1      1      1      1      1      1      1      1      6      1       1
4       Snake      0      0      0      0      0      0      0      0      0      7       7
5       Tiger      8      8      8      8      1      1      1      1      0      8       8
6       Mouse      1      1      1      1      1      1      1      1      0      1       1

With tidyverse, an option is to reshape to 'long' with pivot_longer, use complete to expand the data, fill the missing values with previous non-NA, and reshape back to 'wide' with pivot_wider

library(dplyr)
library(tidyr)
test_data %>%
  pivot_longer(cols = starts_with('week_'), 
    names_prefix = "week_", names_transform = as.integer) %>% 
  complete(Participant, name = full_seq(name, period = 1)) %>% 
  fill(value, .direction = "downup") %>%
  pivot_wider(names_from = name, values_from = value, 
   names_prefix = "week_") %>% 
  arrange(match(Participant, test_data$Participant))

-output

# A tibble: 6 × 12
  Participant week_0 week_1 week_2 week_3 week_4 week_5 week_6 week_7 week_8 week_9 week_10
  <chr>        <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>   <dbl>
1 Lion             5      5      5      5      1      1      1      1      1      2       2
2 Cat              0      0      0      0      0      0      0      0      0      4       4
3 Dog              1      1      1      1      1      1      1      1      6      1       1
4 Snake            0      0      0      0      0      0      0      0      0      7       7
5 Tiger            8      8      8      8      1      1      1      1      0      8       8
6 Mouse            1      1      1      1      1      1      1      1      0      1       1

Upvotes: 1

jkatam
jkatam

Reputation: 3447

Please check the below code

test_data<-data.frame(Participant,week_0,week_4,week_8,week_9,week_10) %>% 
  pivot_longer(starts_with('week'), names_to = 'name', values_to = 'value') %>%
  mutate(seq=as.numeric(str_replace_all(name,'\\w*\\_',''))) %>% arrange(Participant)

  
seq <- data.frame(Participant=rep(unique(Participant),11)) %>% group_by(Participant) %>% 
  mutate(seq=row_number(), seq=seq-1) %>% 
  arrange(Participant)

test_data2 <- test_data %>% right_join(seq, by=c('Participant','seq')) %>% 
  arrange(Participant) %>% 
  mutate(name=ifelse(is.na(name),paste0('week_',seq),name)) %>% arrange(Participant,seq) %>% 
  group_by(Participant) %>% 
  fill(value) %>% 
  pivot_wider(Participant, names_from = name, values_from = value)

Created on 2023-01-28 with reprex v2.0.2

# A tibble: 6 × 11
# Groups:   Participant [6]
  Participant week_0 week_2 week_3 week_4 week_5 week_6 week_7 week_8 week_9 week_10
  <chr>        <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>   <dbl>
1 Cat              0      0      0      0      0      0      0      0      4       4
2 Dog              1      1      1      1      1      1      1      6      1       1
3 Lion             5      5      5      1      1      1      1      1      2       2
4 Mouse            1      1      1      1      1      1      1      0      1       1
5 Snake            0      0      0      0      0      0      0      0      7       7
6 Tiger            8      8      8      1      1      1      1      0      8       8

Upvotes: 1

Related Questions