MyName
MyName

Reputation: 109

arrange a complicated data set in R

I have a large data set: > ncol(d) [1] 1680 nrow(d) [1] 12 that it looks like this:

a  b  c  e  f  g
3  2  5  1  3  6
a  b  c  d  e  g
1  7  8  4  5  8
a  c  d  e  f  h      #in this row b does not exist 
5  10 4  7  5  10

And I need that it looks like this:

a  b  c  d  e  f  g  h  
3  2  5  0  3  6  10 8  
1  7  8  4  5  0  8  0 
5  0  10 4  7  5  0  10                 #and all the other columns ...  

Since my data is really long and I have many corrections like this one to do over all the data set, it is hard to do it by hand. I would like to know if there is any way to do this using some sort of automatic way, like a logic function or a loop. Any idea is welcome Regards

Upvotes: 0

Views: 108

Answers (2)

akrun
akrun

Reputation: 887981

We could get the alternate rows with recycling logical vector, construct a data.frame and pivot it to wide format with pivot_wider

library(dplyr)
library(tidyr)
library(data.table)
sub1 <- df1[c(TRUE, FALSE),]
sub2 <- df1[c(FALSE, TRUE),]
tibble(ind = c(row(sub1)), col1 = factor(unlist(sub1), levels = letters[1:8]), 
        col2 = as.integer(unlist(sub2))) %>% 
    pivot_wider(names_from = col1, values_from = col2,
       values_fill = list(col2 = 0)) %>%
    select(-ind)
#A tibble: 3 x 8
#      a     b     c     d     e     f     g     h
#  <int> <int> <int> <int> <int> <int> <int> <int>
#1     3     2     5     0     1     3     6     0
#2     1     7     8     4     5     0     8     0
#3     5     0    10     4     7     5     0    10

Or using base R with reshape

out <- reshape(
           data.frame(ind = c(row(sub1)), 
               col1 = factor(unlist(sub1), levels = letters[1:8]), 
               col2 = as.integer(unlist(sub2))),
        idvar = 'ind', direction = 'wide', timevar = 'col1')[-1]
names(out) <- sub("col2\\.", "", names(out))
out[is.na(out)] <- 0
row.names(out) <- NULL

out
#  a b  c d e f g  h
#1 3 2  5 0 1 3 6  0
#2 1 7  8 4 5 0 8  0
#3 5 0 10 4 7 5 0 10

data

df1 <- structure(list(v1 = c("a", "3", "a", "1", "a", "5"), v2 = c("b", 
"2", "b", "7", "c", "10"), v3 = c("c", "5", "c", "8", "d", "4"
), v4 = c("e", "1", "d", "4", "e", "7"), v5 = c("f", "3", "e", 
"5", "f", "5"), v6 = c("g", "6", "g", "8", "h", "10")), class = "data.frame", 
row.names = c(NA, 
-6L))

Upvotes: 0

A5C1D2H2I1M1N2O1R2T1
A5C1D2H2I1M1N2O1R2T1

Reputation: 193687

Here's a possible approach using data.table:

library(data.table)
melt(
  setDT(
    setnames(
      data.table::transpose(df1), 
      paste(rep(1:(nrow(df1)/2), each = 2), c("name", "value"), sep = "_"))),
  measure = patterns("name", "value"))[
    , dcast(.SD, variable ~ value1, value.var = "value2", fill = 0)]
#    variable a b  c d e f g  h
# 1:        1 3 2  5 0 1 3 6  0
# 2:        2 1 7  8 4 5 0 8  0
# 3:        3 5 0 10 4 7 5 0 10

Upvotes: 1

Related Questions