psysky
psysky

Reputation: 3195

concatenate two columns into one in R

My data

# Sample data: an id column followed by two pairs of columns.
conc_data <- data.frame(
  kod_nar.id = c(1L, 3L, 2L),
  x123_1 = c(0L, 0L, 0L),
  x124_2 = c(0, 0.123, 0.122),
  x125_3 = 0:2,
  x126_4 = c(0, 0.234, 0.99)
)

There are 4 value columns here (after the id column `kod_nar.id`), and every 2 consecutive columns need to be combined into one that takes the name of the first column of the pair. In other words, the values of each pair should be concatenated as `first(second)`. As a result, we will have only 2 value columns in the data frame. Every column has a pair: the number of value columns is even, and the columns are ordered as the first pair, the second pair, and so on.

I.E. Output

  kod_nar.id   x123_1   x125_3
1          1        0        0
2          3 0(0.123) 1(0.234)
3          2 0(0.122)  2(0.99)

How to do it?

Upvotes: 1

Views: 303

Answers (6)

IceCreamToucan
IceCreamToucan

Reputation: 28705

If you melt to long format you can do this with data.table group operations and then dcast back to wide

# Reshape to long format, collapse each consecutive pair of value columns
# into one "a(b)" string, then cast back to wide.
# conc_data is a plain data.frame, so it is converted with as.data.table()
# first: the `[, j, by]` grouping syntax below needs a data.table.
df_long <-
  melt(as.data.table(conc_data), id.vars = 1)[
      , .(variable = variable[1],  # the pair takes the name of its first column
          value = sprintf('%.0f(%.3f)', value[1], value[2]))
      # rowid() numbers the melted rows within each id; integer division by 2
      # puts rows 1-2 into pair 0, rows 3-4 into pair 1, and so on.
      , by = .(kod_nar.id, id = (rowid(kod_nar.id) - 1) %/% 2)]

# value.var is named explicitly so dcast() does not have to guess it.
out <- dcast(df_long, kod_nar.id ~ variable, value.var = "value")

out
#    kod_nar.id   x123_1   x125_3
# 1:          1 0(0.000) 0(0.000)
# 2:          2 0(0.122) 2(0.990)
# 3:          3 0(0.123) 1(0.234)

If it's important to have just '0' on those first rows you could add this additional step

# Replace every cell that contains no non-zero digit (e.g. "0(0.000)") with a
# plain '0'; all other cells are kept as they are.
out <- out[, lapply(.SD, function(cell) {
  has_nonzero_digit <- grepl('[1-9]', cell)
  ifelse(has_nonzero_digit, cell, '0')
})]

out
#    kod_nar.id   x123_1   x125_3
# 1:          1        0        0
# 2:          2 0(0.122) 2(0.990)
# 3:          3 0(0.123) 1(0.234)

Upvotes: 2

GKi
GKi

Reputation: 39717

You can do this, e.g., by using sapply and paste. I'm assuming that only one number should be printed when the numbers in both columns are equal:

# Positions of the first column of every pair (2, 4, ...); column 1 is the id.
tt  <- seq(2, ncol(conc_data), by = 2)
# For each pair build "a(b)"; when both values are equal only "a" is printed.
res  <- cbind(conc_data[1], sapply(tt, function(i) {
  first <- conc_data[, i]
  second <- conc_data[, i + 1]
  ifelse(first != second, paste0(first, "(", second, ")"), paste0(first))
}))
# Each combined column takes the name of the first column of its pair.
names(res)[-1]  <- names(conc_data)[tt]
res
#  kod_nar.id   x123_1   x125_3
#1          1        0        0
#2          3 0(0.123) 1(0.234)
#3          2 0(0.122)  2(0.99)

Or by using the column names directly in sapply:

# Same result, but iterating over the names of the pair-leading columns so
# that sapply() labels the output columns itself.
tt <- seq(from = 2, to = ncol(conc_data), by = 2)
cbind(conc_data[1], sapply(names(conc_data)[tt], function(i) {
  # Position of the partner column: the one right after column `i`.
  partner_pos <- which(names(conc_data) == i) + 1
  lead <- conc_data[, i]
  partner <- conc_data[, partner_pos]
  differs <- lead != partner
  ifelse(differs, paste0(lead, "(", partner, ")"), paste0(lead))
}))
#  kod_nar.id   x123_1   x125_3
#1          1        0        0
#2          3 0(0.123) 1(0.234)
#3          2 0(0.122)  2(0.99)

Upvotes: 1

akrun
akrun

Reputation: 887851

An option would be to loop over the sets of columns, use sprintf to format the columns of interest and cbind with the first column

# Format each column pair as "first(second)" with sprintf() and attach the
# results to the id column.
out <- cbind(
  conc_data[1],
  sapply(list(2:3, 4:5), function(pair) {
    lead <- round(conc_data[, pair[1]], 2)
    partner <- conc_data[, pair[2]]
    sprintf("%d(%f)", lead, partner)
  })
)

If rows where both values of a pair are 0 should show just '0'

# Format each column pair as "first(second)"; rows in which both values of
# the pair are zero are shown as a plain 0.
out <- cbind(
  conc_data[1],
  sapply(list(2:3, 4:5), function(pair) {
    pair_df <- conc_data[pair]
    labels <- sprintf("%d(%.3f)", pair_df[[1]], pair_df[[2]])
    # TRUE for rows in which no value of the pair differs from zero.
    all_zero <- rowSums(pair_df != 0) == 0
    labels[all_zero] <- 0
    labels
  })
)
# The combined columns take the names of the first column of each pair.
names(out)[-1] <- names(conc_data)[c(2, 4)]
out
#  kod_nar.id   x123_1   x125_3
#1          1        0        0
#2          3 0(0.123) 1(0.234)
#3          2 0(0.122) 2(0.990)

Or more compactly

# Pair columns 2 and 4 with columns 3 and 5 and format each pair as
# "first(second)"; Map() carries over the names of the leading columns.
data.frame(c(
  conc_data[1],
  Map(
    function(lead, partner) sprintf("%d(%.3f)", lead, partner),
    conc_data[c(2, 4)],
    conc_data[c(3, 5)]
  )
))

Upvotes: 4

Ben G
Ben G

Reputation: 4338

Here's a tidyverse solution:

library(tidyverse)

# Overwrite the leading column of each pair with "first(second)", or with the
# leading value alone when both values are equal, then keep only those two
# combined columns.
conc_data %>%
  mutate(
    x123_1 = ifelse(
      x123_1 != x124_2,
      paste0(x123_1, "(", x124_2, ")"),
      x123_1
    ),
    x125_3 = ifelse(
      x125_3 != x126_4,
      paste0(x125_3, "(", x126_4, ")"),
      x125_3
    )
  ) %>%
  select(x123_1, x125_3)


    x123_1   x125_3
1        0        0
2 0(0.123) 1(0.234)
3 0(0.122)  2(0.99)

Upvotes: 1

Ronak Shah
Ronak Shah

Reputation: 389235

We can split every two columns using split.default and use sapply to paste the two columns together in the required format. We add names to the output by selecting alternating column names.

# Split the value columns (everything but the id) into consecutive pairs and
# paste each pair together as "first(second)".
# The grouping vector is 1, 1, 2, 2, ...: one entry per value column, built
# with seq_len() so it is empty (not c(1, 0)) when there are no value columns.
output <- cbind(conc_data[1], sapply(split.default(conc_data[-1],
           (seq_len(ncol(conc_data) - 1) + 1) %/% 2),
   function(x) paste0(x[[1]], "(", x[[2]], ")")))

# Keep every other value-column name, i.e. the first name of each pair.
names(output)[-1] <- names(conc_data)[-1][c(TRUE, FALSE)]

output
#  kod_nar.id   x123_1   x125_3
#1          1     0(0)     0(0)
#2          3 0(0.123) 1(0.234)
#3          2 0(0.122)  2(0.99)

Or maybe a bit simpler to split using gl

# Same pairing, with gl() generating the group factor: one level per pair,
# each level repeated twice.
output <- local({
  n_pairs <- (ncol(conc_data) - 1) / 2
  pairs <- split.default(conc_data[-1], gl(n_pairs, 2))
  cbind(conc_data[1], sapply(pairs, function(pair) {
    paste0(pair[[1]], "(", pair[[2]], ")")
  }))
})

Upvotes: 3

r.user.05apr
r.user.05apr

Reputation: 5456

Or:

# Overwrite the leading column of each pair with "first(second)", or with a
# plain "0" when the second value of the pair is exactly zero.
conc_data[["x123_1"]] <- ifelse(
  conc_data[["x124_2"]] == 0,
  "0",
  sprintf("%d(%.3f)", conc_data[["x123_1"]], conc_data[["x124_2"]])
)
conc_data[["x125_3"]] <- ifelse(
  conc_data[["x126_4"]] == 0,
  "0",
  sprintf("%d(%.3f)", conc_data[["x125_3"]], conc_data[["x126_4"]])
)

Upvotes: 5

Related Questions