Reputation: 33
I have a dataframe with multiple observations (the amount and their names vary often) with minimum and maximum values. Example:
ID O1_min O1_max O2_min O2_max O3_min O3_max
A 1 2 1 2 1 2
B 1 2 1 2 1 2
C 1 2 1 2 1 2
D 1 2 1 2 1 2
I want to go through my data frame and transform all _min and _max columns in a _range column for each observation. So it would look like this:
ID O1_range O2_range O3_range
A 1:2 1:2 1:2
B 1:2 1:2 1:2
C 1:2 1:2 1:2
D 1:2 1:2 1:2
I was using paste()
function but that won't solve my problem, once new columns can enter the dataframe at any time and I'd like to have a more automatic code.
Upvotes: 2
Views: 1538
Reputation: 40171
One base R
possibility could be:
df <- data.frame(df[1],
mapply(function(x, y) paste(x, y, sep = ":"),
df[-1][c(TRUE, FALSE)], df[-1][c(FALSE, TRUE)]))
colnames(df) <- sub("\\_.*", "\\1", colnames(df))
ID O1 O2 O3
1 A 1:2 1:2 1:2
2 B 1:2 1:2 1:2
3 C 1:2 1:2 1:2
4 D 1:2 1:2 1:2
If you need to name your columns with _range
, then you can replace the last line with:
colnames(df)[2:length(df)] <- paste0("O", 1:(length(df) - 1), "_range")
ID O1_range O2_range O3_range
1 A 1:2 1:2 1:2
2 B 1:2 1:2 1:2
3 C 1:2 1:2 1:2
4 D 1:2 1:2 1:2
Sample data:
df <- read.table(text = "ID O1_min O1_max O2_min O2_max O3_min O3_max
A 1 2 1 2 1 2
B 1 2 1 2 1 2
C 1 2 1 2 1 2
D 1 2 1 2 1 2",
header = TRUE,
stringsAsFactors = FALSE)
Upvotes: 3
Reputation: 6116
Suppose columns have the same naming convention, you can use purrr::map2_dfc()
.
library(dplyr)
library(purrr)
library(stringr)
data <- read_delim("ID O1_min O1_max O2_min O2_max O3_min O3_max
A 1 2 1 2 1 2
B 1 2 1 2 1 2
C 1 2 1 2 1 2
D 1 2 1 2 1 2",delim = " ") %>%
mutate_all(str_trim)
# concatenation
result <- map2_dfc(select(data,ends_with("min")),
select(data,ends_with("max")),
function(x,y){
str_c(x,":",y)
})
# rename columns
colnames(result) <- str_replace(colnames(result),pattern = "_.+","range")
#result
bind_cols(data[,1],result)
# A tibble: 4 x 4
ID O1range O2range O3range
<chr> <chr> <chr> <chr>
1 A 1:2 1:2 1:2
2 B 1:2 1:2 1:2
3 C 1:2 1:2 1:2
4 D 1:2 1:2 1:2
Upvotes: 5
Reputation: 5138
You can do this in base R using Map
. I used @avid_useR's data. This uses Map
to paste alternating columns (minus the ID column) and assigns them to your new columns names [paste0("O", 1:3, "_range")]
. Hope this helps!
df_new <- data.frame(ID = df1$ID, stringsAsFactors = F)
df_new[paste0("O", 1:3, "_range")] <- Map(paste, df1[-1][c(T,F)], df1[-1][c(F,T)], sep = ":")
df_new
df1.ID O1_range O2_range O3_range
1 A 1:2 1:2 1:2
2 B 1:2 1:2 1:2
3 C 1:2 1:2 1:2
4 D 1:2 1:2 1:2
Upvotes: 2
Reputation: 18691
We can do it with gather
and spread
from tidyr
:
library(tidyverse)
df %>%
gather(var, value, -ID) %>%
group_by(ID, col = sub('_[a-z]+$', '', var)) %>%
summarize(value = paste(value, collapse = ":")) %>%
spread(col, value) %>%
rename_at(vars(-ID), paste0, "_range")
Output:
# A tibble: 4 x 4
# Groups: ID [4]
ID O1_range O2_range O3_range
<fct> <chr> <chr> <chr>
1 A 1:2 1:2 1:2
2 B 1:2 1:2 1:2
3 C 1:2 1:2 1:2
4 D 1:2 1:2 1:2
Data:
df <- structure(list(ID = structure(1:4, .Label = c("A", "B", "C",
"D"), class = "factor"), O1_min = c(1L, 1L, 1L, 1L), O1_max = c(2L,
2L, 2L, 2L), O2_min = c(1L, 1L, 1L, 1L), O2_max = c(2L, 2L, 2L,
2L), O3_min = c(1L, 1L, 1L, 1L), O3_max = c(2L, 2L, 2L, 2L)), class = "data.frame", row.names = c(NA,
-4L))
Upvotes: 2
Reputation: 1114
This answer is a bit lengthy but does the trick, plus can handle more columns if they follow the same naming format.
df <- data.frame(id=letters[1:4],A1_min=c(1:4),A1_max=c(2:5),
B1_min=c(3:6),B1_max=c(4:7))
df %>% gather(k,v,-id) %>%
mutate(cat=str_split_fixed(k,'_',2)[,1],
val=str_split_fixed(k,'_',2)[,2]) %>%
select(-k) %>% spread(val,v) %>%
mutate(range=paste0(min,'-',max)) %>%
select(-max,-min) %>% spread(cat,range)
id A1 B1
1 a 1-2 3-4
2 b 2-3 4-5
3 c 3-4 5-6
4 d 4-5 6-7
Upvotes: 2