Reputation: 509
I have a dataset like this:
structure(list(`Frequency
Percent` = c("car", "window", "ball",
"ups"), AI = c("2\n0.00", "3\n0.00", "1\n0.00", "2\n0.00"), BLK = c("0\n0.00",
"218\n0.29", "48\n0.06", "0\n0.00"), HIANIC = c("1\n0.00", "8\n0.01",
"4\n0.01", "0\n0.00"), NATRICAN = c("9\n0.01", "7\n0.01", "8\n0.01",
"0\n0.00"), UNK = c("15\n0.02", "83\n0.11", "36\n0.05", "0\n0.00"
), yy = c("111\n0.15", "897\n1.20", "756\n1.02", "1\n0.00")), class = "data.frame", row.names = c(NA,
-4L))
How can I split each row by "" and remove n to make two new columns. For instance, car
and AI
cell (“2\n0.00”), I will have 2 and 0.00 in two different columns.
Upvotes: 1
Views: 62
Reputation: 39647
A base one liner:
do.call(data.frame, lapply(DF, \(x) do.call(rbind, strsplit(x, "\n"))))
# Frequency.Percent AI.1 AI.2 BLK.1 BLK.2 HIANIC.1 HIANIC.2 NATRICAN.1
#1 car 2 0.00 0 0.00 1 0.00 9
#2 window 3 0.00 218 0.29 8 0.01 7
#3 ball 1 0.00 48 0.06 4 0.01 8
#4 ups 2 0.00 0 0.00 0 0.00 0
# NATRICAN.2 UNK.1 UNK.2 yy.1 yy.2
#1 0.01 15 0.02 111 0.15
#2 0.01 83 0.11 897 1.20
#3 0.01 36 0.05 756 1.02
#4 0.00 0 0.00 1 0.00
Or add also a type convert.
type.convert(do.call(data.frame, lapply(DF, \(x) do.call(rbind, strsplit(x, "\n")))), as.is=TRUE)
Upvotes: 1
Reputation: 886938
We may use cSplit
library(splitstackshape)
cSplit(df1, 2:ncol(df1), sep = "\n")
-output
Frequency\nPercent AI_1 AI_2 BLK_1 BLK_2 HIANIC_1 HIANIC_2 NATRICAN_1 NATRICAN_2 UNK_1 UNK_2 yy_1 yy_2
1: car 2 0 0 0.00 1 0.00 9 0.01 15 0.02 111 0.15
2: window 3 0 218 0.29 8 0.01 7 0.01 83 0.11 897 1.20
3: ball 1 0 48 0.06 4 0.01 8 0.01 36 0.05 756 1.02
4: ups 2 0 0 0.00 0 0.00 0 0.00 0 0.00 1 0.00
Upvotes: 0
Reputation: 519
There is also a base R solution:
dat = structure(list(`Frequency
Percent` = c("car", "window", "ball",
"ups"), AI = c("2\n0.00", "3\n0.00", "1\n0.00", "2\n0.00"), BLK = c("0\n0.00",
"218\n0.29", "48\n0.06", "0\n0.00"), HIANIC = c("1\n0.00", "8\n0.01",
"4\n0.01", "0\n0.00"), NATRICAN = c("9\n0.01", "7\n0.01", "8\n0.01",
"0\n0.00"), UNK = c("15\n0.02", "83\n0.11", "36\n0.05", "0\n0.00"
), yy = c("111\n0.15", "897\n1.20", "756\n1.02", "1\n0.00")), class = "data.frame", row.names = c(NA,
-4L))
transformed = data.frame(Freq_pc = dat[,1])
for(col in seq(2, ncol(dat))){
transformed = cbind(transformed, t(matrix(unlist(strsplit(dat[,col], "\n")), nrow=2)))
names(transformed)[c(2*(col-1), 2*(col-1)+1)] = c(paste0(names(dat)[col], "_n"), paste0(names(dat)[col], "_pc"))
}
That results in:
Freq_pc AI_n AI_pc BLK_n BLK_pc HIANIC_n HIANIC_pc NATRICAN_n NATRICAN_pc UNK_n UNK_pc yy_n yy_pc
1 car 2 0.00 0 0.00 1 0.00 9 0.01 15 0.02 111 0.15
2 window 3 0.00 218 0.29 8 0.01 7 0.01 83 0.11 897 1.20
3 ball 1 0.00 48 0.06 4 0.01 8 0.01 36 0.05 756 1.02
4 ups 2 0.00 0 0.00 0 0.00 0 0.00 0 0.00 1 0.00
Upvotes: 0
Reputation: 17174
One way is to use tidyr::separate
in a for
loop:
for(i in names(df[,-1])){
df <- tidyr::separate(df, i, sep = "\n", into = c(i, paste0(i,"_val")))
}
Output:
# Frequency\n Percent AI AI_val BLK BLK_val HIANIC HIANIC_val NATRICAN NATRICAN_val UNK UNK_val yy yy_val
# 1 car 2 0.00 0 0.00 1 0.00 9 0.01 15 0.02 111 0.15
# 2 window 3 0.00 218 0.29 8 0.01 7 0.01 83 0.11 897 1.20
# 3 ball 1 0.00 48 0.06 4 0.01 8 0.01 36 0.05 756 1.02
# 4 ups 2 0.00 0 0.00 0 0.00 0 0.00 0 0.00 1 0.00
Upvotes: 2
Reputation: 123783
Using tidyr::separate_rows
and tidyr::pivot_wider
you could do:
library(tidyr)
library(dplyr)
dat |>
mutate(unit = c("n\npct")) |>
separate_rows(-1, sep = "\n") |>
pivot_wider(names_from = "unit", values_from = -1)
#> # A tibble: 4 × 15
#> Frequency\n…¹ AI_n AI_pct BLK_n BLK_pct HIANI…² HIANI…³ NATRI…⁴ NATRI…⁵ UNK_n
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 car 2 0.00 0 0.00 1 0.00 9 0.01 15
#> 2 window 3 0.00 218 0.29 8 0.01 7 0.01 83
#> 3 ball 1 0.00 48 0.06 4 0.01 8 0.01 36
#> 4 ups 2 0.00 0 0.00 0 0.00 0 0.00 0
#> # … with 5 more variables: UNK_pct <chr>, yy_n <chr>, yy_pct <chr>,
#> # unit_n <chr>, unit_pct <chr>, and abbreviated variable names
#> # ¹`Frequency\n Percent`, ²HIANIC_n, ³HIANIC_pct, ⁴NATRICAN_n,
#> # ⁵NATRICAN_pct
Upvotes: 1