Reputation: 249
I am having trouble splitting columns in my data frame:
2010 census 2014 land area city
8175133 302.6 sq mi 783.8 km2 New york
3792621 468.7 sq mi 1213.9 km2 Los Angeles
2695598 227.7 sq mi 589.6 km2 Chicago
I want to get:
2010 census area sq/mi area sq/km city
8175133 302.6 783.8 New york
3792621 468.7 1213.9 Los Angeles
2695598 227.7 589.6 Chicago
Upvotes: 1
Views: 956
Reputation: 3230
Using data.table
and the tstrsplit
function from that package:
dat <- fread("2010 census, 2014 land area, city
8175133, 302.6 sq mi 783.8 km2, New york
3792621, 468.7 sq mi 1213.9 km2, Los Angeles
2695598, 227.7 sq mi 589.6 km2, Chicago")
dat[, c("area sq/mi", "area sq/km") := tstrsplit(`2014 land area`, " ", keep = c(1,4))]
dat[, .(`2010 census`, `area sq/mi`, `area sq/km`, city)]
# 2010 census area sq/mi area sq/km city
# 1: 8175133 302.6 783.8 New york
# 2: 3792621 468.7 1213.9 Los Angeles
# 3: 2695598 227.7 589.6 Chicago
Upvotes: 1
Reputation: 37641
Here is a Base R solution using sub
dt$Area_SqMi = sub("\\s*sq\\s*mi.*", "", dt$X2014.land.area)
dt$Area_km2 = sub(".*mi\\s+(\\S+)\\s+km2.*", "\\1", dt$X2014.land.area)
dt
X2010.census X2014.land.area city Area_SqMi Area_km2
1 8175133 302.6 sq mi 783.8 km2 New york 302.6 783.8
2 3792621 468.7 sq mi 1213.9 km2 Los Angeles 468.7 1213.9
3 2695598 227.7 sq mi 589.6 km2 Chicago 227.7 589.6
Of course, if you want to get rid of the original column, you can add dt = dt[,-2]
Upvotes: 0
Reputation: 3369
Here is a tidyr solution.
library(tidyr)
df <- data.frame(census = c(8175133, 3792621, 2695598),
land.area = c("302.6 sq mi 783.8 km2", "468.7 sq mi 1213.9 km2", "227.7 sq mi 589.6 km2"),
city = c("New york","Los Angeles","Chicago"), stringsAsFactors = FALSE)
df$land.area <- sapply(df$land.area, sub, pattern = " km2", replacement = "")
df <- df %>% separate(col = land.area, into = c("area sq/mi", "area sq/km"), sep = " sq mi ")
Upvotes: 2
Reputation: 28825
You can use stringr::str_split_fixed
:
library(stringr)
splitted <- str_split_fixed(dt$X2014.land.area, " sq mi ", 2)
splitted[,2] <- gsub( " km2", "", as.character(splitted[,2]))
colnames(splitted) <- c("area sq. mi", "area sq km")
splitted <- data.frame(splitted)
dt.2 <- cbind(dt[,c(1,3)], splitted)
dt.2
# X2010.census city area.sq..mi area.sq.km
# 1 8175133 New york 302.6 783.8
# 2 3792621 Los Angeles 468.7 1213.9
# 3 2695598 Chicago 227.7 589.6
Data
structure(list(X2010.census = c(8175133L, 3792621L, 2695598L),
X2014.land.area = c("302.6 sq mi 783.8 km2", "468.7 sq mi 1213.9 km2",
"227.7 sq mi 589.6 km2"), city = c("New york", "Los Angeles",
"Chicago")), .Names = c("X2010.census", "X2014.land.area",
"city"), row.names = c(NA, -3L), class = "data.frame") -> dt
Upvotes: 1