Reputation: 59
I have this dataframe CC.Number Date Time Accident.Type Location.1 1 12T008826 07/01/2012 1630 PD (39.26699, -76.560642) 2 12L005385 07/02/2012 1229 PD (39.000549, -76.399312) 3 12L005388 07/02/2012 1229 PD (39.00058, -76.399267) 4 12T008851 07/02/2012 445 PI (39.26367, -76.56648) 5 12T008858 07/02/2012 802 PD (39.240862, -76.599017) 6 12T008860 07/02/2012 832 PD (39.27022, -76.63926)
I want to split the column Location.1 to "alt" and "lng" columns to be like
CC.Number Date Time Accident.Type alt lng
1 12T008826 07/01/2012 1630 PD 39.26699 -76.560642
2 12L005385 07/02/2012 1229 PD 39.000549 -76.399312
3 12L005388 07/02/2012 1229 PD 39.00058 -76.399267
I tried
location <- md$Location.1
location1 <- substring(location, 2)
location2 <- substr(location1, 1, nchar(location1)-1 )
location3 <- strsplit(location2, ",")
but stuck at converting location3 from list to dataframe
I tried
ocdf<-data.frame(location2)
colnames(locdf)[1] = c("x")
df <- separate(location, col=x,into = c("lat","log"), sep = ",")
but I get an error
Error in UseMethod("separate_") : no applicable method for 'separate_' applied to an object of class "character"
Upvotes: 3
Views: 15122
Reputation: 39717
In base you can use trimws
to remove ()
and read.table
to split at ,
.
cbind(md[1:4], read.table(sep=",", text=trimws(md$Location.1, whitespace = "[ ()]"),
col.names=c("alt", "lng")))
# CC.Number Date Time Accident.Type alt lng
#1 12T008826 07/01/2012 1630 PD 39.26699 -76.56064
#2 12L005385 07/02/2012 1229 PD 39.00055 -76.39931
#3 12L005388 07/02/2012 1229 PD 39.00058 -76.39927
#4 12T008851 07/02/2012 445 PI 39.26367 -76.56648
#5 12T008858 07/02/2012 802 PD 39.24086 -76.59902
#6 12T008860 07/02/2012 832 PD 39.27022 -76.63926
Data:
md <- structure(list(CC.Number = c("12T008826", "12L005385", "12L005388",
"12T008851", "12T008858", "12T008860"), Date = c(" 07/01/2012",
" 07/02/2012", " 07/02/2012", " 07/02/2012", " 07/02/2012", " 07/02/2012"
), Time = c(1630L, 1229L, 1229L, 445L, 802L, 832L), Accident.Type = c(" PD",
" PD", " PD", " PI", " PD",
" PD"), Location.1 = c(" (39.26699, -76.560642)",
" (39.000549, -76.399312)", " (39.00058, -76.399267)", " (39.26367, -76.56648)",
" (39.240862, -76.599017)", " (39.27022, -76.63926)")), class = "data.frame", row.names = c(NA,
-6L))
Upvotes: 1
Reputation: 92
separate
from tidyr
also works
library(tidyr)
# Sub out the parentheses
df$Location.1 <- gsub("[()]", "", df$Location.1)
separate(df, col = Location.1, into = c("lat","long"), sep = ",")
# CC.Number Date Time Accident.Type lat long
#1 12T008826 07/01/2012 1630 PD 39.26699 -76.560642
#2 12L005385 07/02/2012 1229 PD 39.000549 -76.399312
#3 12L005388 07/02/2012 1229 PD 39.00058 -76.399267
#4 12T008851 07/02/2012 445 PI 39.26367 -76.56648
#5 12T008858 07/02/2012 802 PD 39.240862 -76.599017
#6 12T008860 07/02/2012 832 PD 39.27022 -76.63926
Upvotes: 4
Reputation: 11128
You can do like this also, assuming dat1
is your original dataset name, we can use strsplit and gsub. First we replace commas and parenthesis with nothing using gsub and then using strsplit to split the values by spaces:
df1 <- setNames(data.frame(do.call("rbind",strsplit(gsub("\\(|\\)|,","",dat1$Location.1),split=" "))),c("Lat","Long"))
df2 <- data.frame(cbind(dat1[,1:(length(dat1)-1)],df1))
# CC.Number Date Time Accident.Type Lat Long
# 1 12T008826 07/01/12 1630 PD 39.26699 -76.560642
# 2 12L005385 07/02/12 1229 PD 39.000549 -76.399312
# 3 12L005388 07/02/12 1229 PD 39.00058 -76.399267
# 4 12T008851 07/02/12 445 PI 39.26367 -76.56648
# 5 12T008858 07/02/12 802 PD 39.240862 -76.599017
# 6 12T008860 07/02/12 832 PD 39.27022 -76.63926
Upvotes: 2
Reputation: 887891
We can use extract
from tidyr
by capturing as two groups with only the numeric elements with dots and discard the rest in 'Location.1'
library(tidyr)
df1 %>%
extract(Location.1, into = c('alt', 'lng'), "\\(([0-9.]+),\\s+(-*[0-9.]+).")
# CC.Number Date Time Accident.Type alt lng
#1 12T008826 07/01/2012 1630 PD 39.26699 -76.560642
#2 12L005385 07/02/2012 1229 PD 39.000549 -76.399312
#3 12L005388 07/02/2012 1229 PD 39.00058 -76.399267
#4 12T008851 07/02/2012 445 PI 39.26367 -76.56648
#5 12T008858 07/02/2012 802 PD 39.240862 -76.599017
#6 12T008860 07/02/2012 832 PD 39.27022 -76.63926
Upvotes: 2