Reputation: 453
This is what the sample looks like:
# A tibble: 10 x 8
vehicle_id trip_id seg_1 seg_2 seg_3 seg_4 seg_5 seg_6
<chr> <int> <chr> <chr> <chr> <chr> <chr> <chr>
1 Zz3yE90z++QmTX2QO5dHI78IK/Q 1 13.67829, 100.62387 13.65382, 100.67562 13.63679, 100.71057 13.65828, 100.71631 13.65828, 100.71631 13.65828, 100.71631
2 Zz3yE90z++QmTX2QO5dHI78IK/Q 2 13.66859, 100.72032 13.66353, 100.71894 13.66353, 100.71894 13.66353, 100.71894 13.65958, 100.71773 13.65258, 100.71571
3 Zz3yE90z++QmTX2QO5dHI78IK/Q 3 13.63345, 100.71102 13.63349, 100.71096 13.63349, 100.71096 13.63349, 100.71096 13.63349, 100.71096 13.63349, 100.71096
4 Zz3yE90z++QmTX2QO5dHI78IK/Q 4 13.60629, 100.69999 13.61857, 100.70363 13.6344, 100.71093 13.6344, 100.71093 13.6344, 100.71093 13.63433, 100.71101
5 Zz3yE90z++QmTX2QO5dHI78IK/Q 5 13.56958, 100.78749 13.57759, 100.78828 13.58393, 100.79077 13.58385, 100.79403 13.59328, 100.76465 13.59612, 100.74922
6 Zz3yE90z++QmTX2QO5dHI78IK/Q 6 13.60148, 100.71103 13.60519, 100.7129 13.61076, 100.71317 13.62504, 100.71832 13.63082, 100.71459 13.63241, 100.71297
7 Zz3yE90z++QmTX2QO5dHI78IK/Q 7 13.60283, 100.70543 13.60118, 100.71196 13.60193, 100.71915 13.60487, 100.71483 13.605, 100.71402 13.60567, 100.71292
8 /+bx80f3gOoPMoFBsS+3xX6jpi8 44496 13.93405, 100.72371 13.92169, 100.69827 13.92155, 100.68866 13.92121, 100.65924 13.95379, 100.62163 13.95905, 100.62037
9 /+bx80f3gOoPMoFBsS+3xX6jpi8 44497 14.00297, 100.6638 13.99238, 100.65185 13.98482, 100.61921 13.98708, 100.61634 13.98707, 100.61612 13.98712, 100.61631
10 /+bx80f3gOoPMoFBsS+3xX6jpi8 44498 13.80508, 100.71231 13.82186, 100.72064 13.85196, 100.72147 13.88833, 100.72214 13.92194, 100.70304 13.90366, 100.66788
my.df <- structure(list(vehicle_id = c("Zz3yE90z++QmTX2QO5dHI78IK/Q","Zz3yE90z++QmTX2QO5dHI78IK/Q", "Zz3yE90z++QmTX2QO5dHI78IK/Q","Zz3yE90z++QmTX2QO5dHI78IK/Q", "Zz3yE90z++QmTX2QO5dHI78IK/Q","Zz3yE90z++QmTX2QO5dHI78IK/Q", "Zz3yE90z++QmTX2QO5dHI78IK/Q","/+bx80f3gOoPMoFBsS+3xX6jpi8", "/+bx80f3gOoPMoFBsS+3xX6jpi8","/+bx80f3gOoPMoFBsS+3xX6jpi8"), trip_id = c(1L, 2L, 3L, 4L, 5L,6L, 7L, 44496L, 44497L, 44498L), seg_1 = c("13.67829, 100.62387","13.66859, 100.72032", "13.63345, 100.71102", "13.60629, 100.69999","13.56958, 100.78749", "13.60148, 100.71103", "13.60283, 100.70543","13.93405, 100.72371", "14.00297, 100.6638", "13.80508, 100.71231"), seg_2 = c("13.65382, 100.67562", "13.66353, 100.71894", "13.63349, 100.71096","13.61857, 100.70363", "13.57759, 100.78828", "13.60519, 100.7129","13.60118, 100.71196", "13.92169, 100.69827", "13.99238, 100.65185","13.82186, 100.72064"), seg_3 = c("13.63679, 100.71057", "13.66353, 100.71894","13.63349, 100.71096", "13.6344, 100.71093", "13.58393, 100.79077","13.61076, 100.71317", "13.60193, 100.71915", "13.92155, 100.68866","13.98482, 100.61921", "13.85196, 100.72147"), seg_4 = c("13.65828, 100.71631","13.66353, 100.71894", "13.63349, 100.71096", "13.6344, 100.71093","13.58385, 100.79403", "13.62504, 100.71832", "13.60487, 100.71483","13.92121, 100.65924", "13.98708, 100.61634", "13.88833, 100.72214"), seg_5 = c("13.65828, 100.71631", "13.65958, 100.71773", "13.63349, 100.71096","13.6344, 100.71093", "13.59328, 100.76465", "13.63082, 100.71459","13.605, 100.71402", "13.95379, 100.62163", "13.98707, 100.61612","13.92194, 100.70304"), seg_6 = c("13.65828, 100.71631", "13.65258, 100.71571","13.63349, 100.71096", "13.63433, 100.71101", "13.59612, 100.74922","13.63241, 100.71297", "13.60567, 100.71292", "13.95905, 100.62037","13.98712, 100.61631", "13.90366, 100.66788")), row.names = c(NA,-10L), class = c("tbl_df", "tbl", "data.frame"))
I want to separate seg_X
columns from comma and convert them into numeric.
So, Mu output should be like this;
# A tibble: 10 x 14
vehicle_id trip_id seg_1_lat seg_1_lon seg_2_lat seg_2_lon seg_3_lat seg_3_lon seg_4_lat seg_4_lon seg_5_lat seg_5_lon seg_6_lat seg_6_lon
<chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Zz3yE90z++QmTX2QO5~ 1 13.67829 100.6238~ 13.65382 100.6756~ 13.63679 100.710~ 13.65828 100.716~ 13.65828 100.716~ 13.65828 100.716~
Upvotes: 1
Views: 50
Reputation: 17668
you can try a tidyverse
library(tidyverse)
my.df %>%
pivot_longer(-1:-2) %>%
separate_rows(value, sep=", ") %>%
mutate(name = paste(name, c("lat", "lon"), sep="_")) %>%
pivot_wider(names_from = name, values_from = value, values_fn = as.numeric)
# A tibble: 10 x 14
vehicle_id trip_id seg_1_lat seg_1_lon seg_2_lat seg_2_lon seg_3_lat seg_3_lon seg_4_lat seg_4_lon seg_5_lat seg_5_lon
<chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Zz3yE90z+~ 1 13.7 101. 13.7 101. 13.6 101. 13.7 101. 13.7 101.
2 Zz3yE90z+~ 2 13.7 101. 13.7 101. 13.7 101. 13.7 101. 13.7 101.
3 Zz3yE90z+~ 3 13.6 101. 13.6 101. 13.6 101. 13.6 101. 13.6 101.
4 Zz3yE90z+~ 4 13.6 101. 13.6 101. 13.6 101. 13.6 101. 13.6 101.
5 Zz3yE90z+~ 5 13.6 101. 13.6 101. 13.6 101. 13.6 101. 13.6 101.
6 Zz3yE90z+~ 6 13.6 101. 13.6 101. 13.6 101. 13.6 101. 13.6 101.
7 Zz3yE90z+~ 7 13.6 101. 13.6 101. 13.6 101. 13.6 101. 13.6 101.
8 /+bx80f3g~ 44496 13.9 101. 13.9 101. 13.9 101. 13.9 101. 14.0 101.
9 /+bx80f3g~ 44497 14.0 101. 14.0 101. 14.0 101. 14.0 101. 14.0 101.
10 /+bx80f3g~ 44498 13.8 101. 13.8 101. 13.9 101. 13.9 101. 13.9 101.
# ... with 2 more variables: seg_6_lat <dbl>, seg_6_lon <dbl>
Or something like
map(names(my.df)[-1:-2], ~separate(my.df, .x, into = paste(.x, c("lat", "lon"), sep="_"), sep=", ") %>% select(contains(.x))) %>%
bind_cols(my.df[,1:2],.)
Upvotes: 2