Reputation: 453
This is what the sample looks like:
# A tibble: 10 x 18
trip_id position_lon_1 position_lat_1 position_lon_2 position_lat_2 position_lon_3 position_lat_3 position_lon_4 position_lat_4 position_lon_5
<int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 101. 13.7 101. 13.7 101. 13.7 101. 13.7 101.
2 2 101. 13.7 101. 13.7 101. 13.7 101. 13.7 101.
3 3 101. 13.8 101. 13.7 101. 13.7 101. 13.7 101.
4 4 101. 13.8 101. 13.8 101. 13.8 101. 13.7 101.
5 5 101. 13.8 101. 13.8 101. 13.8 101. 13.8 101.
6 6 101. 13.7 101. 13.7 101. 13.7 101. 13.7 101.
7 7 101. 13.7 101. 13.7 101. 13.7 101. 13.7 101.
8 1977 101. 13.7 101. 13.7 101. 13.7 101. 13.7 101.
9 1978 100. 13.7 101. 13.7 101. 13.7 101. 13.7 101.
10 1979 101. 13.8 101. 13.8 101. 13.8 101. 13.7 101.
# ... with 8 more variables: position_lat_5 <dbl>, position_lon_6 <dbl>, position_lat_6 <dbl>, start_time1 <dbl>, end_time1 <dbl>, length <dbl>,
# purpose <chr>, clusterNum <dbl>
example <- structure(list(trip_id = c(1, 2, 3, 4, 5, 6, 7, 1977, 1978, 1979), position_lon_1 = c(100.5423, 100.53881, 100.5824, 100.53724,100.52715, 100.50491, 100.51813, 100.57668, 100.49228, 100.52679), position_lat_1 = c(13.7444, 13.71867, 13.75446, 13.76453,13.76981, 13.73128, 13.73133, 13.73969, 13.7447, 13.76592), position_lon_2 = c(100.54767,100.53822, 100.57764, 100.53057, 100.53335, 100.50947, 100.52216,100.57951, 100.50324, 100.52661), position_lat_2 = c(13.74409,13.72446, 13.74911, 13.76827, 13.75848, 13.72012, 13.73132, 13.73407,13.74693, 13.76381), position_lon_3 = c(100.54828, 100.54081,100.58012, 100.53755, 100.53717, 100.53354, 100.52927, 100.58263,100.51611, 100.52553), position_lat_3 = c(13.74762, 13.72764,13.74582, 13.75122, 13.76324, 13.72289, 13.73678, 13.73423, 13.73462,13.75167), position_lon_4 = c(100.54839, 100.55916, 100.58443,100.52854, 100.53642, 100.53843, 100.53061, 100.58309, 100.5224,100.53361), position_lat_4 = c(13.7487, 13.72082, 13.7449, 13.73238,13.76578, 13.73429, 13.74567, 13.73556, 13.72415, 13.74591),position_lon_5 = c(100.54841, 100.54585, 100.59693, 100.51346,100.54466, 100.54338, 100.53061, 100.58449, 100.5156, 100.53465), position_lat_5 = c(13.74902, 13.74356, 13.74322, 13.73616,13.76328, 13.76289, 13.74567, 13.73958, 13.71989, 13.74595), position_lon_6 = c(100.54841, 100.54585, 100.59693, 100.51346,100.54466, 100.54338, 100.53061, 100.58449, 100.5156, 100.53465), position_lat_6 = c(13.74902, 13.74356, 13.74322, 13.73616,13.76328, 13.76289, 13.74567, 13.73958, 13.71989, 13.74595), start_time1 = c(6.72472222222222, 6.82194444444444, 7.57361111111111,6.56611111111111, 6.31111111111111, 6.29916666666667, 6.59555555555556,16.3666666666667, 16.1305555555556, 16.0111111111111), end_time1 = c(6.82027777777778,7.30527777777778, 7.79777777777778, 7.13111111111111, 6.66111111111111,6.89916666666667, 6.77388888888889, 16.55, 16.6925, 16.7827777777778), length = c(835.421673262, 2857.08440862, 2003.94967386,4057.83640791, 2026.71013038, 5435.16534982, 2083.02205084,844.774412796, 3727.75857424, 2367.29993395), purpose = c("Bank/ATM","Business", "Infrastructure/Hydro", "Service", "Bank/ATM","Automobile", "Food", "Food", "Food", "Business"), clusterNum = c(2,2, 1, 3, 2, 2, 2, 4, 3, 1)), row.names = c(NA, -10L), class = c("tbl_df","tbl", "data.frame"))
I want to create a new data frame by converting rows data into new columns. This is what my expected output will look like this;
Any packages and functions from R can achieve this? Thank you for any helps.
Upvotes: 1
Views: 78
Reputation: 42544
For the sake of completeness, here is a solution which uses the melt()
and the measure()
functions (new with data.table version 1.14.1):
library(data.table) # development version 1.14.1 used
long <- melt(setDT(example), measure.vars =
measure(value.name, position, pattern = "(lon|lat)_(\\d)"))[order(trip_id)]
head(long, 12)
trip_id start_time1 end_time1 length purpose clusterNum position lon lat 1: 1 6.724722 6.820278 835.4217 Bank/ATM 2 1 100.5423 13.74440 2: 1 6.724722 6.820278 835.4217 Bank/ATM 2 2 100.5477 13.74409 3: 1 6.724722 6.820278 835.4217 Bank/ATM 2 3 100.5483 13.74762 4: 1 6.724722 6.820278 835.4217 Bank/ATM 2 4 100.5484 13.74870 5: 1 6.724722 6.820278 835.4217 Bank/ATM 2 5 100.5484 13.74902 6: 1 6.724722 6.820278 835.4217 Bank/ATM 2 6 100.5484 13.74902 7: 2 6.821944 7.305278 2857.0844 Business 2 1 100.5388 13.71867 8: 2 6.821944 7.305278 2857.0844 Business 2 2 100.5382 13.72446 9: 2 6.821944 7.305278 2857.0844 Business 2 3 100.5408 13.72764 10: 2 6.821944 7.305278 2857.0844 Business 2 4 100.5592 13.72082 11: 2 6.821944 7.305278 2857.0844 Business 2 5 100.5459 13.74356 12: 2 6.821944 7.305278 2857.0844 Business 2 6 100.5459 13.74356
If required, the column order can be changed by
setcolorder(long, c("trip_id", "lon", "lat", "position"))[]
head(long, 12)
trip_id lon lat position start_time1 end_time1 length purpose clusterNum 1: 1 100.5423 13.74440 1 6.724722 6.820278 835.4217 Bank/ATM 2 2: 1 100.5477 13.74409 2 6.724722 6.820278 835.4217 Bank/ATM 2 3: 1 100.5483 13.74762 3 6.724722 6.820278 835.4217 Bank/ATM 2 4: 1 100.5484 13.74870 4 6.724722 6.820278 835.4217 Bank/ATM 2 5: 1 100.5484 13.74902 5 6.724722 6.820278 835.4217 Bank/ATM 2 6: 1 100.5484 13.74902 6 6.724722 6.820278 835.4217 Bank/ATM 2 7: 2 100.5388 13.71867 1 6.821944 7.305278 2857.0844 Business 2 8: 2 100.5382 13.72446 2 6.821944 7.305278 2857.0844 Business 2 9: 2 100.5408 13.72764 3 6.821944 7.305278 2857.0844 Business 2 10: 2 100.5592 13.72082 4 6.821944 7.305278 2857.0844 Business 2 11: 2 100.5459 13.74356 5 6.821944 7.305278 2857.0844 Business 2 12: 2 100.5459 13.74356 6 6.821944 7.305278 2857.0844 Business 2
Upvotes: 0
Reputation: 21908
I hope this is what you have in mind:
library(dplyr)
library(tidyr)
example %>%
pivot_longer(starts_with("position"), names_to = c(NA, ".value", "position"),
names_pattern = "(\\w+)_(\\w+)_(\\d)")
# A tibble: 60 x 9
trip_id start_time1 end_time1 length purpose clusterNum position lon lat
<dbl> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <dbl> <dbl>
1 1 6.72 6.82 835. Bank/ATM 2 1 101. 13.7
2 1 6.72 6.82 835. Bank/ATM 2 2 101. 13.7
3 1 6.72 6.82 835. Bank/ATM 2 3 101. 13.7
4 1 6.72 6.82 835. Bank/ATM 2 4 101. 13.7
5 1 6.72 6.82 835. Bank/ATM 2 5 101. 13.7
6 1 6.72 6.82 835. Bank/ATM 2 6 101. 13.7
7 2 6.82 7.31 2857. Business 2 1 101. 13.7
8 2 6.82 7.31 2857. Business 2 2 101. 13.7
9 2 6.82 7.31 2857. Business 2 3 101. 13.7
10 2 6.82 7.31 2857. Business 2 4 101. 13.7
# ... with 50 more rows
Upvotes: 1
Reputation: 2164
What you're looking for is a way to convert columns to rows (and not the other way around), but in a way that keeps the "position" identifier and stores longitude and latitude in separate columns. The pivot_longer
function from tidyr allows you to do that! Check the documentation (here) for a detailed explanation of the different arguments.
library(tidyverse)
example %>%
pivot_longer(
starts_with("position"),names_sep = "_", names_to = c(NA, ".value", "pos")
)
#> # A tibble: 60 x 9
#> trip_id start_time1 end_time1 length purpose clusterNum pos lon lat
#> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <dbl> <dbl>
#> 1 1 6.72 6.82 835. Bank/ATM 2 1 101. 13.7
#> 2 1 6.72 6.82 835. Bank/ATM 2 2 101. 13.7
#> 3 1 6.72 6.82 835. Bank/ATM 2 3 101. 13.7
#> 4 1 6.72 6.82 835. Bank/ATM 2 4 101. 13.7
#> 5 1 6.72 6.82 835. Bank/ATM 2 5 101. 13.7
#> 6 1 6.72 6.82 835. Bank/ATM 2 6 101. 13.7
#> 7 2 6.82 7.31 2857. Business 2 1 101. 13.7
#> 8 2 6.82 7.31 2857. Business 2 2 101. 13.7
#> 9 2 6.82 7.31 2857. Business 2 3 101. 13.7
#> 10 2 6.82 7.31 2857. Business 2 4 101. 13.7
#> # ... with 50 more rows
Upvotes: 1
Reputation: 27732
This might do the trick for you
library(tidyverse)
example %>%
pivot_longer(cols = starts_with("position"),
names_to = c(".value", "position"),
names_pattern = "^position_(.*)_(.)",
values_drop_na = TRUE)
Upvotes: 1