Reputation: 5169
I have the following data frame which contains a list of files.
library(tidyverse)
# Example input: a one-column tibble of source image paths (dput-style
# reconstruction, so it does not depend on the files existing on disk).
dat <- local({
  paths <- c(
    "data/monroe_20180214/180131 WT PB d5/PB x10_01.tif",
    "data/monroe_20180214/180131 WT PB d5/PB x10_02.tif",
    "data/monroe_20180214/180131 WT PB d5/PB x10_03.tif",
    "data/monroe_20180214/180131 WT PB d5/PB x10_04.tif",
    "data/monroe_20180214/180131 WT PB d5/PB x10_05.tif",
    "data/monroe_20180214/180131 WT PB d5/PB x10_06.tif"
  )
  # The column carries the "fs_path" class on top of plain character.
  class(paths) <- c("fs_path", "character")

  # list() already supplies the column name; row.names uses the compact
  # c(NA, -n) form for n automatic row names.
  structure(
    list(source_file = paths),
    row.names = c(NA, -6L),
    class = c("tbl_df", "tbl", "data.frame")
  )
})
dat
#> # A tibble: 6 x 1
#> source_file
#> <chr>
#> 1 data/monroe_20180214/180131 WT PB d5/PB x10_01.tif
#> 2 data/monroe_20180214/180131 WT PB d5/PB x10_02.tif
#> 3 data/monroe_20180214/180131 WT PB d5/PB x10_03.tif
#> 4 data/monroe_20180214/180131 WT PB d5/PB x10_04.tif
#> 5 data/monroe_20180214/180131 WT PB d5/PB x10_05.tif
#> 6 data/monroe_20180214/180131 WT PB d5/PB x10_06.tif
What I want to do is create a second column, `new_filename`, by replacing the first two directory components with the new path `pooled/`, replacing whitespace with `.`, and replacing the remaining forward slash (`/`) with `__`. How can I achieve that?
The desired result is
source_file new_filename
1 data/monroe_20180214/180131 WT PB d5/PB x10_01.tif pooled/180131.WT.PB.d5__PB.x10_01.tif
2 data/monroe_20180214/180131 WT PB d5/PB x10_02.tif ...
3 data/monroe_20180214/180131 WT PB d5/PB x10_03.tif .etc.
4 data/monroe_20180214/180131 WT PB d5/PB x10_04.tif
5 data/monroe_20180214/180131 WT PB d5/PB x10_05.tif
6 data/monroe_20180214/180131 WT PB d5/PB x10_06.tif
Upvotes: 0
Views: 76
Reputation: 388982
One liner:
# Inner to outer: drop the first two "dir/" components, translate
# " " -> "." and "/" -> "_" character by character, then prefix "pooled/".
paste0(
  "pooled/",
  chartr(
    old = " /",
    new = "._",
    x = sub(
      pattern = "^(?:[^\\/]*\\/){2}",
      replacement = "",
      x = dat$source_file
    )
  )
)
#[1] "pooled/180131.WT.PB.d5_PB.x10_01.tif"
#[2] "pooled/180131.WT.PB.d5_PB.x10_02.tif"
#[3] "pooled/180131.WT.PB.d5_PB.x10_03.tif"
#[4] "pooled/180131.WT.PB.d5_PB.x10_04.tif"
#[5] "pooled/180131.WT.PB.d5_PB.x10_05.tif"
#[6] "pooled/180131.WT.PB.d5_PB.x10_06.tif"
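Here we first remove everything up to and including the second `/` by replacing it with an empty string (`""`), then use the `chartr` function from base R to replace spaces with a dot (`.`) and forward slashes (`/`) with an underscore (`_`), and finally use `paste0` to prepend `pooled/` to the result. Note that `chartr` translates character for character, so each `/` becomes a single `_`.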
The regex for sub
part has been taken from here.
Adding this in dplyr
call :
# Same transformation as the one-liner, unrolled into sequential mutate()
# steps (later expressions see the value assigned by earlier ones).
dat %>%
  mutate(
    # Remove the first two directory components.
    new_filename = sub("^(?:[^\\/]*\\/){2}", "", source_file),
    # Translate " " -> "." and "/" -> "_".
    new_filename = chartr(" /", "._", new_filename),
    # Put everything under the new top-level directory.
    new_filename = paste0("pooled/", new_filename)
  ) %>%
  select(new_filename)
#new_filename
# <chr>
#1 pooled/180131.WT.PB.d5_PB.x10_01.tif
#2 pooled/180131.WT.PB.d5_PB.x10_02.tif
#3 pooled/180131.WT.PB.d5_PB.x10_03.tif
#4 pooled/180131.WT.PB.d5_PB.x10_04.tif
#5 pooled/180131.WT.PB.d5_PB.x10_05.tif
#6 pooled/180131.WT.PB.d5_PB.x10_06.tif
Upvotes: 2
Reputation: 903
With `gsub()` from base R you can do it as well:
# Build new_var in four small substitution steps. The console continuation
# prompts ("+") from the original paste are removed so the snippet parses
# when copied into a script.
dat %>%
  mutate(
    # Swap the leading source directories for the new top-level directory;
    # anchored with ^ so only the prefix of each path can match.
    new_var = sub("^data/monroe_20180214/", "pooled/", source_file),
    # Spaces become dots; fixed = TRUE matches the pattern literally.
    new_var = gsub(" ", ".", new_var, fixed = TRUE),
    # Every directory separator becomes an underscore (including the one
    # right after "pooled", which the next step restores).
    new_var = gsub("/", "_", new_var, fixed = TRUE),
    # Restore only the leading "pooled/" separator; anchoring avoids touching
    # any later "pooled_" that might occur inside a file name.
    new_var = sub("^pooled_", "pooled/", new_var)
  )
# A tibble: 6 x 2
source_file new_var
<chr> <chr>
1 data/monroe_20180214/180131 WT PB d5/PB x10_01.tif pooled/180131.WT.PB.d5_PB.x10_01.tif
2 data/monroe_20180214/180131 WT PB d5/PB x10_02.tif pooled/180131.WT.PB.d5_PB.x10_02.tif
3 data/monroe_20180214/180131 WT PB d5/PB x10_03.tif pooled/180131.WT.PB.d5_PB.x10_03.tif
4 data/monroe_20180214/180131 WT PB d5/PB x10_04.tif pooled/180131.WT.PB.d5_PB.x10_04.tif
5 data/monroe_20180214/180131 WT PB d5/PB x10_05.tif pooled/180131.WT.PB.d5_PB.x10_05.tif
6 data/monroe_20180214/180131 WT PB d5/PB x10_06.tif pooled/180131.WT.PB.d5_PB.x10_06.tif
Upvotes: 2