Reputation: 478
The following is a small sample of my data frame:
> print(df1)
Section A B
1 ./100-12-S00.xlsx 0.6032591 0.4554364
2 ./108-15-S01.xlsx 0.7109148 0.5367121
3 ./92-12-S00.xlsx 0.6032591 0.4554364
4 ./124-9-S05.xlsx 0.7109148 0.5367121
5 ./116-15-S11.xlsx 0.6032591 0.4554364
6 ./108-9-S06.xlsx 0.7109148 0.5367121
7 ./84-12-S08.xlsx 0.6032591 0.4554364
8 ./124-15-S11.xlsx 0.7109148 0.5367121
9 ./92-12-S00.xlsx 0.6032591 0.4554364
10 ./116-15-S03.xlsx 0.7109148 0.5367121
11 ./100-12-S02.xlsx 0.6032591 0.4554364
12 ./84-9-S05.xlsx 0.7109148 0.5367121
I would like to add a new column of texts to df1 based on the characters in the Section column. This must be done such that if Section includes:
> print(df2)
Section Names
1 S00 Baseline
2 S01 Sample1
3 S02 Sample2
4 S03 Sample3
5 S04 Sample4
6 S05 Sample5
7 S06 Sample6
8 S07 Sample7
9 S08 Sample8
10 S09 Sample9
11 S10 Sample10
12 S11 AASHTO
I want df1 to include a new column called Names based on what is in the Section column.
The expected output is:
> print(df3)
Section A B Names
1 ./100-12-S00.xlsx 0.6032591 0.4554364 Baseline
2 ./108-15-S01.xlsx 0.7109148 0.5367121 Sample1
3 ./92-12-S00.xlsx 0.6032591 0.4554364 Baseline
4 ./124-9-S05.xlsx 0.7109148 0.5367121 Sample5
5 ./116-15-S11.xlsx 0.6032591 0.4554364 AASHTO
6 ./108-9-S06.xlsx 0.7109148 0.5367121 Sample6
7 ./84-12-S08.xlsx 0.6032591 0.4554364 Sample8
8 ./124-15-S11.xlsx 0.7109148 0.5367121 AASHTO
9 ./92-12-S00.xlsx 0.6032591 0.4554364 Baseline
10 ./116-15-S03.xlsx 0.7109148 0.5367121 Sample3
11 ./100-12-S02.xlsx 0.6032591 0.4554364 Sample2
12 ./84-9-S05.xlsx 0.7109148 0.5367121 Sample5
Upvotes: 1
Views: 40
Reputation: 887088
We can create a new column by extracting the substring in 'df1' and do a left
join with 'df2'
library(dplyr)
library(stringr)
df1 %>%
mutate(Section2 = str_extract(Section, "S\\d+")) %>%
left_join(df2, by = c("Section2" = "Section")) %>%
select(-Section2)
# Section A B Names
#1 ./100-12-S00.xlsx 0.6032591 0.4554364 Baseline
#2 ./108-15-S01.xlsx 0.7109148 0.5367121 Sample1
#3 ./92-12-S00.xlsx 0.6032591 0.4554364 Baseline
#4 ./124-9-S05.xlsx 0.7109148 0.5367121 Sample5
#5 ./116-15-S11.xlsx 0.6032591 0.4554364 AASHTO
#6 ./108-9-S06.xlsx 0.7109148 0.5367121 Sample6
#7 ./84-12-S08.xlsx 0.6032591 0.4554364 Sample8
#8 ./124-15-S11.xlsx 0.7109148 0.5367121 AASHTO
#9 ./92-12-S00.xlsx 0.6032591 0.4554364 Baseline
#10 ./116-15-S03.xlsx 0.7109148 0.5367121 Sample3
#11 ./100-12-S02.xlsx 0.6032591 0.4554364 Sample2
#12 ./84-9-S05.xlsx 0.7109148 0.5367121 Sample5
Or without much effort, use regex_left_join
from fuzzyjoin
library(fuzzyjoin)
regex_left_join(df1, df2, by = 'Section') %>%
select(Section = Section.x, A, B, Names)
# A tibble: 12 x 4
# Section A B Names
# <chr> <dbl> <dbl> <chr>
# 1 ./100-12-S00.xlsx 0.603 0.455 Baseline
# 2 ./108-15-S01.xlsx 0.711 0.537 Sample1
# 3 ./92-12-S00.xlsx 0.603 0.455 Baseline
# 4 ./124-9-S05.xlsx 0.711 0.537 Sample5
# 5 ./116-15-S11.xlsx 0.603 0.455 AASHTO
# 6 ./108-9-S06.xlsx 0.711 0.537 Sample6
# 7 ./84-12-S08.xlsx 0.603 0.455 Sample8
# 8 ./124-15-S11.xlsx 0.711 0.537 AASHTO
# 9 ./92-12-S00.xlsx 0.603 0.455 Baseline
#10 ./116-15-S03.xlsx 0.711 0.537 Sample3
#11 ./100-12-S02.xlsx 0.603 0.455 Sample2
#12 ./84-9-S05.xlsx 0.711 0.537 Sample5
df1 <- structure(list(Section = c("./100-12-S00.xlsx", "./108-15-S01.xlsx",
"./92-12-S00.xlsx", "./124-9-S05.xlsx", "./116-15-S11.xlsx",
"./108-9-S06.xlsx", "./84-12-S08.xlsx", "./124-15-S11.xlsx",
"./92-12-S00.xlsx", "./116-15-S03.xlsx", "./100-12-S02.xlsx",
"./84-9-S05.xlsx"), A = c(0.6032591, 0.7109148, 0.6032591, 0.7109148,
0.6032591, 0.7109148, 0.6032591, 0.7109148, 0.6032591, 0.7109148,
0.6032591, 0.7109148), B = c(0.4554364, 0.5367121, 0.4554364,
0.5367121, 0.4554364, 0.5367121, 0.4554364, 0.5367121, 0.4554364,
0.5367121, 0.4554364, 0.5367121)), class = "data.frame", row.names = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"))
df2 <- structure(list(Section = c("S00", "S01", "S02", "S03", "S04",
"S05", "S06", "S07", "S08", "S09", "S10", "S11"), Names = c("Baseline",
"Sample1", "Sample2", "Sample3", "Sample4", "Sample5", "Sample6",
"Sample7", "Sample8", "Sample9", "Sample10", "AASHTO")),
class = "data.frame", row.names = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"))
Upvotes: 1