Reputation: 2097
library(tidyverse)
The code provided below creates three dataframes - Main, LookUp and Final. I'm trying to use the Main and LookUp dataframes to create the Final dataframe.
For example, the Final table only keeps the "Sections" that have the number provided in Section_Lookup of the LookUp table, while also keeping the corresponding "Title" variable.
I would like to use tidyverse as much as possible. Most of my attempts have been along the lines of the code below. I'm thinking that using two loops, or purrr, will allow me to loop over the Main and LookUp tables. This is more advanced than what I usually attempt, so I would like some help on how to proceed and work with this type of situation.
New<-map(Main, function(x) {
map(LookUp, function(y) if_else(x$Title1==y$Title_Lookup & ...x$Section1 CONTAINS Y SECTION_LOOKUP... ) )}),
Sample code is below:
Main Dataframe:
Title1<-c("101A", "101A", "101A", "101A", "101A", "101A", "203S", "203S", "203S", "203S", "203S", "203S", "203S", "203S", "203S", "203S", "203S", "203S", "203S", "400B", "400B", "400B", "400B", "200A", "200A", "250D", "250D", "250D", "250D")
Section1<-c("2A", "2A", "2B", "2B", "2B", "2C", "2A", "2A", "4A", "4A", "4A", "4B", "4B", "4C", "4C", "4C", "4C", "4D", "4D", "2A", "2A", "2B", "2B", "2A", "6A", "1A", "1B", "2A", "2A")
Main<-data_frame(Title1,Section1)
LookUp Table:
Title_Lookup<-c("101A", "203S", "203S", "400B", "200A", "200A", "250D")
Section_Lookup<-c(2, 2, 4, 2, 2, 6, 2)
LookUp<-data_frame(Title_Lookup,Section_Lookup)
Final Dataframe:
Section_Final<-c("2A", "2B", "2C", "2A", "4A", "4B", "4C", "4D", "2A", "2B", "2A", "6A", "2A")
Title_Final<-c("101A", "101A", "101A", "203S", "203S", "203S", "203S", "203S", "400B", "400B", "200A", "200A", "250D")
Final<-data_frame(Title_Final,Section_Final)
Upvotes: 0
Views: 125
Reputation: 20095
Another approach could be based on joining only on Section
column.
library(dplyr)
Name1<-c("Name1", "Name2", "Name3", "Name4", "Name5", "Name6", "Name7", "Name8", "Name9",
"Name10", "Name11", "Name12", "Name13", "Name14", "Name15", "Name16", "Name17",
"Name18", "Name19", "Name20", "Name21", "Name22", "Name23", "Name24", "Name25",
"Name26", "Name27", "Name28", "Name29")
Code<-c(10123, 13432, 34554, 45563, 43666, 54444, 55322, 52111, 33443, 88998, 54554,
33455, 65889, 88888, 22344, 54455, 66655, 22222, 65564, 77677, 65545, 67765,
34334, 88789, 76776, 67765, 55555, 65445, 65665)
Title1<-c("101A", "101A", "101A", "101A", "101A", "101A", "203S", "203S", "203S", "203S",
"203S", "203S", "203S", "203S", "203S", "203S", "203S", "203S", "203S", "400B",
"400B", "400B", "400B", "200A", "200A", "250D", "250D", "250D", "250D")
Section1<-c("2A", "2A", "2B", "2B", "2B", "2C", "2A", "2A", "4A", "4A", "4A", "4B", "4B",
"4C", "4C", "4C", "4C", "4D", "4D", "2A", "2A", "2B", "2B", "2A", "6A", "1A",
"1B", "2A", "2A")
Main<-data_frame(Name1,Code,Title1,Section1)
Title_Lookup<-c("101A", "203S", "203S", "400B", "200A", "200A", "250D")
Section_Lookup<-c(2, 2, 4, 2, 2, 6, 2)
LookUp<-data_frame(Title_Lookup,Section_Lookup)
#create data.frame of distinct Sections
df_sections <- distinct(LookUp, Section_Lookup) %>% as.data.frame()
#Use filter to select those records having matching numeric value in Section
filter(Main, as.numeric(gsub("([0-9]).*","\\1",Section1)) %in% df$Section_Lookup) %>%
select(Title1, Section1) %>% distinct()
#The result:
# A tibble: 13 x 2
Title1 Section1
<chr> <chr>
1 101A 2A
2 101A 2B
3 101A 2C
4 203S 2A
5 203S 4A
6 203S 4B
7 203S 4C
8 203S 4D
9 400B 2A
10 400B 2B
11 200A 2A
12 200A 6A
13 250D 2A
Upvotes: 0
Reputation: 39154
A solution using tidyverse. The str_replace
function is from stringr, which is part of the tidyverse. If you just want to load the dplyr package, you can use sub("\\D+$", "", Section1)
instead of str_replace
.
library(tidyverse)
Main2 <- Main %>%
mutate(Number = as.numeric(str_replace(Section1, "\\D+$", ""))) %>%
semi_join(LookUp, by = c("Title1" = "Title_Lookup",
"Number" = "Section_Lookup")) %>%
select(Title_Final = Title1, Section_Final = Section1) %>%
distinct()
Main2
# # A tibble: 13 x 2
# Title_Final Section_Final
# <chr> <chr>
# 1 101A 2A
# 2 101A 2B
# 3 101A 2C
# 4 203S 2A
# 5 203S 4A
# 6 203S 4B
# 7 203S 4C
# 8 203S 4D
# 9 400B 2A
# 10 400B 2B
# 11 200A 2A
# 12 200A 6A
# 13 250D 2A
Upvotes: 1
Reputation: 10855
Here is a solution based on the sqldf
package, taking advantage of charindex()
to see whether the string in Section_Lookup is present in Section1.
library(tidyverse)
Title1<-c("101A", "101A", "101A", "101A", "101A", "101A", "203S", "203S", "203S", "203S", "203S", "203S", "203S", "203S", "203S", "203S", "203S", "203S", "203S", "400B", "400B", "400B", "400B", "200A", "200A", "250D", "250D", "250D", "250D")
Section1<-c("2A", "2A", "2B", "2B", "2B", "2C", "2A", "2A", "4A", "4A", "4A", "4B", "4B", "4C", "4C", "4C", "4C", "4D", "4D", "2A", "2A", "2B", "2B", "2A", "6A", "1A", "1B", "2A", "2A")
Main<-data_frame(Title1,Section1)
Title_Lookup<-c("101A", "203S", "203S", "400B", "200A", "200A", "250D")
Section_Lookup<-as.character(c(2, 2, 4, 2, 2, 6, 2))
LookUp<-data_frame(Title_Lookup,Section_Lookup)
sqlQuery <- "select distinct a.Title1 as Title, a.Section1 as Section
from Main as a
left join LookUp as b
where
a.Title1 = b.Title_Lookup and
charindex(b.Section_Lookup,a.Section1) > 0"
sqldf(sqlQuery)
...and the output.
> sqldf(sqlQuery)
Title Section
1 101A 2A
2 101A 2B
3 101A 2C
4 203S 2A
5 203S 4A
6 203S 4B
7 203S 4C
8 203S 4D
9 400B 2A
10 400B 2B
11 200A 2A
12 200A 6A
13 250D 2A
>
Upvotes: 0