Jstation
Jstation

Reputation: 407

How can I do multiple lookups in R?

So I have file kit.txt like this (it's all on one line):

"bbc_a#22292", "bbc_b#22293", "desert8#5394", "city4#2525",

And I have stores.txt like this (it's all in one line):

["22292"] = {"bbc/images/auction.jpg#567482", "bbc/images/station.jpg#517482"}, ["5394"] = {"bbc/images/sunrise.jpg#567499"}, ["432112"] = {"bbc/images/sunset.mp3#53209", "bbc/images/alert.mp3#51109"},

And I have area.txt like this (one row per line):

bbc/images/auction.jpg;52
bbc/images/sunrise.jpg;38
bbc/images/stars.jpg;46

What I'm trying to do is, for each row in kit.txt, the ID from kit.txt should be looked up in stores.txt (first column in braces). If the ID exists, the title from stores.txt (the text before the hash symbol) should be looked up in area.txt. If the title exists in area.txt, then it should be output to a file like this with the title, stores ID and area ID shown (one row per line):

"bbc/images/auction.jpg#567482#52",
"bbc/images/sunrise.jpg#567499#38",

Using the quotes and commas as shown. So only auction and sunrise are included because only they exist in both kit.txt and stores.txt.

How can I do this in R?

Thank you.

Upvotes: 1

Views: 80

Answers (2)

Ronak Shah
Ronak Shah

Reputation: 389095

You need to perform some cleaning of all the files so that they are in usable format.

library(dplyr)
library(tidyr)

#Read and clean 'kit.txt'
read.table('kit.txt', sep = ';') %>%
  separate_rows(V1, sep = ',\\s+') %>%
  separate(V1, c('V1', 'V2'), sep = '#') %>%
  mutate(V2 = sub(',$', '', V2)) -> d1

d1
# A tibble: 4 x 2
#  V1      V2   
#  <chr>   <chr>
#1 bbc_a   22292
#2 bbc_b   22293
#3 desert8 5394 
#4 city4   2525 

#Read and clean 'stores.txt'
read.table('stores.txt', sep = '@') %>%
  separate_rows(V1, sep = ',\\s\\[') %>%
  separate(V1, c('V1', 'V2'), sep = '\\s=\\s') %>%
  separate_rows(V2, sep = ',\\s') %>%
  mutate(V2 = gsub('\\{|\\}', '', V2), 
         V1 = trimws(gsub('\\[|\\]', '', V1))) %>%
  separate(V2, c('V2', 'V3'), sep = '#') %>%
  mutate(V3 = sub(',$', '',V3)) -> d2

d2
#   V1     V2                     V3    
#  <chr>  <chr>                  <chr> 
#1 22292  bbc/images/auction.jpg 567482
#2 22292  bbc/images/station.jpg 517482
#3 5394   bbc/images/sunrise.jpg 567499
#4 432112 bbc/images/sunset.mp3  53209 
#5 432112 bbc/images/alert.mp3   51109 

#read area.txt
d3 <- read.table('area.txt', sep = ';')

d3
#                      V1 V2
#1 bbc/images/auction.jpg 52
#2 bbc/images/sunrise.jpg 38
#3   bbc/images/stars.jpg 46

#combine d1, d2 and d3
d1 %>%
  inner_join(d2, by = c('V2' = 'V1')) %>%
  inner_join(d3, by = c('V2.y' = 'V1')) -> result

result

# A tibble: 2 x 5
#  V1      V2.x  V2.y                   V3     V2.y.y
#  <chr>   <chr> <chr>                  <chr>   <int>
#1 bbc_a   22292 bbc/images/auction.jpg 567482     52
#2 desert8 5394  bbc/images/sunrise.jpg 567499     38

#Write the file
cat(sprintf('"%s#%s#%s",', result$V2.y, result$V3, result$V2.y.y), file = 'result.lua', sep = '\n')

For the example shared, the same output can be achieved without using 'kit.txt' as well.

Upvotes: 2

user2338823
user2338823

Reputation: 543

Here is my attempt.

$ cat file1.txt
"bbc_a#22292", "bbc_b#22293", "desert8#5394", "city4#2525"
$ cat file2.txt
["22292"] = {"bbc/images/auction.jpg#567482"}, ["5394"] = {"bbc/images/sunrise.jpg#567499"}, ["432112"] = {"bbc/images/sunset.mp3#53209"}
$ cat file3.txt
bbc/images/auction.jpg;52
bbc/images/sunrise.jpg;38
bbc/images/stars.jpg;46



# Reading in the files as vectors.

v1 <- scan("file1.txt",what = character(),sep=",")
v2 <- scan("file2.txt",what=character(),sep=",")
v3 <- scan("file3.txt",what = character(),sep="\n")

# Making dataframes from the vectors

library(stringr)
df1 <- str_split_fixed(v1,"#",2)
df2 <- str_split_fixed(v2,"=",2)
df3 <- str_split_fixed(v3,";",2)


> df1
     [,1]       [,2]   
[1,] "bbc_a"    "22292"
[2,] " bbc_b"   "22293"
[3,] " desert8" "5394" 
[4,] " city4"   "2525" 
> df2
     [,1]         [,2]                              
[1,] "[22292] "   " {bbc/images/auction.jpg#567482}"
[2,] " [5394] "   " {bbc/images/sunrise.jpg#567499}"
[3,] " [432112] " " {bbc/images/sunset.mp3#53209}"  
> df3
     [,1]                     [,2]
[1,] "bbc/images/auction.jpg" "52"
[2,] "bbc/images/sunrise.jpg" "38"
[3,] "bbc/images/stars.jpg"   "46"
> 
library(dplyr)

df1 <- mutate_all(data.frame(df1),list(trimws))
> df1
       X1    X2
1   bbc_a 22292
2   bbc_b 22293
3 desert8  5394
4   city4  2525
> 

df2 <- mutate_all(data.frame(df2), list(trimws))
> df2
        X1                              X2
1  [22292] {bbc/images/auction.jpg#567482}
2   [5394] {bbc/images/sunrise.jpg#567499}
3 [432112]   {bbc/images/sunset.mp3#53209}
> 

df2 <- mutate_all(df2,str_sub,2,-2)
> df2
      X1                            X2
1  22292 bbc/images/auction.jpg#567482
2   5394 bbc/images/sunrise.jpg#567499
3 432112   bbc/images/sunset.mp3#53209
> 
df4 <- str_split_fixed(df2$X2,"#",2)
> df4
     [,1]                     [,2]    
[1,] "bbc/images/auction.jpg" "567482"
[2,] "bbc/images/sunrise.jpg" "567499"
[3,] "bbc/images/sunset.mp3"  "53209" 
> 
df2<- df2[,1]
df2 <- cbind(df2,df4)
> df2
     df2                                       
[1,] "22292"  "bbc/images/auction.jpg" "567482"
[2,] "5394"   "bbc/images/sunrise.jpg" "567499"
[3,] "432112" "bbc/images/sunset.mp3"  "53209" 
> 
kit <- as_tibble(df1)
colnames(df2)<-NULL
store <- as_tibble(df2)
area <- as_tibble(df3)
> kit
# A tibble: 4 x 2
  X1      X2   
  <chr>   <chr>
1 bbc_a   22292
2 bbc_b   22293
3 desert8 5394 
4 city4   2525 
> store
# A tibble: 3 x 3
  V1     V2                     V3    
  <chr>  <chr>                  <chr> 
1 22292  bbc/images/auction.jpg 567482
2 5394   bbc/images/sunrise.jpg 567499
3 432112 bbc/images/sunset.mp3  53209 
> area
# A tibble: 3 x 2
  V1                     V2   
  <chr>                  <chr>
1 bbc/images/auction.jpg 52   
2 bbc/images/sunrise.jpg 38   
3 bbc/images/stars.jpg   46   
> 
j1 <- inner_join(kit,store,by =c("X2"="V1"))

> j1
# A tibble: 2 x 4
  X1      X2    V2                     V3    
  <chr>   <chr> <chr>                  <chr> 
1 bbc_a   22292 bbc/images/auction.jpg 567482
2 desert8 5394  bbc/images/sunrise.jpg 567499
> 

j2 <- inner_join(j1,area,by = c("V2"="V1" ))

> j2
# A tibble: 2 x 5
  X1      X2    V2                     V3     V2.y 
  <chr>   <chr> <chr>                  <chr>  <chr>
1 bbc_a   22292 bbc/images/auction.jpg 567482 52   
2 desert8 5394  bbc/images/sunrise.jpg 567499 38   
> 
answer <- j2[,c(3:5)]
> answer
# A tibble: 2 x 3
  V2                     V3     V2.y 
  <chr>                  <chr>  <chr>
1 bbc/images/auction.jpg 567482 52   
2 bbc/images/sunrise.jpg 567499 38   
> 
colnames(answer)<- c("C1","C2","C3")

paste(answer$C1,answer$C2,answer$C3,sep="#")

[1] "bbc/images/auction.jpg#567482#52" "bbc/images/sunrise.jpg#567499#38

Upvotes: 1

Related Questions