Reputation: 407
So I have file kit.txt like this (it's all on one line):
"bbc_a#22292", "bbc_b#22293", "desert8#5394", "city4#2525",
And I have stores.txt like this (it's all in one line):
["22292"] = {"bbc/images/auction.jpg#567482", "bbc/images/station.jpg#517482"}, ["5394"] = {"bbc/images/sunrise.jpg#567499"}, ["432112"] = {"bbc/images/sunset.mp3#53209", "bbc/images/alert.mp3#51109"},
And I have area.txt like this (one row per line):
bbc/images/auction.jpg;52
bbc/images/sunrise.jpg;38
bbc/images/stars.jpg;46
What I'm trying to do is, for each row in kit.txt, the ID from kit.txt should be looked up in stores.txt (first column in braces). If the ID exists, the title from stores.txt (the text before the hash symbol) should be looked up in area.txt. If the title exists in area.txt, then it should be output to a file like this with the title, stores ID and area ID shown (one row per line):
"bbc/images/auction.jpg#567482#52",
"bbc/images/sunrise.jpg#567499#38",
Using the quotes and commas as shown. So only auction and sunrise are included because only they exist in both kit.txt and stores.txt.
How can I do this in R?
Thank you.
Upvotes: 1
Views: 80
Reputation: 389095
You need to perform some cleaning of all the files so that they are in usable format.
library(dplyr)
library(tidyr)
#Read and clean 'kit.txt'
read.table('kit.txt', sep = ';') %>%
separate_rows(V1, sep = ',\\s+') %>%
separate(V1, c('V1', 'V2'), sep = '#') %>%
mutate(V2 = sub(',$', '', V2)) -> d1
d1
# A tibble: 4 x 2
# V1 V2
# <chr> <chr>
#1 bbc_a 22292
#2 bbc_b 22293
#3 desert8 5394
#4 city4 2525
#Read and clean 'stores.txt'
read.table('stores.txt', sep = '@') %>%
separate_rows(V1, sep = ',\\s\\[') %>%
separate(V1, c('V1', 'V2'), sep = '\\s=\\s') %>%
separate_rows(V2, sep = ',\\s') %>%
mutate(V2 = gsub('\\{|\\}', '', V2),
V1 = trimws(gsub('\\[|\\]', '', V1))) %>%
separate(V2, c('V2', 'V3'), sep = '#') %>%
mutate(V3 = sub(',$', '',V3)) -> d2
d2
# V1 V2 V3
# <chr> <chr> <chr>
#1 22292 bbc/images/auction.jpg 567482
#2 22292 bbc/images/station.jpg 517482
#3 5394 bbc/images/sunrise.jpg 567499
#4 432112 bbc/images/sunset.mp3 53209
#5 432112 bbc/images/alert.mp3 51109
#read area.txt
d3 <- read.table('area.txt', sep = ';')
d3
# V1 V2
#1 bbc/images/auction.jpg 52
#2 bbc/images/sunrise.jpg 38
#3 bbc/images/stars.jpg 46
#combine d1, d2 and d3
d1 %>%
inner_join(d2, by = c('V2' = 'V1')) %>%
inner_join(d3, by = c('V2.y' = 'V1')) -> result
result
# A tibble: 2 x 5
# V1 V2.x V2.y V3 V2.y.y
# <chr> <chr> <chr> <chr> <int>
#1 bbc_a 22292 bbc/images/auction.jpg 567482 52
#2 desert8 5394 bbc/images/sunrise.jpg 567499 38
#Write the file
cat(sprintf('"%s#%s#%s",', result$V2.y, result$V3, result$V2.y.y), file = 'result.lua', sep = '\n')
For the example shared, the same output can be achieved without using 'kit.txt'
as well.
Upvotes: 2
Reputation: 543
Here is my attempt.
$ cat file1.txt
"bbc_a#22292", "bbc_b#22293", "desert8#5394", "city4#2525"
$ cat file2.txt
["22292"] = {"bbc/images/auction.jpg#567482"}, ["5394"] = {"bbc/images/sunrise.jpg#567499"}, ["432112"] = {"bbc/images/sunset.mp3#53209"}
$ cat file3.txt
bbc/images/auction.jpg;52
bbc/images/sunrise.jpg;38
bbc/images/stars.jpg;46
# Reading in the files as vectors.
v1 <- scan("file1.txt",what = character(),sep=",")
v2 <- scan("file2.txt",what=character(),sep=",")
v3 <- scan("file3.txt",what = character(),sep="\n")
# Making dataframes from the vectors
library(stringr)
df1 <- str_split_fixed(v1,"#",2)
df2 <- str_split_fixed(v2,"=",2)
df3 <- str_split_fixed(v3,";",2)
> df1
[,1] [,2]
[1,] "bbc_a" "22292"
[2,] " bbc_b" "22293"
[3,] " desert8" "5394"
[4,] " city4" "2525"
> df2
[,1] [,2]
[1,] "[22292] " " {bbc/images/auction.jpg#567482}"
[2,] " [5394] " " {bbc/images/sunrise.jpg#567499}"
[3,] " [432112] " " {bbc/images/sunset.mp3#53209}"
> df3
[,1] [,2]
[1,] "bbc/images/auction.jpg" "52"
[2,] "bbc/images/sunrise.jpg" "38"
[3,] "bbc/images/stars.jpg" "46"
>
library(dplyr)
df1 <- mutate_all(data.frame(df1),list(trimws))
> df1
X1 X2
1 bbc_a 22292
2 bbc_b 22293
3 desert8 5394
4 city4 2525
>
df2 <- mutate_all(data.frame(df2), list(trimws))
> df2
X1 X2
1 [22292] {bbc/images/auction.jpg#567482}
2 [5394] {bbc/images/sunrise.jpg#567499}
3 [432112] {bbc/images/sunset.mp3#53209}
>
df2 <- mutate_all(df2,str_sub,2,-2)
> df2
X1 X2
1 22292 bbc/images/auction.jpg#567482
2 5394 bbc/images/sunrise.jpg#567499
3 432112 bbc/images/sunset.mp3#53209
>
df4 <- str_split_fixed(df2$X2,"#",2)
> df4
[,1] [,2]
[1,] "bbc/images/auction.jpg" "567482"
[2,] "bbc/images/sunrise.jpg" "567499"
[3,] "bbc/images/sunset.mp3" "53209"
>
df2<- df2[,1]
df2 <- cbind(df2,df4)
> df2
df2
[1,] "22292" "bbc/images/auction.jpg" "567482"
[2,] "5394" "bbc/images/sunrise.jpg" "567499"
[3,] "432112" "bbc/images/sunset.mp3" "53209"
>
kit <- as_tibble(df1)
colnames(df2)<-NULL
store <- as_tibble(df2)
area <- as_tibble(df3)
> kit
# A tibble: 4 x 2
X1 X2
<chr> <chr>
1 bbc_a 22292
2 bbc_b 22293
3 desert8 5394
4 city4 2525
> store
# A tibble: 3 x 3
V1 V2 V3
<chr> <chr> <chr>
1 22292 bbc/images/auction.jpg 567482
2 5394 bbc/images/sunrise.jpg 567499
3 432112 bbc/images/sunset.mp3 53209
> area
# A tibble: 3 x 2
V1 V2
<chr> <chr>
1 bbc/images/auction.jpg 52
2 bbc/images/sunrise.jpg 38
3 bbc/images/stars.jpg 46
>
j1 <- inner_join(kit,store,by =c("X2"="V1"))
> j1
# A tibble: 2 x 4
X1 X2 V2 V3
<chr> <chr> <chr> <chr>
1 bbc_a 22292 bbc/images/auction.jpg 567482
2 desert8 5394 bbc/images/sunrise.jpg 567499
>
j2 <- inner_join(j1,area,by = c("V2"="V1" ))
> j2
# A tibble: 2 x 5
X1 X2 V2 V3 V2.y
<chr> <chr> <chr> <chr> <chr>
1 bbc_a 22292 bbc/images/auction.jpg 567482 52
2 desert8 5394 bbc/images/sunrise.jpg 567499 38
>
answer <- j2[,c(3:5)]
> answer
# A tibble: 2 x 3
V2 V3 V2.y
<chr> <chr> <chr>
1 bbc/images/auction.jpg 567482 52
2 bbc/images/sunrise.jpg 567499 38
>
colnames(answer)<- c("C1","C2","C3")
paste(answer$C1,answer$C2,answer$C3,sep="#")
[1] "bbc/images/auction.jpg#567482#52" "bbc/images/sunrise.jpg#567499#38
Upvotes: 1