Feixiang Sun
Feixiang Sun

Reputation: 117

Match and add data based on another data frame in r

I have a data frame named A which are company names and cities, like

A:

name    city 
IBM      NY
AMZ      NY
ALI      SH
TAOBAO   London

Also a data frame B

B:

ID       NAME1    CITY1   NAME2   CITY2
1        LENVO            AMZ
2        APPLE            HUAWEI  
3        AMZ              ALI
4        XIAOMI           VIVO
5        IBM              GOOGLE

In data B, I have two columns of company names, accordingly, two city columns which are blank (there are 10k in my real dataset with 6 name-city pairs) , what I want is that get the city name in dataset B from dataset A based on the name match, which the result C should be as below in this example: C

ID       NAME1    CITY1   NAME2   CITY2
1        LENVO            AMZ     NY
2        APPLE            HUAWEI   
3        AMZ      NY      ALI     SH
4        XIAOMI           TAOBAO  London
5        IBM      NY      GOOGLE

how to make it? Thanks in advance for any suggestion!

Upvotes: 1

Views: 56

Answers (3)

Abdessabour Mtk
Abdessabour Mtk

Reputation: 3888

Using pivot_longer/pivot_wider

library(tidyverse)

df %>%
 select(-starts_with("CITY")) %>%
 pivot_longer(!ID, names_prefix="NAME") %>%
 left_join(df_cities, by=c("value"="name")) %>%
 pivot_wider(values_from=c("value", "city"))
Joining, by = "name"
# A tibble: 5 x 5
     ID name_1 name_2 city_1 city_2
  <int> <fct>  <fct>  <fct>  <fct> 
1     1 LENVO  AMZ    NA     NY    
2     2 APPLE  HUAWEI NA     NA    
3     3 AMZ    ALI    NY     SH    
4     4 XIAOMI VIVO   NA     NA    
5     5 IBM    GOOGLE NY     NA   

DATA

read.table(text="name city
IBM NY
AMZ NY
ALI SH
TAOBAO London", header=T) -> df_cities
read.table(text="ID NAME1 CITY1 NAME2 CITY2
1 LENVO '' AMZ ''
2 APPLE '' HUAWEI ''
3 AMZ '' ALI ''
4 XIAOMI '' VIVO ''
5 IBM '' GOOGLE ''", header=T) -> df

Upvotes: 0

Ronak Shah
Ronak Shah

Reputation: 388862

You can use match over 'NAME' columns to get corresponding city names from A dataframe.

city_cols <- grep('CITY', names(B)) 
name_cols <- grep('NAME', names(B)) 

B[cols] <- lapply(B[name_cols], function(x) A$city[match(x, A$name)])
B

#  ID  NAME1 CITY1  NAME2 CITY2
#1  1  LENVO  <NA>    AMZ    NY
#2  2  APPLE  <NA> HUAWEI  <NA>
#3  3    AMZ    NY    ALI    SH
#4  4 XIAOMI  <NA>   VIVO  <NA>
#5  5    IBM    NY GOOGLE  <NA>

data

A <- structure(list(name = c("IBM", "AMZ", "ALI", "TAOBAO"), city = c("NY", 
"NY", "SH", "London")), class = "data.frame", row.names = c(NA, -4L))

B <- structure(list(ID = 1:5, NAME1 = c("LENVO", "APPLE", "AMZ", "XIAOMI", 
"IBM"), CITY1 = c("", "", "", "", ""), NAME2 = c("AMZ", "HUAWEI", 
"ALI", "VIVO", "GOOGLE"), CITY2 = c("", "", "", "", "")), 
row.names = c(NA, -5L), class = "data.frame")

Upvotes: 1

r2evans
r2evans

Reputation: 160417

Base R

B <- merge(B, A, by.x = "NAME1", by.y = "name", all.x = TRUE)
B$CITY1 <- B$city
B$city <- NULL
B <- merge(B, A, by.x = "NAME2", by.y = "name", all.x = TRUE)
B$CITY2 <- B$city
B$city <- NULL
B
#    NAME2  NAME1 ID CITY1 CITY2
# 1    ALI    AMZ  3    NY    SH
# 2    AMZ  LENVO  1  <NA>    NY
# 3 GOOGLE    IBM  5    NY  <NA>
# 4 HUAWEI  APPLE  2  <NA>  <NA>
# 5   VIVO XIAOMI  4  <NA>  <NA>
B <- B[,c("ID", "NAME1", "CITY1", "NAME2", "CITY2")]
B <- B[order(B$ID),]
B
#   ID  NAME1 CITY1  NAME2 CITY2
# 2  1  LENVO  <NA>    AMZ    NY
# 4  2  APPLE  <NA> HUAWEI  <NA>
# 1  3    AMZ    NY    ALI    SH
# 5  4 XIAOMI  <NA>   VIVO  <NA>
# 3  5    IBM    NY GOOGLE  <NA>

dplyr

library(dplyr)
left_join(B, A, by = c("NAME1" = "name")) %>%
  mutate(CITY1 = city) %>%
  select(-city) %>%
  left_join(A, by = c("NAME2" = "name")) %>%
  mutate(CITY2 = city) %>%
  select(-city)
#   ID  NAME1 CITY1  NAME2 CITY2
# 1  1  LENVO  <NA>    AMZ    NY
# 2  2  APPLE  <NA> HUAWEI  <NA>
# 3  3    AMZ    NY    ALI    SH
# 4  4 XIAOMI  <NA>   VIVO  <NA>
# 5  5    IBM    NY GOOGLE  <NA>

Upvotes: 1

Related Questions