nflore
nflore

Reputation: 306

How to paste two column strings with condition

I have a dataframe that looks like this:

structure(list(date = c("01dec2013", "01jul2003", "01nov2008", 
"01dec2017", "01dec2017", "01dec2003"), company = c("Shwe Taung", 
"PetroChina Exploration and Development", "Repsol SA", "Repsol SA", 
"Ipsen Pharmaceutical", "Ceva Laval"), parent_company = c("Shwe Taung", 
"China National Petroleum (CNPC)", "Repsol SA", "Repsol SA", 
"Ipsen Pharmaceutical", "Ceva Sante Animale"), Website = c("www.shwetaunggroup.com", 
"www.cnpc.com.cn", "www.repsol.com", "www.repsol.com", "www.ipsen.com", 
"www.ceva.com"), revenues_usd_ml = c(NA, 394554.53, 53215.45, 
53215.45, 1760.671, 967.152), Headcount = c(NA, 1396144L, 24634L, 
24634L, NA, 3500L), r_d_exp = c(NA, NA, 77.67, 77.67, NA, NA), 
    est_year = c(NA, 1988L, 1927L, 1927L, 1929L, 1989L), o_country = c("Myanmar", 
    "China", "Spain", "Spain", "France", "France"), o_state = c("Rangoon (Yangon)", 
    "Beijing Municipality", "Comunidad de Madrid", "Comunidad de Madrid", 
    "Ile-de-France", "Sud-Ouest (FR)"), o_admin = c("Not Specified", 
    "Not Specified", "Madrid", "Madrid", "Ile-de-France", "Not Specified"
    ), o_city = c("Rangoon (Yangon)", "Beijing", "Madrid", "Madrid", 
    "Paris", "Not Specified"), country = c("Algeria", "Algeria", 
    "Algeria", "Algeria", "Algeria", "Algeria"), state = c("Adrar", 
    "Adrar", "Adrar", "Adrar", "Adrar", "Adrar"), region = c("Not Specified", 
    "Not Specified", "Not Specified", "Not Specified", "Not Specified", 
    "Not Specified"), city = c("Adrar", "Adrar", "Reggane", "Reggane", 
    "Sidi Abdallah", "Sidi Abdallah"), free_zone = c("", "", 
    "", "", "", ""), relocation = c("", "", "", "", "", ""), 
    sector = c("Building materials", "Coal, oil & gas", "Coal, oil & gas", 
    "Coal, oil & gas", "Pharmaceuticals", "Healthcare"), sub_sector = c("Cement & concrete products", 
    "Oil & gas extraction", "Oil & gas extraction", "Oil & gas extraction", 
    "Pharmaceutical preparations", "Other (Healthcare)"), cluster = c("Construction", 
    "Energy", "Energy", "Energy", "Life sciences", "Life sciences"
    ), activity = c("Manufacturing", "Extraction", "Extraction", 
    "Extraction", "Manufacturing", "Manufacturing"), fdi_jobs = c(351L, 
    145L, 235L, 227L, 150L, 45L), est_fdi_jobs = c("Yes", "Yes", 
    "Yes", "Yes", "No", "No"), capital = c(139.9, 350, 565, 299.7, 
    29.55, 2.5), est_capital = c("Yes", "No", "No", "Yes", "No", 
    "No"), fdi_type = c("New", "New", "New", "Expansion", "New", 
    "New"), fdi_status = c("Announced", "Announced", "Announced", 
    "Opened", "Announced", "Opened"), year = c(2013L, 2003L, 
    2008L, 2017L, 2017L, 2003L), code_d = c("012", "012", "012", 
    "012", "012", "012"), income_d = c("MIDLW", "MIDLW", "MIDLW", 
    "MIDLW", "MIDLW", "MIDLW"), continent_d = c("Africa", "Africa", 
    "Africa", "Africa", "Africa", "Africa"), lang_d = c("Arabic", 
    "Arabic", "Arabic", "Arabic", "Arabic", "Arabic"), landlocked = c(0L, 
    0L, 0L, 0L, 0L, 0L), iso_d = c("DZA", "DZA", "DZA", "DZA", 
    "DZA", "DZA"), isic = c("26", "11", "11", "11", "24", "85"
    ), isic4 = c(2695, 1110, 1110, 1110, 2411, 8519), sector_eora = c("Petroleum, Chemical and Non-Metallic Mineral Products", 
    "Mining and Quarrying", "Mining and Quarrying", "Mining and Quarrying", 
    "Petroleum, Chemical and Non-Metallic Mineral Products", 
    "Mining and Quarrying")), datalabel = "", time.stamp = "24 May 2021 12:23", formats = c("%111s", 
"%125s", "%125s", "%105s", "%10.0g", "%10.0g", "%10.0g", "%10.0g", 
"%28s", "%52s", "%54s", "%31s", "%44s", "%51s", "%49s", "%53s", 
"%70s", "%28s", "%29s", "%92s", "%32s", "%40s", "%10.0g", "%9s", 
"%10.0g", "%9s", "%12s", "%14s", "%19s", "%10.0g", "%10.0g", 
"%10.0g", "%3s", "%9s", "%7s", "%14s", "%8.0g", "%3s"), types = c(111L, 
125L, 125L, 105L, 65526L, 65528L, 65526L, 65529L, 28L, 52L, 54L, 
31L, 44L, 51L, 49L, 53L, 70L, 28L, 29L, 92L, 32L, 40L, 65528L, 
9L, 65526L, 9L, 12L, 14L, 19L, 65526L, 65526L, 65529L, 3L, 5L, 
7L, 14L, 65530L, 3L), val.labels = structure(c("", "", "", "", 
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", 
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", 
"", ""), .Names = c("", "", "", "", "", "", "", "", "", "", "", 
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", 
"", "", "", "", "", "", "", "", "", "", "")), var.labels = c("", 
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", 
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "ISO country numeric code", 
"Classif World Bank by income level (See note)", "Continent", 
"Official language", "1 if landlocked", "ISO3 alpha code"), version = 118L, label.table = list(), expansion.fields = list(
    c("revenues_usd_ml", "destring", "Characters removed were:"
    ), c("revenues_usd_ml", "destring_cmd", "destring _ Revenue__USD_m_ Headcount R_D_expenditure__USD_m_ Year_established Jobs_created Capital_investment, replace"
    ), c("Headcount", "destring", "Characters removed were:"), 
    c("Headcount", "destring_cmd", "destring _ Revenue__USD_m_ Headcount R_D_expenditure__USD_m_ Year_established Jobs_created Capital_investment, replace"
    ), c("r_d_exp", "destring", "Characters removed were:"), 
    c("r_d_exp", "destring_cmd", "destring _ Revenue__USD_m_ Headcount R_D_expenditure__USD_m_ Year_established Jobs_created Capital_investment, replace"
    ), c("est_year", "destring", "Characters removed were:"), 
    c("est_year", "destring_cmd", "destring _ Revenue__USD_m_ Headcount R_D_expenditure__USD_m_ Year_established Jobs_created Capital_investment, replace"
    ), c("fdi_jobs", "destring", "Characters removed were:"), 
    c("fdi_jobs", "destring_cmd", "destring _ Revenue__USD_m_ Headcount R_D_expenditure__USD_m_ Year_established Jobs_created Capital_investment, replace"
    ), c("capital", "destring", "Characters removed were:"), 
    c("capital", "destring_cmd", "destring _ Revenue__USD_m_ Headcount R_D_expenditure__USD_m_ Year_established Jobs_created Capital_investment, replace"
    ), c("g_lon", "destring_cmd", "destring g_lat g_lon m_latitude m_longitude, replace"
    ), c("g_lon", "destring", "Characters removed were:"), c("g_lat", 
    "destring_cmd", "destring g_lat g_lon m_latitude m_longitude, replace"
    ), c("g_lat", "destring", "Characters removed were:"), c("year", 
    "destring", "Characters removed were:"), c("year", "destring_cmd", 
    "destring year, replace"), c("_dta", "ReS_i", "iso3"), c("_dta", 
    "ReS_ver", "v.2"), c("_dta", "ReS_j", "iso_o"), c("_dta", 
    "ReS_str", "1"), c("_dta", "ReS_Xij", "p"), c("income_d", 
    "note1", "World Bank Classification:"), c("income_d", "note2", 
    "LOW: Low Income"), c("income_d", "note3", "MIDLW: Lower middle income"
    ), c("income_d", "note4", "MIDUP: Upper middle income"), 
    c("income_d", "note5", "HOECD: High income (OECD)"), c("income_d", 
    "note6", "HOTHR: High income (non OECD)"), c("income_d", 
    "note0", "6")), byteorder = "LSF", orig.dim = c(11478L, 38L
), data.label = character(0), row.names = c(NA, 6L), class = "data.frame")

My task is to paste the string "country" in column "state", BUT ONLY for values of "state" that are different from "Non Specified".

The closer I got was thanks to this one-liner:

FDI$state[!(FDI$state=="Not Specified")] <- do.call(paste, c(FDI[c], sep = ", "))

I had what I wanted, but for certain "Not Specified" values, it returned "Not Specified, nameofthecountry", even though !(FDI$state=="Not Specified") was FALSE in that cell.

It also returned this warning:

Warning message: In FDI$state[!(FDI$state == "Not Specified")] <- do.call(paste, : number of items to replace is not a multiple of replacement length

How can I do?

Upvotes: 1

Views: 80

Answers (2)

akrun
akrun

Reputation: 887108

We can use case_when

library(dplyr)
FDI %>%
    mutate(state = case_when(state != "Non Specified" ~ country, TRUE ~ state))

Upvotes: 1

Christopher Belanger
Christopher Belanger

Reputation: 631

If I'm understanding you right, you can do this using the package dplyr with a call to mutate() and if_else().

library(dplyr)
FDI %>%
  mutate(state = if_else(state != "Non Specified", country, state))

This checks to see if the value of state is different from Non Specified, and if it is then it replaces the value of state with the value of country.

Upvotes: 1

Related Questions