Theresa_S
Theresa_S

Reputation: 361

apply ifelse to multiple columns and create new column

I have a data frame with 4 columns that contain job titles. For each column, I want to create a new column (category1, category2, category3, category4) which assign each job a category 1-10 depending on which words the job titles contain (e.g. if the job title contains the words "frontend", "ui", "ux" then the column category1 should say 1). I managed to do the categorizing manually for each column using the following code but want to do it at the same time for all 4 columns. Any help is appreciated!

data_rel$category1 <-
ifelse(grepl("frontend|ui|ux", data$job4_clean),1, ifelse(grepl("backend", data$job4_clean),2, ifelse(grepl("fullstack", data$job4_clean),3, ifelse(grepl("entwickler|development|application|developer|software",data$job4_clean),4, ifelse(grepl("data|analytics|machine|programmer|ml|engineer|engineering|programmer|learning",data$job4_clean),5, ifelse(grepl("research|teaching|akademischer|researcher",data$job4_clean),6, ifelse(grepl("project|manager|product|consultant|consulting",data$job4_clean),7, ifelse(grepl("it|security|technical|tech", data$job4_clean),8, ifelse(grepl("margketing|sales|media|saas|business|commerce|support|development|digital|markeing|graphic|designer|graphics|design",data$job4_clean),9, ifelse(grepl("founder|ceo|partner|chief|executive|cto",data$job4_clean),10,NA))))))))))
data_rel <- structure(list(job1 = c("phd fellow", "java developer intern", 
"optical engineer", " dwh bi engineer", " software engineer", 
"software developer", "data engineer", "application software engineer", 
"software developer", " web developer", "web developer", "web developer", 
"software engineer", "software engineer", " es computer", "associate software engineer", 
"fullstack ios developer", "technical delivery manager project manager", 
"software architect", "software developer"), job2 = c("research scientist", 
"analytics analyst", " developer", " data ml engineer", "graduate teaching assistant", 
"software developer", "machine learning engineer", "akademischer mitarbeiter machine learning and analytics", 
"backend develope", "lead php developer", "php system analytic software specialist", 
"webcreater", "data engineer", "software engineer", "assistant network administrator", 
"frontend engineer", "application infrastructor lead", "software engineer", 
"application developer", "software developer"), job3 = c("data scientist", 
"machine learning engineer", "application developer associate manager", 
NA, "co founder cto", NA, NA, NA, NA, NA, "lead php sugarcrm developer", 
" php developer", "data analysing researcher ", NA, "application developer consultance", 
"manager l1 ui frontend ", " software architect", "software engineering manager solution architect", 
"software developer consultance", "ai developer"), job4 = c(NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, "software architect development lead", 
"team leader", NA, NA, " application development specialist", 
" associate experience technology", NA, " software developer", 
"fullstack developer productowner", NA)), row.names = c(NA, -20L
), class = c("tbl_df", "tbl", "data.frame"))

Upvotes: 1

Views: 74

Answers (4)

ngwalton
ngwalton

Reputation: 383

Expanding on @zx8754's comment, you can put your code in a function, use lapply to apply to each column, and finish up with do.call to combine the columns as a data.frame again.

get_level <- function(col) {
  ifelse(grepl("frontend|ui|ux", col), 1, 
    ifelse(grepl("backend", col), 2,
      ifelse(grepl("fullstack", col), 3, 
        ifelse(grepl("entwickler|development|application|developer|software",col), 4, 
          ifelse(grepl("data|analytics|machine|programmer|ml|engineer|engineering|programmer|learning",col), 5, 
            ifelse(grepl("research|teaching|akademischer|researcher",col), 6, 
              ifelse(grepl("project|manager|product|consultant|consulting",col), 7, 
                ifelse(grepl("it|security|technical|tech", col), 8, 
                  ifelse(grepl("margketing|sales|media|saas|business|commerce|support|development|digital|markeing|graphic|designer|graphics|design", col), 9, 
                    ifelse(grepl("founder|ceo|partner|chief|executive|cto", col), 10,
                      NA))))))))))
}

cats <- lapply(data_rel, get_level)
cats <- do.call(cbind.data.frame, cats)
names(cats) <- paste0("category", seq_along(data_rel))

Upvotes: 1

GKi
GKi

Reputation: 39727

You can store the jobs in a vector an then iterate over them with sapply using grepl and match to get the job numbers.


jobs <- c("frontend|ui|ux"
        , "backend"
        , "fullstack"
        , "entwickler|development|application|developer|software"
        , "data|analytics|machine|programmer|ml|engineer|engineering|programmer|learning"
        , "research|teaching|akademischer|researcher"
        , "project|manager|product|consultant|consulting"
        , "it|security|technical|tech"
        , "margketing|sales|media|saas|business|commerce|support|development|digital|markeing|graphic|designer|graphics|design"
        , "founder|ceo|partner|chief|executive|cto")

sapply(data_rel, function(x) apply(sapply(jobs, grepl, x), 1, match, x=TRUE))
#      job1 job2 job3 job4
# [1,]   NA    6    5   NA
# [2,]    4    5    5   NA
# [3,]    5    4    4   NA
# [4,]    5    5   NA   NA
# [5,]    4    6   10   NA
# [6,]    4    4   NA   NA
# [7,]    5    5   NA   NA
# [8,]    4    5   NA   NA
# [9,]    4    2   NA   NA
#[10,]    4    4   NA   NA
#[11,]    4    4    4    4
#[12,]    4   NA    4   NA
#[13,]    4    5    5   NA
#[14,]    4    4   NA   NA
#[15,]   NA   NA    4    4
#[16,]    4    1    1    8
#[17,]    3    4    4   NA
#[18,]    7    4    4    4
#[19,]    4    4    4    3
#[20,]    4    4    4   NA

Upvotes: 0

Ronak Shah
Ronak Shah

Reputation: 389315

You can write a function with case_when -

library(dplyr)

change_category <- function(x) {
  case_when(grepl("frontend|ui|ux", x) ~ 1L, 
            grepl("backend", x) ~ 2L, 
            grepl("fullstack", x) ~ 3L, 
            grepl("entwickler|development|application|developer|software",x) ~ 4L, 
            grepl("data|analytics|machine|programmer|ml|engineer|engineering|programmer|learning",x) ~5L,
            grepl("research|teaching|akademischer|researcher",x) ~ 6L, 
            grepl("project|manager|product|consultant|consulting", x) ~ 7L, 
            grepl("it|security|technical|tech", x) ~ 8L, 
            grepl("margketing|sales|media|saas|business|commerce|support|development|digital|markeing|graphic|designer|graphics|design",x) ~ 9L, 
            grepl("founder|ceo|partner|chief|executive|cto",x) ~10L)
}

and apply it with across -

data_rel %>% mutate(across(.fns = change_category, .names = '{col}_category'))

Upvotes: 0

Anoushiravan R
Anoushiravan R

Reputation: 21938

We can use map_dfc from purrr package to iterate over each column of our data set detecting the category and bind the resulting column column-wise. I used case_when instead of ifelse for a slightly nicer-looked output:

library(dplyr)
library(purrr)

data_rel %>%
  map_dfc(~ case_when(
    grepl("frontend|ui|ux", .x) ~ 1, 
    grepl("backend", .x) ~ 2,
    grepl("fullstack", .x) ~ 3,
    grepl("entwickler|development|application|developer|software", .x) ~ 4,
    grepl("data|analytics|machine|programmer|ml|engineer|engineering|programmer|learning", .x) ~ 5,
    grepl("research|teaching|akademischer|researcher", .x) ~ 6, 
    grepl("project|manager|product|consultant|consulting", .x) ~ 7,
    grepl("it|security|technical|tech", .x) ~ 8,
    grepl("margketing|sales|media|saas|business|commerce|support|development|digital|markeing|graphic|designer|graphics|design", .x) ~ 9,
    grepl("founder|ceo|partner|chief|executive|cto", .x) ~ 10,
    TRUE ~ as.numeric(NA)
  ))

# A tibble: 20 x 4
    job1  job2  job3  job4
   <dbl> <dbl> <dbl> <dbl>
 1    NA     6     5    NA
 2     4     5     5    NA
 3     5     4     4    NA
 4     5     5    NA    NA
 5     4     6    10    NA
 6     4     4    NA    NA
 7     5     5    NA    NA
 8     4     5    NA    NA
 9     4     2    NA    NA
10     4     4    NA    NA
11     4     4     4     4
12     4    NA     4    NA
13     4     5     5    NA
14     4     4    NA    NA
15    NA    NA     4     4
16     4     1     1     8
17     3     4     4    NA
18     7     4     4     4
19     4     4     4     3
20     4     4     4    NA

or in base R:

cbind(sapply(data_rel, function(x) {
  ifelse(grepl("frontend|ui|ux", x),1, 
         ifelse(grepl("backend", x),2, 
                ifelse(grepl("fullstack", x),3, 
                       ifelse(grepl("entwickler|development|application|developer|software", x),4, 
                              ifelse(grepl("data|analytics|machine|programmer|ml|engineer|engineering|programmer|learning", x),5, 
                                     ifelse(grepl("research|teaching|akademischer|researcher", x),6, 
                                            ifelse(grepl("project|manager|product|consultant|consulting", x),7, 
                                                   ifelse(grepl("it|security|technical|tech", x),8, 
                                                          ifelse(grepl("margketing|sales|media|saas|business|commerce|support|development|digital|markeing|graphic|designer|graphics|design", x),9, 
                                                                 ifelse(grepl("founder|ceo|partner|chief|executive|cto", x),10,NA))))))))))
}))

Upvotes: 0

Related Questions