bvowe
bvowe

Reputation: 3384

R group by substring

Sample data

# Sample data: `WANT` holds the expected result for each row, i.e. the
# substring ("ue" or "re") that `name` contains, matched regardless of case.
data <- data.frame(
  id   = c(1, 2, 3, 4, 5),
  name = c("blue", "green", "red", "read", "HUE"),
  WANT = c("ue", "re", "re", "re", "ue")
)

To explain: if 'name' contains "ue", then WANT = "ue", and if 'name' contains "re", then WANT = "re". Matching should be case-insensitive (for example, "HUE" counts as containing "ue").

This is my attempt:

    # NOTE(review): the sample data is stored in `data`, but this attempt refers
    # to `df` -- presumably the same data frame under another name; confirm.
    df$attempt <- NA
# substr() requires `start` and `stop` positions, so these calls stop with a
# missing-argument error. substr() also extracts by position only; it cannot
# test whether a substring occurs anywhere inside `name`.
df$attempt[substr(df$name) == "ue"] <- "ue"
df$attempt[substr(df$name) == "re"] <- "re"

Upvotes: 1

Views: 712

Answers (4)

www
www

Reputation: 39154

A solution using stringr (part of the tidyverse).

library(tidyverse)

# Pull the first "ue"/"re" match out of each name (case-insensitively), then
# lower-case it so that "HUE" yields "ue" rather than "UE".
data2 <- data %>%
  mutate(
    attempt = name %>%
      str_extract(pattern = regex("ue|re", ignore_case = TRUE)) %>%
      str_to_lower()
  )
data2
#   id  name WANT attempt
# 1  1  blue   ue      ue
# 2  2 green   re      re
# 3  3   red   re      re
# 4  4  read   re      re
# 5  5   HUE   ue      ue

DATA

# Input data for the stringr solution; `WANT` is the expected output column.
data <- data.frame(
  id   = c(1, 2, 3, 4, 5),
  name = c("blue", "green", "red", "read", "HUE"),
  WANT = c("ue", "re", "re", "re", "ue")
)

Upvotes: 2

acylam
acylam

Reputation: 18661

With purrr and dplyr:

library(dplyr)
library(purrr)

# For each row, return WANT when it occurs in `name` (case-insensitively).
# A row whose WANT is not found yields NA instead of a zero-length result,
# which map2_chr() would reject with an error. isTRUE() also maps an NA
# match result to NA rather than failing inside `if`.
data %>%
  mutate(
    group = map2_chr(
      WANT, name,
      ~ if (isTRUE(grepl(.x, .y, ignore.case = TRUE))) .x else NA_character_
    )
  )

Output:

  id  name WANT group
1  1  blue   ue    ue
2  2 green   re    re
3  3   red   re    re
4  4  read   re    re
5  5   HUE   hu    hu

Data:

# Input data for the purrr solution.
# NOTE(review): the last WANT value is "hu" here, whereas the question's
# sample data has "ue" for "HUE" -- confirm this difference is intentional.
data <- data.frame(
  id = c(1, 2, 3, 4, 5),
  name = c("blue", "green", "red", "read", "HUE"),
  WANT = c("ue", "re", "re", "re", "hu"),
  stringsAsFactors = FALSE
)

Upvotes: 0

Sada93
Sada93

Reputation: 2835

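Try using ifelse and mutate. Calling grepl() on name with the pattern "ue" and ignore.case enabled checks whether the name contains "ue" in any mix of upper and lower case (e.g. "UE"). The same logic applies to "re".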

library(dplyr)

    # Label each row by the substring found in `name`, ignoring case: "ue" is
    # checked first, then "re". Rows matching neither get a character NA, so
    # `Attempt` stays a character column even when nothing matches.
    data <- data %>%
      mutate(
        Attempt = ifelse(
          grepl("ue", name, ignore.case = TRUE), "ue",
          ifelse(grepl("re", name, ignore.case = TRUE), "re", NA_character_)
        )
      )

Upvotes: 0

alexb523
alexb523

Reputation: 728

Here are a couple of versions:

data <- data.frame(
  id = c(1, 2, 3, 4, 5),
  name = c("blue", "green", "red", "read", "HUE")
)

# base r version
# Case-insensitive check for "ue" first, then "re". Anything else becomes a
# character NA, so `want` is a character column even when nothing matches.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
data$want <- ifelse(
  grepl("ue", data$name, ignore.case = TRUE), "ue",
  ifelse(grepl("re", data$name, ignore.case = TRUE), "re", NA_character_)
)
#tidyverse version
library(dplyr)

# Same labelling as a dplyr step: "ue" takes precedence over "re", matching
# ignores case, and unmatched rows get a character NA so the column type
# does not depend on the data.
data <- data %>%
  mutate(
    want = ifelse(
      grepl("ue", name, ignore.case = TRUE), "ue",
      ifelse(grepl("re", name, ignore.case = TRUE), "re", NA_character_)
    )
  )

Upvotes: 2

Related Questions