Renaming column values based on criteria

Question

I have a data set consisting of three columns: time, jar and measurement_type.

For each unique measurement_type I have one measurement series for each of the jars 1, 2, and 3, and two measurement series for the jar "blank". I want to rename blank so that the first blank series in each measurement_type is called blank1 and the second is called blank2.

Any ideas?

# Example data (dput output): a 64-row data.frame whose columns are all
# character vectors.
#   time             - timestamp strings in "YYYY-MM-DD HH:MM:SS" form
#   jar              - jar label: "1", "2", "3" or "blank"
#   measurement_type - "a" or "b"
#   new_column       - jar label with each run of blanks numbered within its
#                      measurement_type ("blank1", "blank2"); presumably the
#                      desired result the question asks for
df <- structure(list(time = c("2021-04-02 23:40:20", "2021-04-02 23:41:15", 
"2021-04-02 23:42:10", "2021-04-02 23:43:05", "2021-04-02 23:44:55", 
"2021-04-02 23:45:50", "2021-04-02 23:46:45", "2021-04-02 23:47:40", 
"2021-04-02 23:48:35", "2021-04-02 23:49:30", "2021-04-02 23:50:25", 
"2021-04-02 23:52:15", "2021-04-03 00:36:15", "2021-04-03 00:37:10", 
"2021-04-03 00:39:00", "2021-04-03 00:39:55", "2021-04-03 00:56:25", 
"2021-04-03 00:57:20", "2021-04-03 00:58:15", "2021-04-03 00:59:10", 
"2021-04-03 01:00:05", "2021-04-03 01:01:00", "2021-04-03 01:02:50", 
"2021-04-03 01:03:45", "2021-04-03 01:04:40", "2021-04-03 01:05:35", 
"2021-04-03 01:06:30", "2021-04-03 01:23:54", "2021-04-03 01:24:49", 
"2021-04-03 01:25:44", "2021-04-03 01:26:39", "2021-04-03 01:28:29", 
"2021-04-03 01:29:24", "2021-04-03 01:30:19", "2021-04-03 01:31:14", 
"2021-04-03 01:32:09", "2021-04-03 01:33:04", "2021-04-03 01:33:59", 
"2021-04-03 01:35:49", "2021-04-03 01:36:44", "2021-04-03 01:37:39", 
"2021-04-03 01:38:34", "2021-04-03 01:39:29", "2021-04-03 01:48:39", 
"2021-04-03 01:49:34", "2021-04-03 01:50:29", "2021-04-03 01:58:44", 
"2021-04-03 01:59:39", "2021-04-03 02:00:34", "2021-04-03 02:01:29", 
"2021-04-03 02:11:34", "2021-04-03 02:12:29", "2021-04-03 02:18:54", 
"2021-04-03 02:19:49", "2021-04-03 02:20:44", "2021-04-03 02:21:39", 
"2021-04-03 02:22:34", "2021-04-03 02:23:29", "2021-04-03 02:24:24", 
"2021-04-03 02:25:19", "2021-04-03 02:26:14", "2021-04-03 02:27:09", 
"2021-04-03 02:28:04", "2021-04-03 02:28:59"), jar = c("blank", 
"blank", "blank", "blank", "blank", "blank", "blank", "1", "1", 
"1", "1", "1", "2", "2", "2", "2", "2", "blank", "blank", "blank", 
"blank", "blank", "blank", "blank", "3", "3", "3", "3", "3", 
"3", "3", "blank", "blank", "blank", "blank", "blank", "blank", 
"blank", "1", "1", "1", "1", "1", "1", "1", "1", "2", "2", "2", 
"2", "2", "2", "blank", "blank", "blank", "blank", "blank", "3", 
"3", "3", "3", "3", "3", "3"), measurement_type = c("a", "a", 
"a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", 
"a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", 
"a", "a", "a", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", 
"b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", 
"b", "b", "b", "b", "b", "b", "b", "b", "b", "b"), new_column = c("blank1", 
"blank1", "blank1", "blank1", "blank1", "blank1", "blank1", "1", 
"1", "1", "1", "1", "2", "2", "2", "2", "2", "blank2", "blank2", 
"blank2", "blank2", "blank2", "blank2", "blank2", "3", "3", "3", 
"3", "3", "3", "3", "blank1", "blank1", "blank1", "blank1", "blank1", 
"blank1", "blank1", "1", "1", "1", "1", "1", "1", "1", "1", "2", 
"2", "2", "2", "2", "2", "blank2", "blank2", "blank2", "blank2", 
"blank2", "3", "3", "3", "3", "3", "3", "3")), class = "data.frame", row.names = c(NA, 
-64L))

jmpivette · Accepted Answer

There might be a shorter solution, but I would use the data.table::rleid() function, which is really useful for detecting runs of repeated values. To get the right index for your example, the run id of each blank row is converted to a factor and then to a numeric value, which renumbers the blank runs as 1, 2, ... within each measurement_type:

library(dplyr)
# Number each run of consecutive "blank" rows within a measurement_type and
# write the numbered label to new_column; other jar values are copied as-is.
df %>%
  group_by(measurement_type) %>%
  mutate(
    # Run id: increases by one whenever `jar` changes within the group.
    indx = data.table::rleid(jar),
    # Keep run ids for blank rows only, then compress them to 1, 2, ... via
    # the factor's integer codes (levels follow the sorted run ids).
    indx = as.numeric(as.factor(if_else(jar == "blank", indx, NA_integer_))),
    new_column = if_else(jar == "blank", paste0(jar, indx), jar)
  ) %>%
  ungroup() %>%
  # The helper column is only needed to build new_column.
  select(-indx)

Renaming column values based on criteria

Answers (2)

Related Questions