Tiptop
Tiptop

Reputation: 623

Renaming column values based on criteria

I have a data set consisting of three columns: time, jar and measurement_type

For each unique measurement_type I have one measurement series jars 1, 2, and 3, and two measurements the jar: blank. I want to rename blank, so that first measurement in each measurement_type is called blank1 and the second blank2

Any ideas?

df <- structure(list(time = c("2021-04-02 23:40:20", "2021-04-02 23:41:15", 
"2021-04-02 23:42:10", "2021-04-02 23:43:05", "2021-04-02 23:44:55", 
"2021-04-02 23:45:50", "2021-04-02 23:46:45", "2021-04-02 23:47:40", 
"2021-04-02 23:48:35", "2021-04-02 23:49:30", "2021-04-02 23:50:25", 
"2021-04-02 23:52:15", "2021-04-03 00:36:15", "2021-04-03 00:37:10", 
"2021-04-03 00:39:00", "2021-04-03 00:39:55", "2021-04-03 00:56:25", 
"2021-04-03 00:57:20", "2021-04-03 00:58:15", "2021-04-03 00:59:10", 
"2021-04-03 01:00:05", "2021-04-03 01:01:00", "2021-04-03 01:02:50", 
"2021-04-03 01:03:45", "2021-04-03 01:04:40", "2021-04-03 01:05:35", 
"2021-04-03 01:06:30", "2021-04-03 01:23:54", "2021-04-03 01:24:49", 
"2021-04-03 01:25:44", "2021-04-03 01:26:39", "2021-04-03 01:28:29", 
"2021-04-03 01:29:24", "2021-04-03 01:30:19", "2021-04-03 01:31:14", 
"2021-04-03 01:32:09", "2021-04-03 01:33:04", "2021-04-03 01:33:59", 
"2021-04-03 01:35:49", "2021-04-03 01:36:44", "2021-04-03 01:37:39", 
"2021-04-03 01:38:34", "2021-04-03 01:39:29", "2021-04-03 01:48:39", 
"2021-04-03 01:49:34", "2021-04-03 01:50:29", "2021-04-03 01:58:44", 
"2021-04-03 01:59:39", "2021-04-03 02:00:34", "2021-04-03 02:01:29", 
"2021-04-03 02:11:34", "2021-04-03 02:12:29", "2021-04-03 02:18:54", 
"2021-04-03 02:19:49", "2021-04-03 02:20:44", "2021-04-03 02:21:39", 
"2021-04-03 02:22:34", "2021-04-03 02:23:29", "2021-04-03 02:24:24", 
"2021-04-03 02:25:19", "2021-04-03 02:26:14", "2021-04-03 02:27:09", 
"2021-04-03 02:28:04", "2021-04-03 02:28:59"), jar = c("blank", 
"blank", "blank", "blank", "blank", "blank", "blank", "1", "1", 
"1", "1", "1", "2", "2", "2", "2", "2", "blank", "blank", "blank", 
"blank", "blank", "blank", "blank", "3", "3", "3", "3", "3", 
"3", "3", "blank", "blank", "blank", "blank", "blank", "blank", 
"blank", "1", "1", "1", "1", "1", "1", "1", "1", "2", "2", "2", 
"2", "2", "2", "blank", "blank", "blank", "blank", "blank", "3", 
"3", "3", "3", "3", "3", "3"), measurement_type = c("a", "a", 
"a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", 
"a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", 
"a", "a", "a", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", 
"b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", 
"b", "b", "b", "b", "b", "b", "b", "b", "b", "b"), new_column = c("blank1", 
"blank1", "blank1", "blank1", "blank1", "blank1", "blank1", "1", 
"1", "1", "1", "1", "2", "2", "2", "2", "2", "blank2", "blank2", 
"blank2", "blank2", "blank2", "blank2", "blank2", "3", "3", "3", 
"3", "3", "3", "3", "blank1", "blank1", "blank1", "blank1", "blank1", 
"blank1", "blank1", "1", "1", "1", "1", "1", "1", "1", "1", "2", 
"2", "2", "2", "2", "2", "blank2", "blank2", "blank2", "blank2", 
"blank2", "3", "3", "3", "3", "3", "3", "3")), class = "data.frame", row.names = c(NA, 
-64L))

Upvotes: 1

Views: 45

Answers (2)

TarJae
TarJae

Reputation: 78917

A dplyr only solution. This should also work

library(dplyr)
df <- df %>% 
  group_by(measurement_type) %>% 
  mutate(flag = if_else(jar != lag(jar) & jar=="blank", 1, 0, missing = 1),  
         sequence = cumsum(flag)) %>% 
  mutate(jar = case_when(jar=="blank" ~ paste(jar,sequence, sep = ""),
                         TRUE ~ jar)) %>% 
  select(-new_column, -flag, -sequence)

Output:

   time                jar    measurement_type
   <chr>               <chr>  <chr>           
 1 2021-04-02 23:40:20 blank1 a               
 2 2021-04-02 23:41:15 blank1 a               
 3 2021-04-02 23:42:10 blank1 a               
 4 2021-04-02 23:43:05 blank1 a               
 5 2021-04-02 23:44:55 blank1 a               
 6 2021-04-02 23:45:50 blank1 a               
 7 2021-04-02 23:46:45 blank1 a               
 8 2021-04-02 23:47:40 1      a               
 9 2021-04-02 23:48:35 1      a               
10 2021-04-02 23:49:30 1      a               
11 2021-04-02 23:50:25 1      a               
12 2021-04-02 23:52:15 1      a               
13 2021-04-03 00:36:15 2      a               
14 2021-04-03 00:37:10 2      a               
15 2021-04-03 00:39:00 2      a               
16 2021-04-03 00:39:55 2      a               
17 2021-04-03 00:56:25 2      a               
18 2021-04-03 00:57:20 blank2 a               
19 2021-04-03 00:58:15 blank2 a               
20 2021-04-03 00:59:10 blank2 a               
21 2021-04-03 01:00:05 blank2 a               
22 2021-04-03 01:01:00 blank2 a               
23 2021-04-03 01:02:50 blank2 a               
24 2021-04-03 01:03:45 blank2 a               
25 2021-04-03 01:04:40 3      a               
26 2021-04-03 01:05:35 3      a               
27 2021-04-03 01:06:30 3      a               
28 2021-04-03 01:23:54 3      a               
29 2021-04-03 01:24:49 3      a               
30 2021-04-03 01:25:44 3      a               
31 2021-04-03 01:26:39 3      a               
32 2021-04-03 01:28:29 blank1 b               
33 2021-04-03 01:29:24 blank1 b               
34 2021-04-03 01:30:19 blank1 b               
35 2021-04-03 01:31:14 blank1 b               
36 2021-04-03 01:32:09 blank1 b               
37 2021-04-03 01:33:04 blank1 b               
38 2021-04-03 01:33:59 blank1 b               
39 2021-04-03 01:35:49 1      b               
40 2021-04-03 01:36:44 1      b               
41 2021-04-03 01:37:39 1      b               
42 2021-04-03 01:38:34 1      b               
43 2021-04-03 01:39:29 1      b               
44 2021-04-03 01:48:39 1      b               
45 2021-04-03 01:49:34 1      b               
46 2021-04-03 01:50:29 1      b               
47 2021-04-03 01:58:44 2      b               
48 2021-04-03 01:59:39 2      b               
49 2021-04-03 02:00:34 2      b               
50 2021-04-03 02:01:29 2      b               
51 2021-04-03 02:11:34 2      b               
52 2021-04-03 02:12:29 2      b               
53 2021-04-03 02:18:54 blank2 b               
54 2021-04-03 02:19:49 blank2 b               
55 2021-04-03 02:20:44 blank2 b               
56 2021-04-03 02:21:39 blank2 b               
57 2021-04-03 02:22:34 blank2 b               
58 2021-04-03 02:23:29 3      b               
59 2021-04-03 02:24:24 3      b               
60 2021-04-03 02:25:19 3      b               
61 2021-04-03 02:26:14 3      b               
62 2021-04-03 02:27:09 3      b               
63 2021-04-03 02:28:04 3      b               
64 2021-04-03 02:28:59 3      b  

Upvotes: 2

jmpivette
jmpivette

Reputation: 275

There might be a shorter solution but I would use data.table::rleid() function which is really useful for detecting groups of repeated values. To get the right index corresponding to your example, this value is converted to factor and then to numeric value:

library(dplyr)
df %>% 
  group_by(measurement_type) %>% 
  mutate(
    indx = data.table::rleid(jar),
    indx =  if_else(jar == "blank", 
                    indx, 
                    NA_integer_) %>% 
      as.factor() %>% 
      as.numeric()
  ) %>%  
  mutate(
    new_column = if_else(jar == "blank",
                         paste0(jar, indx),
                         jar)
  ) %>% 
  ungroup() %>% 
  select(-indx)

Upvotes: 2

Related Questions