Reputation: 623
I have a data set consisting of three columns: time
, jar
and measurement_type
For each unique measurement_type I have one measurement series jars
1
, 2
, and 3
, and two measurements the jar
: blank
. I want to rename blank
, so that first measurement in each measurement_type
is called blank1
and the second blank2
Any ideas?
df <- structure(list(time = c("2021-04-02 23:40:20", "2021-04-02 23:41:15",
"2021-04-02 23:42:10", "2021-04-02 23:43:05", "2021-04-02 23:44:55",
"2021-04-02 23:45:50", "2021-04-02 23:46:45", "2021-04-02 23:47:40",
"2021-04-02 23:48:35", "2021-04-02 23:49:30", "2021-04-02 23:50:25",
"2021-04-02 23:52:15", "2021-04-03 00:36:15", "2021-04-03 00:37:10",
"2021-04-03 00:39:00", "2021-04-03 00:39:55", "2021-04-03 00:56:25",
"2021-04-03 00:57:20", "2021-04-03 00:58:15", "2021-04-03 00:59:10",
"2021-04-03 01:00:05", "2021-04-03 01:01:00", "2021-04-03 01:02:50",
"2021-04-03 01:03:45", "2021-04-03 01:04:40", "2021-04-03 01:05:35",
"2021-04-03 01:06:30", "2021-04-03 01:23:54", "2021-04-03 01:24:49",
"2021-04-03 01:25:44", "2021-04-03 01:26:39", "2021-04-03 01:28:29",
"2021-04-03 01:29:24", "2021-04-03 01:30:19", "2021-04-03 01:31:14",
"2021-04-03 01:32:09", "2021-04-03 01:33:04", "2021-04-03 01:33:59",
"2021-04-03 01:35:49", "2021-04-03 01:36:44", "2021-04-03 01:37:39",
"2021-04-03 01:38:34", "2021-04-03 01:39:29", "2021-04-03 01:48:39",
"2021-04-03 01:49:34", "2021-04-03 01:50:29", "2021-04-03 01:58:44",
"2021-04-03 01:59:39", "2021-04-03 02:00:34", "2021-04-03 02:01:29",
"2021-04-03 02:11:34", "2021-04-03 02:12:29", "2021-04-03 02:18:54",
"2021-04-03 02:19:49", "2021-04-03 02:20:44", "2021-04-03 02:21:39",
"2021-04-03 02:22:34", "2021-04-03 02:23:29", "2021-04-03 02:24:24",
"2021-04-03 02:25:19", "2021-04-03 02:26:14", "2021-04-03 02:27:09",
"2021-04-03 02:28:04", "2021-04-03 02:28:59"), jar = c("blank",
"blank", "blank", "blank", "blank", "blank", "blank", "1", "1",
"1", "1", "1", "2", "2", "2", "2", "2", "blank", "blank", "blank",
"blank", "blank", "blank", "blank", "3", "3", "3", "3", "3",
"3", "3", "blank", "blank", "blank", "blank", "blank", "blank",
"blank", "1", "1", "1", "1", "1", "1", "1", "1", "2", "2", "2",
"2", "2", "2", "blank", "blank", "blank", "blank", "blank", "3",
"3", "3", "3", "3", "3", "3"), measurement_type = c("a", "a",
"a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a",
"a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a",
"a", "a", "a", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b",
"b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b",
"b", "b", "b", "b", "b", "b", "b", "b", "b", "b"), new_column = c("blank1",
"blank1", "blank1", "blank1", "blank1", "blank1", "blank1", "1",
"1", "1", "1", "1", "2", "2", "2", "2", "2", "blank2", "blank2",
"blank2", "blank2", "blank2", "blank2", "blank2", "3", "3", "3",
"3", "3", "3", "3", "blank1", "blank1", "blank1", "blank1", "blank1",
"blank1", "blank1", "1", "1", "1", "1", "1", "1", "1", "1", "2",
"2", "2", "2", "2", "2", "blank2", "blank2", "blank2", "blank2",
"blank2", "3", "3", "3", "3", "3", "3", "3")), class = "data.frame", row.names = c(NA,
-64L))
Upvotes: 1
Views: 45
Reputation: 78917
A dplyr
only solution. This should also work
library(dplyr)
df <- df %>%
group_by(measurement_type) %>%
mutate(flag = if_else(jar != lag(jar) & jar=="blank", 1, 0, missing = 1),
sequence = cumsum(flag)) %>%
mutate(jar = case_when(jar=="blank" ~ paste(jar,sequence, sep = ""),
TRUE ~ jar)) %>%
select(-new_column, -flag, -sequence)
Output:
time jar measurement_type
<chr> <chr> <chr>
1 2021-04-02 23:40:20 blank1 a
2 2021-04-02 23:41:15 blank1 a
3 2021-04-02 23:42:10 blank1 a
4 2021-04-02 23:43:05 blank1 a
5 2021-04-02 23:44:55 blank1 a
6 2021-04-02 23:45:50 blank1 a
7 2021-04-02 23:46:45 blank1 a
8 2021-04-02 23:47:40 1 a
9 2021-04-02 23:48:35 1 a
10 2021-04-02 23:49:30 1 a
11 2021-04-02 23:50:25 1 a
12 2021-04-02 23:52:15 1 a
13 2021-04-03 00:36:15 2 a
14 2021-04-03 00:37:10 2 a
15 2021-04-03 00:39:00 2 a
16 2021-04-03 00:39:55 2 a
17 2021-04-03 00:56:25 2 a
18 2021-04-03 00:57:20 blank2 a
19 2021-04-03 00:58:15 blank2 a
20 2021-04-03 00:59:10 blank2 a
21 2021-04-03 01:00:05 blank2 a
22 2021-04-03 01:01:00 blank2 a
23 2021-04-03 01:02:50 blank2 a
24 2021-04-03 01:03:45 blank2 a
25 2021-04-03 01:04:40 3 a
26 2021-04-03 01:05:35 3 a
27 2021-04-03 01:06:30 3 a
28 2021-04-03 01:23:54 3 a
29 2021-04-03 01:24:49 3 a
30 2021-04-03 01:25:44 3 a
31 2021-04-03 01:26:39 3 a
32 2021-04-03 01:28:29 blank1 b
33 2021-04-03 01:29:24 blank1 b
34 2021-04-03 01:30:19 blank1 b
35 2021-04-03 01:31:14 blank1 b
36 2021-04-03 01:32:09 blank1 b
37 2021-04-03 01:33:04 blank1 b
38 2021-04-03 01:33:59 blank1 b
39 2021-04-03 01:35:49 1 b
40 2021-04-03 01:36:44 1 b
41 2021-04-03 01:37:39 1 b
42 2021-04-03 01:38:34 1 b
43 2021-04-03 01:39:29 1 b
44 2021-04-03 01:48:39 1 b
45 2021-04-03 01:49:34 1 b
46 2021-04-03 01:50:29 1 b
47 2021-04-03 01:58:44 2 b
48 2021-04-03 01:59:39 2 b
49 2021-04-03 02:00:34 2 b
50 2021-04-03 02:01:29 2 b
51 2021-04-03 02:11:34 2 b
52 2021-04-03 02:12:29 2 b
53 2021-04-03 02:18:54 blank2 b
54 2021-04-03 02:19:49 blank2 b
55 2021-04-03 02:20:44 blank2 b
56 2021-04-03 02:21:39 blank2 b
57 2021-04-03 02:22:34 blank2 b
58 2021-04-03 02:23:29 3 b
59 2021-04-03 02:24:24 3 b
60 2021-04-03 02:25:19 3 b
61 2021-04-03 02:26:14 3 b
62 2021-04-03 02:27:09 3 b
63 2021-04-03 02:28:04 3 b
64 2021-04-03 02:28:59 3 b
Upvotes: 2
Reputation: 275
There might be a shorter solution but I would use data.table::rleid()
function which is really useful for detecting groups of repeated values.
To get the right index corresponding to your example, this value is converted to factor and then to numeric value:
library(dplyr)
df %>%
group_by(measurement_type) %>%
mutate(
indx = data.table::rleid(jar),
indx = if_else(jar == "blank",
indx,
NA_integer_) %>%
as.factor() %>%
as.numeric()
) %>%
mutate(
new_column = if_else(jar == "blank",
paste0(jar, indx),
jar)
) %>%
ungroup() %>%
select(-indx)
Upvotes: 2