Reputation: 15
I want to do a simple counting of rows grouped by id (PIZ). The counting should restart both at every new id (PIZ) and if the value of the column "Wert" exceeds 500.
this is the data:
> dput(head(Labor_Neutrophile_alle2, n = 20))
structure(list(PIZ = c("37190985", "37190985", "37190985", "37190985",
"37190985", "37190985", "37190985", "37190985", "37190985", "30195566",
"30195566", "30195566", "30195566", "30195566", "30195566", "30195566",
"30195566", "30195566", "30195566", "30195566"), Datum = c(2019.98630136986,
2019.97808219178, 2019.97534246575, 2019.96712328767, 2019.95616438356,
2019.94794520548, 2019.94246575342, 2019.93698630137, 2019.93150684932,
2019.94520547945, 2019.92876712329, 2019.9095890411, 2019.8904109589,
2019.87123287671, 2019.85205479452, 2019.84931506849, 2019.84383561644,
2019.84109589041, 2019.82191780822, 2019.81369863014), Parameter = c("Neutrophile absolut",
"Neutrophile absolut", "Neutrophile absolut", "Neutrophile absolut",
"Neutrophile absolut", "Neutrophile absolut", "Neutrophile absolut",
"Neutrophile absolut", "Neutrophile absolut", "Neutrophile absolut",
"Neutrophile absolut", "Neutrophile absolut", "Neutrophile absolut",
"Neutrophile absolut", "Neutrophile absolut", "Neutrophile absolut",
"Neutrophile absolut", "Neutrophile absolut", "Neutrophile absolut",
"Neutrophile absolut"), Wert = c(20, 190, 320, 830, 1450, 870,
650, 1390, 1090, 1850, 1800, 1300, 400, 2800, 1360, 1200, 1510,
770, 2300, 2010), Einheit = c("10^3/µL", "10^3/µL", "10^3/µL",
"10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL",
"10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL",
"10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL"), Neutrophilie = c(0,
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1)), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"))
And my desired outcome would be:
> dput(head(Neutro, n = 20))
structure(list(...1 = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20), PIZ = c(10941890, 10942071,
11712886, 17089951, 17228870, 17250590, 17250590, 17250590, 17250590,
17250590, 17250590, 17250590, 17250590, 17250590, 17250590, 17250590,
17250590, 17250590, 17250590, 17250590), Datum = c(2019.189,
2014.195, 2014.942, 2015.203, 2014.323, 2013.764, 2013.841, 2014.09,
2014.115, 2014.134, 2014.156, 2014.189, 2014.192, 2014.195, 2014.197,
2014.2, 2014.205, 2014.208, 2014.211, 2014.214), Parameter = c("Neutrophile absolut",
"Neutrophile absolut", "Neutrophile absolut", "Neutrophile absolut",
"Neutrophile absolut", "Neutrophile absolut", "Neutrophile absolut",
"Neutrophile absolut", "Neutrophile absolut", "Neutrophile absolut",
"Neutrophile absolut", "Neutrophile absolut", "Neutrophile absolut",
"Neutrophile absolut", "Neutrophile absolut", "Neutrophile absolut",
"Neutrophile absolut", "Neutrophile absolut", "Neutrophile absolut",
"Neutrophile absolut"), Wert = c(4310, 5810, 3700, 3090, 3010,
1620, 1370, 2410, 2120, 710, 60, 20, 90, 120, 230, 220, 190,
180, 470, 1260), Einheit = c("10^3/µL", "10^3/µL", "10^3/µL",
"10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL",
"10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL",
"10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL"), Neutrophilie = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1), Count = c(1,
1, 1, 1, 1, 1, 2, 3, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1)), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"))
I have tried a couple of things but, ya...:
xxx <- Labor_Neutrophile_alle2 %>%
group_by(PIZ, Wert > 500) %>%
summarise(count=n())
Upvotes: 0
Views: 136
Reputation: 173793
You could try this dplyr
method. You group by PIZ
, then add a dummy variable to split into new groups depending on whether Wert
is greater than 500. Then you just produce an integer sequence in each subgroup.
Note that your desired outcome has completely different data from the input data - this is presumably a mistake.
Labor_Neutrophile_alle2 %>%
group_by(PIZ) %>%
mutate(Count1 = cumsum(Wert > 500)) %>%
group_by(PIZ, Count1) %>%
mutate(Count = seq(n())) %>%
ungroup() %>%
dplyr::select(-c(Count1))
#> # A tibble: 20 x 7
#> PIZ Datum Parameter Wert Einheit Neutrophilie Count
#> <chr> <dbl> <chr> <dbl> <chr> <dbl> <int>
#> 1 37190985 2020. Neutrophile absolut 20 10^3/µL 0 1
#> 2 37190985 2020. Neutrophile absolut 190 10^3/µL 0 2
#> 3 37190985 2020. Neutrophile absolut 320 10^3/µL 0 3
#> 4 37190985 2020. Neutrophile absolut 830 10^3/µL 1 1
#> 5 37190985 2020. Neutrophile absolut 1450 10^3/µL 1 1
#> 6 37190985 2020. Neutrophile absolut 870 10^3/µL 1 1
#> 7 37190985 2020. Neutrophile absolut 650 10^3/µL 1 1
#> 8 37190985 2020. Neutrophile absolut 1390 10^3/µL 1 1
#> 9 37190985 2020. Neutrophile absolut 1090 10^3/µL 1 1
#> 10 30195566 2020. Neutrophile absolut 1850 10^3/µL 1 1
#> 11 30195566 2020. Neutrophile absolut 1800 10^3/µL 1 1
#> 12 30195566 2020. Neutrophile absolut 1300 10^3/µL 1 1
#> 13 30195566 2020. Neutrophile absolut 400 10^3/µL 0 2
#> 14 30195566 2020. Neutrophile absolut 2800 10^3/µL 1 1
#> 15 30195566 2020. Neutrophile absolut 1360 10^3/µL 1 1
#> 16 30195566 2020. Neutrophile absolut 1200 10^3/µL 1 1
#> 17 30195566 2020. Neutrophile absolut 1510 10^3/µL 1 1
#> 18 30195566 2020. Neutrophile absolut 770 10^3/µL 1 1
#> 19 30195566 2020. Neutrophile absolut 2300 10^3/µL 1 1
#> 20 30195566 2020. Neutrophile absolut 2010 10^3/µL 1 1
Upvotes: 1