Reputation: 49
I'm trying to count the amount of rows that fulfill a given condition based on a variable. My code is the following:
CD_2018%>%summarise(twenty = n([porcentaje_acumulado_clientes<0.2]),
forty = n([porcentaje_acumulado_clientes<0.4]),
sixty = n([porcentaje_acumulado_clientes<0.6]),
eighty = n([porcentaje_acumulado_clientes<0.8])
)
However, this seems not to work. I also tried using n(id[porcentaje_acumulado_clientes<0.2])
, but it didnt work. I couldnt find something similar using n()
in the other questions.
Data:
CD_2018 <- structure(list(Codigo_Cliente = c(54688L, 28854L, 71951L, 222564L,
375419L, 66151L), Segmento = structure(c(5L, 5L, 3L, 5L, 3L,
3L), .Label = c("Clasico", "Emergente", "Mi_Negocio", "Preferencial",
"Prestige"), class = "factor"), Sal_Cons_TC_2018 = c(NA, NA,
38820.8212, NA, NA, 277874.1813), Sal_Cons_TC_2019 = c(NA, NA,
98899.6049, NA, NA, 284103.8359), Sal_Cons_CC_2018 = c(NA, 5932867.82,
NA, NA, NA, NA), Sal_Cons_CC_2019 = c(NA, 2891443.57, NA, NA,
NA, NA), Sal_Cons_CA_2018 = c(332429778.9279, 6119225.3163, 24548153.6145,
11366326.5221, 7590637.6539, 7283427.2443), Sal_Cons_CA_2019 = c(299599510.9669,
8183323.8751, 14419748.2394, 7848030.4762, 971111.4943, 3815088.7069
), Sal_Cons_CD_2018 = c(235849719.6407, 235830511.2803, 175709800,
174190782.1112, 134828233.5874, 125507000), Sal_Cons_CD_2019 = c(251984964.1521,
267979439.471, 105804400, 173706600, NA, 185157700), Sal_Cons_NE_2018 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), Sal_Cons_NE_2019 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), Sal_Cons_PR_Veh_2018 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), Sal_Cons_PR_Veh_2019 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), Sal_Cons_PR_Tie_2018 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), Sal_Cons_PR_Tie_2019 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), Sal_Cons_PR_Hip_2018 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), Sal_Cons_PR_Hip_2019 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), Sal_Cons_PR_Com_2018 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), Sal_Cons_PR_Com_2019 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), Sal_Cons_PR_Per_2018 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), Sal_Cons_PR_Per_2019 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), Sal_Cons_PR_LC_2018 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), Sal_Cons_PR_LC_2019 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), Prom_CC_2018 = c(NA,
4955113.05, NA, NA, NA, NA), Prom_CC_2019 = c(NA, 4395887.25,
NA, NA, NA, NA), Prom_CA_2018 = c(6989118, 131813.59, 1343168.3,
42440.5, 581442.72, 133269.29), Prom_CA_2019 = c(5655918.06,
154901.35, 97210.92, 872286.48, 467481.65, 85932.79), Prom_NE_2018 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), Prom_NE_2019 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), Tipo_Cliente_2018 = structure(c(2L,
2L, 2L, 2L, 2L, 2L), .Label = c("Nuevo", "Viejo"), class = "factor"),
Tipo_Cliente_2019 = structure(c(2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Nuevo",
"Viejo"), class = "factor"), cum = c(235849719.6407, 471680230.921,
647390030.921, 821580813.0322, 956409046.6196, 1081916046.6196
), ones = c(1, 1, 1, 1, 1, 1), clientes = c(1, 2, 3, 4, 5,
6), porcentaje_acumulado_CD = c(1.7023796894358, 3.4046207315895,
4.67290629584847, 5.93022748327579, 6.90342705617062, 7.8093453163556
), porcentaje_acumulado_clientes = c(0.0209292591042277,
0.0418585182084554, 0.0627877773126831, 0.0837170364169108,
0.104646295521139, 0.125575554625366)), row.names = c(NA,
-6L), groups = structure(list(Codigo_Cliente = c(28854L, 54688L,
66151L, 71951L, 222564L, 375419L), Segmento = structure(c(5L,
5L, 3L, 3L, 5L, 3L), .Label = c("Clasico", "Emergente", "Mi_Negocio",
"Preferencial", "Prestige"), class = "factor"), .rows = list(
2L, 1L, 6L, 3L, 4L, 5L)), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
Upvotes: 0
Views: 34
Reputation: 887128
We can also use cut
to create a group variable and then do the group by n()
library(dplyr)
CD_2018 %>%
group_by(grp = cut(porcentaje_acumulado_clientes,
breaks = c(-Inf, 0.2, 0.4, 0.6, 0.8))) %>%
summarise(n = n())
Upvotes: 1
Reputation: 145775
Change n
to sum
and delete brackets:
CD_2018 %>% summarise(
twenty = sum(porcentaje_acumulado_clientes < 0.2),
forty = sum(porcentaje_acumulado_clientes < 0.4),
sixty = sum(porcentaje_acumulado_clientes < 0.6),
eighty = sum(porcentaje_acumulado_clientes < 0.8)
)
# # A tibble: 1 x 4
# twenty forty sixty eighty
# <int> <int> <int> <int>
# 1 6 6 6 6
Upvotes: 1