Reputation: 10011
I have a toy dataset as follows:
df <- structure(list(id = 1:11, price = c(40.59, 70.42, 1.8, 1.98,
65.02, 2.23, 54.79, 54.7, 3.32, 1.77, 3.5), month_pct = structure(c(11L,
10L, 9L, 8L, 7L, 6L, 5L, 4L, 3L, 1L, 2L), .Label = c("-19.91%",
"-8.55%", "1.22%", "1.39%", "1.41%", "1.83%", "2.02%", "2.59%",
"2.86%", "6.58%", "8.53%"), class = "factor"), year_pct = structure(c(4L,
9L, 5L, 3L, 10L, 1L, 11L, 8L, 6L, 7L, 2L), .Label = c("-10.44%",
"-19.91%", "-2.46%", "-35.26%", "-4.26%", "-5.95%", "-6.35%",
"-6.91%", "-7.95%", "1.51%", "1.54%"), class = "factor")), class = "data.frame", row.names = c(NA,
-11L))
Out:
id price month_pct year_pct
0 1 40.59 8.53% -35.26%
1 2 70.42 6.58% -7.95%
2 3 1.80 2.86% -4.26%
3 4 1.98 2.59% -2.46%
4 5 65.02 2.02% 1.51%
5 6 2.23 1.83% -10.44%
6 7 54.79 1.41% 1.54%
7 8 54.70 1.39% -6.91%
8 9 3.32 1.22% -5.95%
9 10 1.77 -19.91% -6.35%
10 11 3.50 -8.55% -19.91%
Now I want to count the number and percentage of positive, 0 and negative
for columns month_pct
and year_pct
, how could I do that in R?
Thanks.
Upvotes: 2
Views: 824
Reputation: 21400
Here's a base R
approach using regex:
sts <- data.frame(
sign = c("positive", "zero", "negative"),
month_number = c(length(which(grepl("^\\d", df$month_pct))),
length(which(df$month_pct==0)),
length(which(grepl("^-", df$month_pct)))),
month_percent = c(length(which(grepl("^\\d", df$month_pct)))/length(df$month_pct)*100,
length(which(df$month_pct==0))/length(df$month_pct)*100,
length(which(grepl("^-", df$month_pct)))/length(df$month_pct)*100),
year_number = c(length(which(grepl("^\\d", df$year_pct))),
length(which(df$year_pct==0)),
length(which(grepl("^-", df$year_pct)))),
year_percent = c(length(which(grepl("^\\d", df$year_pct)))/length(df$year_pct)*100,
length(which(df$month_pct==0))/length(df$year_pct)*100,
length(which(grepl("^-", df$year_pct)))/length(df$year_pct)*100)
)
Result:
sts
sign month_number month_percent year_number year_percent
1 positive 9 81.81818 2 18.18182
2 zero 0 0.00000 0 0.00000
3 negative 2 18.18182 9 81.81818
Upvotes: 2
Reputation: 388982
Using dplyr
1.0.0
here is one way :
library(dplyr)
df %>%
summarise(across(c(month_pct, year_pct),
~table(factor(sign(readr::parse_number(as.character(.))),
levels = -1:1)))) %>%
mutate(sign = c('negative', 'zero', 'positive'), .before = month_pct) %>%
rename_at(-1, ~sub('pct', 'n', .)) %>%
mutate(across(-1, list(pct = ~./sum(.) * 100)))
# sign month_n year_n month_n_pct year_n_pct
#1 negative 2 9 18.2 81.8
#2 zero 0 0 0.0 0.0
#3 positive 9 2 81.8 18.2
Upvotes: 1
Reputation: 39858
One dplyr
and tidyr
possibility could be:
df %>%
pivot_longer(-c(1:2)) %>%
group_by(name,
value_sign = factor(sign(as.numeric(sub("%", "", value))),
levels = -1:1,
labels = c("negative", "zero", "positive")),
.drop = FALSE) %>%
count() %>%
group_by(name) %>%
mutate(prop = n/sum(n)*100)
name value_sign n prop
<chr> <fct> <int> <dbl>
1 month_pct negative 2 18.2
2 month_pct zero 0 0
3 month_pct positive 9 81.8
4 year_pct negative 9 81.8
5 year_pct zero 0 0
6 year_pct positive 2 18.2
Upvotes: 2