ah bon
ah bon

Reputation: 10011

Count numbers and percentage of negative, 0 and positive values for each column in R

I have a toy dataset as follows:

df <- structure(list(id = 1:11, price = c(40.59, 70.42, 1.8, 1.98, 
65.02, 2.23, 54.79, 54.7, 3.32, 1.77, 3.5), month_pct = structure(c(11L, 
10L, 9L, 8L, 7L, 6L, 5L, 4L, 3L, 1L, 2L), .Label = c("-19.91%", 
"-8.55%", "1.22%", "1.39%", "1.41%", "1.83%", "2.02%", "2.59%", 
"2.86%", "6.58%", "8.53%"), class = "factor"), year_pct = structure(c(4L, 
9L, 5L, 3L, 10L, 1L, 11L, 8L, 6L, 7L, 2L), .Label = c("-10.44%", 
"-19.91%", "-2.46%", "-35.26%", "-4.26%", "-5.95%", "-6.35%", 
"-6.91%", "-7.95%", "1.51%", "1.54%"), class = "factor")), class = "data.frame", row.names = c(NA, 
-11L))

Out:

   id  price month_pct year_pct
0    1  40.59     8.53%  -35.26%
1    2  70.42     6.58%   -7.95%
2    3   1.80     2.86%   -4.26%
3    4   1.98     2.59%   -2.46%
4    5  65.02     2.02%    1.51%
5    6   2.23     1.83%  -10.44%
6    7  54.79     1.41%    1.54%
7    8  54.70     1.39%   -6.91%
8    9   3.32     1.22%   -5.95%
9   10   1.77   -19.91%   -6.35%
10  11   3.50    -8.55%  -19.91%

Now I want to count the number and percentage of positive, 0 and negative for columns month_pct and year_pct, how could I do that in R?

Thanks.

Upvotes: 2

Views: 824

Answers (3)

Chris Ruehlemann
Chris Ruehlemann

Reputation: 21400

Here's a base R approach using regex:

sts <- data.frame(
  sign = c("positive", "zero", "negative"),
  month_number = c(length(which(grepl("^\\d", df$month_pct))), 
                   length(which(df$month_pct==0)),
                   length(which(grepl("^-", df$month_pct)))),
  month_percent = c(length(which(grepl("^\\d", df$month_pct)))/length(df$month_pct)*100,
                    length(which(df$month_pct==0))/length(df$month_pct)*100,
                    length(which(grepl("^-", df$month_pct)))/length(df$month_pct)*100),
  year_number = c(length(which(grepl("^\\d", df$year_pct))), 
                   length(which(df$year_pct==0)),
                   length(which(grepl("^-", df$year_pct)))),
  year_percent = c(length(which(grepl("^\\d", df$year_pct)))/length(df$year_pct)*100,
                   length(which(df$month_pct==0))/length(df$year_pct)*100,
                   length(which(grepl("^-", df$year_pct)))/length(df$year_pct)*100)
  )

Result:

sts
      sign month_number month_percent year_number year_percent
1 positive            9      81.81818           2     18.18182
2     zero            0       0.00000           0      0.00000
3 negative            2      18.18182           9     81.81818

Upvotes: 2

Ronak Shah
Ronak Shah

Reputation: 388982

Using dplyr 1.0.0 here is one way :

library(dplyr)

df %>%
   summarise(across(c(month_pct, year_pct), 
           ~table(factor(sign(readr::parse_number(as.character(.))), 
                  levels = -1:1)))) %>%
    mutate(sign = c('negative', 'zero', 'positive'), .before = month_pct) %>%
    rename_at(-1, ~sub('pct', 'n', .)) %>%
    mutate(across(-1, list(pct = ~./sum(.) * 100)))

#      sign month_n year_n month_n_pct year_n_pct
#1 negative       2      9        18.2       81.8
#2     zero       0      0         0.0        0.0
#3 positive       9      2        81.8       18.2

Upvotes: 1

tmfmnk
tmfmnk

Reputation: 39858

One dplyr and tidyr possibility could be:

df %>%
 pivot_longer(-c(1:2)) %>%
 group_by(name, 
          value_sign = factor(sign(as.numeric(sub("%", "", value))),
                              levels = -1:1,
                              labels = c("negative", "zero", "positive")),
          .drop = FALSE) %>%
 count() %>%
 group_by(name) %>%
 mutate(prop = n/sum(n)*100) 

  name      value_sign     n  prop
  <chr>     <fct>      <int> <dbl>
1 month_pct negative       2  18.2
2 month_pct zero           0   0  
3 month_pct positive       9  81.8
4 year_pct  negative       9  81.8
5 year_pct  zero           0   0  
6 year_pct  positive       2  18.2

Upvotes: 2

Related Questions