Reputation: 469
I have hourly/daily stock return data similar to the following:
set.seed(1)
dt <- data.table(stock = c(rep("a",24),rep("b",24),rep("c",24),rep("d",24)),
hour = rep(1:24,4), day1 = sample(-5:5,96,replace = TRUE),
day2 = sample(-10:-1,96,replace = TRUE), day3 = sample(0:10,96,replace = TRUE),
day4 = 0)
Which looks like:
stock hour day1 day2 day3 day4
a 1 -3 -6 1 0
a 2 -1 -6 10 0
a 3 1 -2 3 0
...
d 22 4 -5 1 0
d 23 3 -3 3 0
d 24 3 -7 1 0
I would like to count the following for each day:
The output should look similar to this:
counts day1 day2 day3 day4
stocks_where_all_hours_negative 0 4 0 0
stocks_where_all_hours_positive 0 0 4 0
stocks_where_mixed_pos_and_neg 4 0 0 0
stocks_where_at_least_one_zero 4 0 2 4
stocks_where_all_zero 0 0 0 4
Thanks so much.
Upvotes: 0
Views: 66
Reputation: 14360
Since the summary you are doing is so highly specialized, you could write your own function:
summarize_stocks <- function(stock, day, return_names = FALSE){
if(!return_names) return( c(length(unique(stock[as.logical(ave(day, stock, FUN = function(x) all(x<0)))])),
length(unique(stock[as.logical(ave(day, stock, FUN = function(x) all(x>0)))])),
length(unique(stock[as.logical(ave(day, stock, FUN = function(x) any(x>0) & any(x<0)))])),
length(unique(stock[as.logical(ave(day, stock, FUN = function(x) any(x==0)))])),
length(unique(stock[as.logical(ave(day, stock, FUN = function(x) all(x==0)))]))
))
else return(c("stocks_where_all_hours_negative", "stocks_where_all_hours_positive", "stocks_where_mixed_pos_and_neg",
"stocks_where_at_least_one_zero", "stocks_where_all_zero"))
}
res <- dt[, lapply(.SD, summarize_stocks, stock = stock), .SDcols = c("day1","day2", "day3", "day4")]
res[, counts := summarize_stocks(return_names = TRUE)]
res
# day1 day2 day3 day4 counts
#1: 0 4 0 0 stocks_where_all_hours_negative
#2: 0 0 2 0 stocks_where_all_hours_positive
#3: 4 0 0 0 stocks_where_mixed_pos_and_neg
#4: 4 0 2 4 stocks_where_at_least_one_zero
#5: 0 0 0 4 stocks_where_all_zero
Or if you want to get fancy you could change your summarize_stocks
to:
summarize_stocks <- function(stock, day, return_names = FALSE){
funs <- list(function(x) all(x<0),
function(x) all(x>0),
function(x) any(x>0) & any(x<0),
function(x) any(x==0),
function(x) all(x==0))
if(!return_names) return( vapply(funs, function(f) length(unique(stock[as.logical(ave(day, stock, FUN = function(x) f(x)))])), numeric(1L)))
else return(c("stocks_where_all_hours_negative", "stocks_where_all_hours_positive", "stocks_where_mixed_pos_and_neg",
"stocks_where_at_least_one_zero", "stocks_where_all_zero"))
}
Upvotes: 1
Reputation: 1364
mdt <- melt(
dt,
id.vars = c('stock', 'hour'),
measure.vars = c('day1', 'day2', 'day3', 'day4'),
variable.name = 'day',
value.name = 'position'
)
counts <- mdt[ ,.(stocks_where_all_hours_negative = all(position < 0),
stocks_where_all_hours_positive = all(position > 0),
stocks_where_mixed_pos_and_neg = any(position < 0) & any(position > 0),
stocks_where_at_least_one_zero = any(position != 0),
stocks_where_all_zero = all(position == 0)),
by = c('day',
'stock')][ ,.(stocks_where_all_hours_negative = sum(stocks_where_all_hours_negative),
stocks_where_all_hours_positive = sum(stocks_where_all_hours_positive),
stocks_where_mixed_pos_and_neg = sum(stocks_where_mixed_pos_and_neg),
stocks_where_at_least_one_zero = sum(stocks_where_at_least_one_zero),
stocks_where_all_zero = sum(stocks_where_all_zero)),
by = 'day']
t(counts)
First reshape the data to 'long' format so that you can group by days. any
and all
functions will check the logic for each vector and return the scenarios you have set up.
Upvotes: 2