Reputation: 1361
I have a random walk with some drift. My goal is to create a function that adds a column to this data.table
labeling each row with the "zone" it is in, based on its cumulative % gain and % drawdown.
library(data.table)
set.seed(1)
# Simulate 50 normally distributed returns with a small positive drift
# (mean 0.2%, sd 1%), indexed by an integer `date`.
df <- data.table(date = seq_len(50), ret = rnorm(50, mean = 0.002, sd = 0.01))
# Compound the returns into the value of the walk over time.
df[, val := cumprod(1 + ret)]
# Drawdown: relative distance of the value from its running maximum.
df[, draw_down := val / cummax(val) - 1]
The first zone occurs in the first row and goes up until either a 5% cumulative gain
or 2% drawdown
occurs.
The second zone starts one row after the first zone ends, and continues until the same happens again, a 5% cumulative gain
or 2% drawdown
This repeats until neither of those things occur, in which case the zone continues to the last row.
Here is a reproducible example:
# Label zones one at a time: each pass takes the rows from `idx` to the end,
# restarts the cumulative value and drawdown at that row, picks the row where
# the zone ends, and appends the labelled rows to `res`.
# start with the first row and zone of 1
idx <- 1
count <- 1
res <- data.table()
while (idx <= nrow(df)) {
# grab the start of the zone and all future rows
tmp <- df[idx:.N]
# recompute value and drawdown so they restart at the first row of this slice
tmp[, val := cumprod(1 + ret)]
tmp[, draw_down := val / cummax(val) - 1]
# row(s) holding the most negative drawdown among those at or below -2%;
# when none qualifies, min() of the empty vector returns Inf (with a warning)
# and which() yields integer(0)
# NOTE(review): this selects the deepest qualifying drawdown in the remaining
# rows, not necessarily the first row to cross -2% -- confirm this is intended
loss_idx <- which(
tmp$draw_down == min(tmp$draw_down[tmp$draw_down <= -.02])
)
# row(s) holding the smallest cumulative value that is at least 1.05; the same
# empty-vector behaviour of min() applies when the gain threshold is never met
# NOTE(review): likewise this is the smallest qualifying value, not necessarily
# the first row to reach +5% -- confirm this is intended
gain_idx <- which(tmp$val == min(tmp$val[tmp$val >= 1.05]))
# if neither threshold is met, label all remaining rows with the current zone
# and exit
if (length(loss_idx) == 0 & length(gain_idx) == 0) {
tmp[, zone := count]
res <- rbind(res, tmp)
break
}
# mark the zone: rows from the slice start up to the earlier of the two
# selected rows; rows after that keep zone = NA
tmp[1:min(gain_idx, loss_idx), zone := count]
# the next zone starts on the row after the zone end; this relies on `date`
# being equal to the row number of `df`
idx <- tmp[min(gain_idx, loss_idx)]$date + 1
# progress output: prints the starting row of the next zone
print(idx)
# increment our zone
count <- count + 1
# keep only the rows labelled in this pass
res <- rbind(res, tmp[!is.na(zone)])
}
I have tried getting the indices of where these zone points would occur. But then I run into the problem of needing to recalculate the val
and drawdown
based on the last zone's index. I cannot figure out a way to vectorize that. Maybe using a roll
function would be effective here?
The problem boils down to knowing the draw-down by zone, but needing the previous zone in order to calculate the draw-down. Similarly with the cumulative return. Is it possible to vectorize this function if it depends on the previous value?
Any help in any direction would be greatly appreciated in trying to achieve the desired output below.
the desired output:
> res
date ret val draw_down zone
<int> <dbl> <dbl> <dbl> <dbl>
1 -0.0042645381 0.9957355 0.0000000000 1
2 0.0038364332 0.9995555 0.0000000000 1
3 -0.0063562861 0.9932021 -0.0063562861 1
4 0.0179528080 1.0110328 0.0000000000 1
5 0.0052950777 1.0163863 0.0000000000 1
6 -0.0062046838 1.0100800 -0.0062046838 1
7 0.0068742905 1.0170236 0.0000000000 1
8 0.0093832471 1.0265665 0.0000000000 1
9 0.0077578135 1.0345305 0.0000000000 1
10 -0.0010538839 1.0334402 -0.0010538839 1
11 0.0171178117 1.0511304 0.0000000000 1
12 0.0058984324 1.0058984 0.0000000000 2
13 -0.0042124058 1.0016612 -0.0042124058 2
14 -0.0201469989 0.9814807 -0.0242745373 2
15 0.0132493092 1.0132493 0.0000000000 3
16 0.0015506639 1.0148205 0.0000000000 3
17 0.0018380974 1.0166859 0.0000000000 3
18 0.0114383621 1.0283151 0.0000000000 3
19 0.0102122120 1.0388164 0.0000000000 3
20 0.0079390132 1.0470636 0.0000000000 3
21 0.0111897737 1.0587800 0.0000000000 3
22 0.0098213630 1.0691787 0.0000000000 3
23 0.0027456498 1.0721143 0.0000000000 3
24 -0.0178935170 1.0529304 -0.0178935170 3
25 0.0081982575 1.0615626 -0.0098419551 3
26 0.0014387126 1.0630899 -0.0084174023 3
27 0.0004420449 1.0635598 -0.0079790782 3
28 -0.0127075238 1.0500446 -0.0205852077 3
29 -0.0027815006 0.9972185 0.0000000000 4
30 0.0061794156 1.0033807 0.0000000000 4
31 0.0155867955 1.0190202 0.0000000000 4
32 0.0009721227 1.0200108 0.0000000000 4
33 0.0058767161 1.0260051 0.0000000000 4
34 0.0014619496 1.0275051 0.0000000000 4
35 -0.0117705956 1.0154108 -0.0117705956 4
36 -0.0021499456 1.0132277 -0.0138952351 4
37 -0.0019428995 1.0112591 -0.0158111376 4
38 0.0014068660 1.0126818 -0.0144265157 4
39 0.0130002537 1.0258469 -0.0016138103 4
40 0.0096317575 1.0357276 0.0000000000 4
41 0.0003547640 1.0360951 0.0000000000 4
42 -0.0005336168 1.0355422 -0.0005336168 4
43 0.0089696338 1.0448306 0.0000000000 4
44 0.0075666320 1.0527365 0.0000000000 4
45 -0.0048875569 0.9951124 0.0000000000 5
46 -0.0050749516 0.9900623 -0.0050749516 5
47 0.0056458196 0.9956520 0.0000000000 5
48 0.0096853292 1.0052952 0.0000000000 5
49 0.0008765379 1.0061764 0.0000000000 5
50 0.0108110773 1.0170543 0.0000000000 5
Upvotes: 4
Views: 191
Reputation: 25225
Assuming that you are exploring vectorization to speed up the calculations, here is another option that speeds them up using Rcpp:
library(Rcpp)
# Compile a C++ helper, zoning(), that assigns a zone number to each element of
# a numeric vector in a single pass.
#   - `zone` starts at 1 and `x0`, the reference value, starts as the first
#     element of the input.
#   - The loop begins at the second element: each element receives the current
#     zone number; the first element is never assigned inside the loop.
#   - When an element divided by `x0` is below 0.98 or above 1.05, the zone
#     counter is incremented and, if a next element exists, `x0` is reset to
#     that next element.
# Returns an integer vector with the same length as the input.
cppFunction("IntegerVector zoning(NumericVector idx) {
int zone = 1, n = idx.size();
IntegerVector res = IntegerVector(n);
double x0 = idx[0];
for (int i = 1; i < n; i++) {
res[i] = zone;
if (idx[i]/x0 < 0.98 || idx[i]/x0 > 1.05) {
if (i+1 < n) {
x0 = idx[i+1];
}
zone++;
}
}
return res;
}")
# Prepend 1 so the first zone is measured against a starting value of 1, then
# drop the result for that extra leading element with [-1L].
df[, zone := zoning(c(1, val))[-1L]]
output:
date ret val zone
1: 1 -0.0042645381 0.9957355 1
2: 2 0.0038364332 0.9995555 1
3: 3 -0.0063562861 0.9932021 1
4: 4 0.0179528080 1.0110328 1
5: 5 0.0052950777 1.0163863 1
6: 6 -0.0062046838 1.0100800 1
7: 7 0.0068742905 1.0170236 1
8: 8 0.0093832471 1.0265665 1
9: 9 0.0077578135 1.0345305 1
10: 10 -0.0010538839 1.0334402 1
11: 11 0.0171178117 1.0511304 1
12: 12 0.0058984324 1.0573304 2
13: 13 -0.0042124058 1.0528765 2
14: 14 -0.0201469989 1.0316642 2
15: 15 0.0132493092 1.0453331 3
16: 16 0.0015506639 1.0469540 3
17: 17 0.0018380974 1.0488784 3
18: 18 0.0114383621 1.0608759 3
19: 19 0.0102122120 1.0717098 3
20: 20 0.0079390132 1.0802181 3
21: 21 0.0111897737 1.0923055 3
22: 22 0.0098213630 1.1030334 3
23: 23 0.0027456498 1.1060620 4
24: 24 -0.0178935170 1.0862706 4
25: 25 0.0081982575 1.0951762 4
26: 26 0.0014387126 1.0967518 4
27: 27 0.0004420449 1.0972366 4
28: 28 -0.0127075238 1.0832934 4
29: 29 -0.0027815006 1.0802803 5
30: 30 0.0061794156 1.0869558 5
31: 31 0.0155867955 1.1038979 5
32: 32 0.0009721227 1.1049710 5
33: 33 0.0058767161 1.1114646 5
34: 34 0.0014619496 1.1130896 5
35: 35 -0.0117705956 1.0999878 5
36: 36 -0.0021499456 1.0976229 5
37: 37 -0.0019428995 1.0954903 5
38: 38 0.0014068660 1.0970316 5
39: 39 0.0130002537 1.1112932 5
40: 40 0.0096317575 1.1219969 5
41: 41 0.0003547640 1.1223950 5
42: 42 -0.0005336168 1.1217961 5
43: 43 0.0089696338 1.1318582 5
44: 44 0.0075666320 1.1404225 5
45: 45 -0.0048875569 1.1348486 6
46: 46 -0.0050749516 1.1290893 6
47: 47 0.0056458196 1.1354640 6
48: 48 0.0096853292 1.1464613 6
49: 49 0.0008765379 1.1474662 6
50: 50 0.0108110773 1.1598716 6
date ret val zone
Courtesy of https://rdrr.io/snippets/
Upvotes: 2
Reputation: 160417
I don't think a rolling calculation is the right way to go: typically they have fixed windows, whereas this is a bit more dynamic. Similarly, a cumulative operation (e.g., cumsum
) won't work for similar reasons. (That's not to say that I can't warp a zoo::rollapply
approach to do this, but I think it'd be much less efficient than this recommended approach.)
Here's a simple while
loop that appears to provide the zone
you're asking for:
# Collect the row number at which each new zone begins. A zone ends on the
# first row whose value, relative to the zone's first row, is at least 1.05 or
# at most 0.98; the next zone begins on the following row.
breaks <- integer(0)
n_rows <- nrow(df)
zone_start <- 1L
repeat {
  if (zone_start > n_rows) break
  remaining <- df$val[zone_start:n_rows]
  relative <- remaining / remaining[1]
  crossed <- which(relative >= 1.05 | relative <= 0.98)
  # no further crossing: the current zone runs to the last row
  if (length(crossed) == 0L) break
  zone_start <- zone_start + crossed[1]
  breaks <- c(breaks, zone_start)
}
# Each zone-start row bumps the running count, so zones are numbered from 0.
df[, zone := cumsum(seq_len(.N) %in% breaks)]
# date ret val draw_down zone
# <int> <num> <num> <num> <int>
# 1: 1 -0.0042645381 0.9957355 0.0000000000 0
# 2: 2 0.0038364332 0.9995555 0.0000000000 0
# 3: 3 -0.0063562861 0.9932021 -0.0063562861 0
# 4: 4 0.0179528080 1.0110328 0.0000000000 0
# 5: 5 0.0052950777 1.0163863 0.0000000000 0
# 6: 6 -0.0062046838 1.0100800 -0.0062046838 0
# 7: 7 0.0068742905 1.0170236 0.0000000000 0
# 8: 8 0.0093832471 1.0265665 0.0000000000 0
# 9: 9 0.0077578135 1.0345305 0.0000000000 0
# 10: 10 -0.0010538839 1.0334402 -0.0010538839 0
# 11: 11 0.0171178117 1.0511304 0.0000000000 0
# 12: 12 0.0058984324 1.0573304 0.0000000000 1
# 13: 13 -0.0042124058 1.0528765 -0.0042124058 1
# 14: 14 -0.0201469989 1.0316642 -0.0242745373 1
# 15: 15 0.0132493092 1.0453331 -0.0113468490 2
# 16: 16 0.0015506639 1.0469540 -0.0098137803 2
# 17: 17 0.0018380974 1.0488784 -0.0079937216 2
# 18: 18 0.0114383621 1.0608759 0.0000000000 2
# 19: 19 0.0102122120 1.0717098 0.0000000000 2
# 20: 20 0.0079390132 1.0802181 0.0000000000 2
# 21: 21 0.0111897737 1.0923055 0.0000000000 2
# 22: 22 0.0098213630 1.1030334 0.0000000000 2
# 23: 23 0.0027456498 1.1060620 0.0000000000 3
# 24: 24 -0.0178935170 1.0862706 -0.0178935170 3
# 25: 25 0.0081982575 1.0951762 -0.0098419551 3
# 26: 26 0.0014387126 1.0967518 -0.0084174023 3
# 27: 27 0.0004420449 1.0972366 -0.0079790782 3
# 28: 28 -0.0127075238 1.0832934 -0.0205852077 3
# 29: 29 -0.0027815006 1.0802803 -0.0233094505 4
# 30: 30 0.0061794156 1.0869558 -0.0172740737 4
# 31: 31 0.0155867955 1.1038979 -0.0019565256 4
# 32: 32 0.0009721227 1.1049710 -0.0009863049 4
# 33: 33 0.0058767161 1.1114646 0.0000000000 4
# 34: 34 0.0014619496 1.1130896 0.0000000000 4
# 35: 35 -0.0117705956 1.0999878 -0.0117705956 4
# 36: 36 -0.0021499456 1.0976229 -0.0138952351 4
# 37: 37 -0.0019428995 1.0954903 -0.0158111376 4
# 38: 38 0.0014068660 1.0970316 -0.0144265157 4
# 39: 39 0.0130002537 1.1112932 -0.0016138103 4
# 40: 40 0.0096317575 1.1219969 0.0000000000 4
# 41: 41 0.0003547640 1.1223950 0.0000000000 4
# 42: 42 -0.0005336168 1.1217961 -0.0005336168 4
# 43: 43 0.0089696338 1.1318582 0.0000000000 4
# 44: 44 0.0075666320 1.1404225 0.0000000000 4
# 45: 45 -0.0048875569 1.1348486 -0.0048875569 5
# 46: 46 -0.0050749516 1.1290893 -0.0099377044 5
# 47: 47 0.0056458196 1.1354640 -0.0043479913 5
# 48: 48 0.0096853292 1.1464613 0.0000000000 5
# 49: 49 0.0008765379 1.1474662 0.0000000000 5
# 50: 50 0.0108110773 1.1598716 0.0000000000 5
# date ret val draw_down zone
And a simple function to do the same:
#' Label consecutive "zones" along a numeric series
#'
#' A zone ends on the first element whose ratio to the zone's first element is
#' at least `up` or at most `down`; the next zone begins on the following
#' element. If no element crosses either threshold, the current zone runs to
#' the end of the series.
#'
#' @param x Numeric vector of values (e.g. the value of a random walk).
#' @param up Ratio at or above which a zone ends.
#' @param down Ratio at or below which a zone ends.
#' @return An integer vector the same length as `x` with zone numbers starting
#'   at 0; `integer(0)` when `x` is empty.
func <- function(x, up = 1.05, down = 0.98) {
  n <- length(x)
  zone_starts <- integer(0)
  if (n == 0L) {
    return(zone_starts)
  }
  pos <- 1L
  repeat {
    if (pos > n) break
    tail_vals <- x[pos:n]
    # ratio of every remaining value to the first value of the current zone
    rel <- tail_vals / tail_vals[1]
    crossed <- which(rel >= up | rel <= down)
    if (length(crossed) == 0L) break
    # the new zone starts on the element after the first crossing
    pos <- pos + crossed[1]
    zone_starts <- c(zone_starts, pos)
  }
  # every zone-start position increments the running zone number
  cumsum(seq_along(x) %in% zone_starts)
}
# Add the zone column, passing the up/down thresholds explicitly (these are
# the same values as the defaults of func()).
df[, zone := func(val, 1.05, 0.98) ]
Upvotes: 2