Reputation: 5951
It's best explained with an example.
I have a vector (or a column of a data.frame) named vec:
vec <- c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA)
I would like a vectorized process (not a for loop) that changes the three NA values following each observed 1 into 1.
The end vector would be:
c(NA, NA, 1, 1, 1, 1, NA, 1, 1, 1, 1, NA, NA, NA)
If we had:
vec <- c(NA, NA, 1, NA, 1, NA, NA, 1, NA, NA, NA, NA, NA, NA)
The end vector would look like:
c(NA, NA, 1, 1, 1, 1, 1, 1, 1, 1, 1, NA, NA, NA)
A very badly written solution is:
# Loop-based reference: an element becomes 1 when it, or any of the (up to)
# three elements before it, is not NA in the original vector.
# The original iterated over index(v) and tested v[i], but the vector is
# called vec; seq_along() also copes with an empty vector.
vec2 <- vec
for (i in seq_along(vec)) {
  # Look-back window, clipped at the start of the vector.
  window <- vec[max(1, i - 3):i]
  if (any(!is.na(window))) {
    vec2[i] <- 1
  }
}
Upvotes: 13
Views: 2632
Reputation: 24480
Another option:
# outer() adds the offsets 1:3 to every position holding a 1; calling `[<-`
# as a function returns the vector with those positions set to 1.
`[<-`(vec,c(outer(which(vec==1),1:3,"+")),1)
# [1] NA NA 1 1 1 1 NA 1 1 1 1 NA NA NA
Although the above works with the examples, it stretches the length of vec
if a 1 is found in the last positions. Better to make a simple check and wrap into a function:
# Set the three elements following each 1 to 1 without lengthening the vector.
#
# vec: vector of NA and 1 values.
# Returns vec with the affected positions replaced by 1.
threeNAs <- function(vec) {
  # Positions currently holding a 1.
  ones <- which(vec == 1)
  # Every position one, two or three steps after a 1.
  targets <- as.vector(outer(ones, 1:3, "+"))
  # Keep only targets that lie inside the vector.
  in_range <- targets <= length(vec)
  vec[targets[in_range]] <- 1
  vec
}
Upvotes: 19
Reputation: 83215
Another fast solution:
# The three positions after each 1. Targets beyond the end are dropped,
# otherwise a 1 near the end would make the assignment lengthen vec.
idx <- rep(which(vec == 1), each = 3) + 1:3
vec[idx[idx <= length(vec)]] <- 1
which gives:
> vec
 [1] NA NA 1 1 1 1 NA 1 1 1 1 NA NA NA
Benchmarking is only really useful when done on larger datasets. Here is a benchmark of the posted solutions on a vector 10,000 times longer (the example vector repeated 1e4 times):
library(microbenchmark)
# Every expression first rebuilds the input (the 14-element example repeated
# 1e4 times) and then applies one of the posted answers, so the rebuild cost
# is included in each timing.
microbenchmark(ans.jaap = {vec <- rep(c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA),1e4);
# rep(..., each = 3) + 1:3 yields the three positions after each 1.
vec[rep(which(vec == 1), each = 3) + c(1:3)] <- 1},
ans.989 = {vec <- rep(c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA),1e4);
r <- which(vec==1);
# seq(r, r + 3) per 1: the 1 itself plus the three positions after it.
vec[c(mapply(seq, r, r+3))] <- 1},
ans.sotos = {vec <- rep(c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA),1e4);
vec[unique(as.vector(t(sapply(which(vec == 1), function(i) seq(i+1, length.out = 3)))))] <- 1},
ans.gregor = {vec <- rep(c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA),1e4);
# Shift-and-add: NA becomes 0, the vector is added to copies of itself
# shifted right by 1, 2 and 3, and remaining zeros go back to NA.
# NOTE(review): the standalone answer also resets non-zero sums to 1; that
# step is not part of this timed expression.
vec[is.na(vec)] <- 0;
n <- length(vec);
vec <- vec + c(0, vec[1:(n-1)]) + c(0, 0, vec[1:(n-2)]) + c(0, 0, 0, vec[1:(n-3)]);
vec[vec == 0] <- NA},
ans.moody = {vec <- rep(c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA),1e4);
# Per-element window test; the result is stored in output, not in vec.
output <- sapply(1:length(vec),function(i){any(!is.na(vec[max(0,i-3):i]))});
output[output] <- 1;
output[output==0] <- NA},
ans.nicola = {vec <- rep(c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA),1e4);
# Direct call of the replacement function; the result is not assigned.
`[<-`(vec,c(outer(which(vec==1),1:3,"+")),1)})
which gives the following benchmark:
Unit: microseconds
       expr        min         lq       mean     median         uq        max neval cld
   ans.jaap   1778.905   1937.414   3064.686   2100.595   2257.695  86233.593   100 a
    ans.989  87688.166  89638.133  96992.231  90986.269  93326.393 182431.366   100 c
  ans.sotos 125344.157 127968.113 132386.664 130117.438 132951.380 214460.174   100 d
 ans.gregor   4036.642   5824.474  10861.373   6533.791   7654.587  87806.955   100 b
  ans.moody 173146.810 178369.220 183698.670 180318.799 184000.062 264892.878   100 e
 ans.nicola    966.927   1390.486   1723.395   1604.037   1904.695   3310.203   100 a
Upvotes: 14
Reputation: 51582
A non-Vectorized solution, but nevertheless, another option using base R,
# Positions to overwrite: the three slots after each 1, clipped to the vector.
# vapply() still returns a numeric result when no 1 is present, whereas
# sapply() returns an empty list, which cannot be used as a subscript.
next_three <- function(x) {
  idx <- unique(as.vector(
    vapply(which(x == 1), function(i) i + 1:3, numeric(3))
  ))
  # Drop targets past the end so the assignment never lengthens x.
  idx[idx <= length(x)]
}
vec[next_three(vec)] <- 1
vec
#[1] NA NA 1 1 1 1 NA 1 1 1 1 NA NA NA
# Second example from the question (was used here without being defined).
vec1 <- c(NA, NA, 1, NA, 1, NA, NA, 1, NA, NA, NA, NA, NA, NA)
vec1[next_three(vec1)] <- 1
vec1
#[1] NA NA 1 1 1 1 1 1 1 1 1 NA NA NA
Upvotes: 2
Reputation: 47300
With sapply, any, and is.na:
# TRUE where the element itself or one of its (up to) three predecessors is
# not NA. seq_along() copes with an empty vec (1:length(vec) would iterate
# over c(1, 0)), vapply() fixes the result type, and max(1, ...) keeps the
# window start a valid index instead of relying on index 0 being dropped.
hit <- vapply(
  seq_along(vec),
  function(i) any(!is.na(vec[max(1, i - 3):i])),
  logical(1)
)
# Numeric result: 1 where a non-NA value is within reach, NA elsewhere.
output <- rep(NA_real_, length(vec))
output[hit] <- 1
Upvotes: 0
Reputation: 12937
How about this:
# Positions of the 1s.
r <- which(vec == 1)
# For each 1, the run from its own position to three places after it, capped
# at the end of vec so the assignment cannot lengthen the vector.
# Map() + unlist() gives NULL (an empty subscript) when there is no 1, whereas
# mapply(seq, ...) would return list(), which is not a valid subscript.
idx <- unlist(Map(seq, r, pmin(r + 3, length(vec))))
vec[idx] <- 1
Examples:
vec <- c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA)
#[1] NA NA 1 1 1 1 NA 1 1 1 1 NA NA NA
vec <- c(NA, NA, 1, NA, 1, NA, NA, 1, NA, NA, NA, NA, NA, NA)
#[1] NA NA 1 1 1 1 1 1 1 1 1 NA NA NA
Upvotes: -1
Reputation: 26248
What really is 'vectorised', if not a loop written in a C-language?
Here's a C++ loop that benchmarks well.
vec <- c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA)
library(Rcpp)
cppFunction('NumericVector fixVec(NumericVector myVec){
  // Replace up to three NA values after each 1 with 1 and return the result.
  // Work on a copy: a NumericVector wraps the memory of the R vector passed
  // in, so writing to myVec directly would also change the variable held by
  // the caller.
  NumericVector out = clone(myVec);
  int n = out.size();
  // 0: no 1 seen yet; 1..3: the next NA is the k-th NA since the last 1;
  // above 3: the three replacements for the last 1 are used up.
  int foundCount = 0;
  for(int i = 0; i < n; i++){
    if(out[i] == 1) foundCount = 1;
    if(ISNA(out[i])){
      // Logical && rather than bitwise & for combining the two comparisons.
      if(foundCount >= 1 && foundCount <= 3){
        out[i] = 1;
        foundCount++;
      }
    }
  }
  return out;
}')
fixVec(vec)
# [1] NA NA 1 1 1 1 NA 1 1 1 1 NA NA NA
Benchmarks
library(microbenchmark)
# Compares the two fastest R answers with the Rcpp function fixVec defined
# via cppFunction. Each expression rebuilds the input (the 14-element example
# repeated 1e4 times) before applying the method.
microbenchmark(
ans.jaap = {
vec <- rep(c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA),1e4);
# each = 4 with offsets 0:3 also reassigns the 1 itself.
vec[rep(which(vec == 1), each = 4) + c(0:3)] <- 1
},
ans.nicola = {
vec <- rep(c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA),1e4);
# Offsets 0:3 here as well; the result of the call is not assigned.
`[<-`(vec,c(outer(which(vec==1),0:3,"+")),1)
},
ans.symbolix = {
vec <- rep(c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA),1e4);
vec <- fixVec(vec)
}
)
# Unit: microseconds
# expr min lq mean median uq max neval
# ans.jaap 2017.789 2264.318 2905.2437 2579.315 3588.4850 4667.249 100
# ans.nicola 1242.002 1626.704 3839.4768 2095.311 3066.4795 81299.962 100
# ans.symbolix 504.577 533.426 838.5661 718.275 966.9245 2354.373 100
# Sanity check: the C++ result and the indexing-based result are identical
# on the benchmark input.
vec <- rep(c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA),1e4)
vec <- fixVec(vec)
vec2 <- rep(c(NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA),1e4)
vec2[rep(which(vec2 == 1), each = 4) + c(0:3)] <- 1
identical(vec, vec2)
# [1] TRUE
Upvotes: 5
Reputation: 7635
The following code does what you asked for. It involves "shifting" the vector and then adding the shifted versions
# Shift-and-add: treat NA as 0, add the vector to copies of itself shifted
# right by 1, 2 and 3 places, then map non-zero sums to 1 and zeros to NA.
vec[is.na(vec)] <- 0
n <- length(vec)
# Right-shift by k with zero padding. head() with a clamped count keeps this
# valid for vectors of length <= 3, where vec[1:(n - k)] would index
# backwards (for example 1:0) and return the wrong number of elements.
shift_right <- function(x, k) {
  c(rep(0, min(k, n)), head(x, max(n - k, 0)))
}
vec <- vec + shift_right(vec, 1) + shift_right(vec, 2) + shift_right(vec, 3)
# Normalise the sums to 1 before reintroducing NA, so that neither logical
# subscript contains NA.
vec[vec != 0] <- 1
vec[vec == 0] <- NA
# vec | 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0 ,0, 0
# c(0, vec[1:(n-1)]) | + 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0 ,0, 0
# c(0, 0, vec[1:(n-2)]) | + 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0 ,0
# c(0,0,0,vec[1:(n-3)]) | + 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0
# |-------------------------------------------
# | 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0
Upvotes: 3