Reputation: 287
Given a tau
vector I want to truncate each row of a data matrix, this is done by the trunc_operator()
function below. Then compute the sample covariance matrix of the data using this truncated data.
Currently I am using the following functions that are written in R code:
trunc_operator = function(tau, x){
x = ifelse(abs(x) > tau, tau*sign(x), x)
return(x)
}
fun_tau = function(tau, data){t(apply(data, 1, trunc_operator, tau = tau))}
mat_prod = function(x,y){
x %*% t(y)
}
acf_fun = function(data, lag = 0){
if(lag != 0){
data_1 = data[-(nrow(data):nrow(data)-lag), ]
data_2 = data[-(1:lag),]
} else {
data_1 = data
data_2 = data
}
acf_mat = mapply(mat_prod, asplit(data_1,1), asplit(data_2,1), SIMPLIFY = F)
acf_mat = 1/length(acf_mat) * Reduce("+", acf_mat)
return(acf_mat)
}
That are then used in this function:
acf_fun_trunc = function(tau, data, lag){
truncated_data = fun_tau(tau = tau, data = data)
truncated_cov = acf_fun(truncated_data)
return(truncated_cov)
}
I now want to carry this out using Rcpp, to speed it up as in a previous similar question, this sped up the code by a lot. I do not know much about cpp code, and was wondering if someone could provide cpp code that carries out the same thing that my R code does?
# Creating Dataset
n = 300
d = 50
data = rt(n = n*50, df = 4.1) / sqrt((4.1/2.1))
data = matrix(data, nrow = n, ncol = d)
tau = 1:50
# Carrying out my R computations
## R functions
trunc_operator = function(tau, x){
x = ifelse(abs(x) > tau, tau*sign(x), x)
return(x)
}
fun_tau = function(tau, data){t(apply(data, 1, trunc_operator, tau = tau))}
mat_prod = function(x,y){
x %*% t(y)
}
acf_fun = function(data, lag = 0){
if(lag != 0){
data_1 = data[-(nrow(data):nrow(data)-lag), ]
data_2 = data[-(1:lag),]
} else {
data_1 = data
data_2 = data
}
acf_mat = mapply(mat_prod, asplit(data_1,1), asplit(data_2,1), SIMPLIFY = F)
acf_mat = 1/length(acf_mat) * Reduce("+", acf_mat)
return(acf_mat)
}
acf_fun_trunc = function(tau, data){
truncated_data = fun_tau(tau = tau, data = data)
truncated_cov = acf_fun(truncated_data)
return(truncated_cov)
}
# Running job
acf_fun_trunc(tau, data)
Upvotes: 0
Views: 77
Reputation: 17011
Before moving to Rcpp, try vectorizing your code. It can be done without apply
, mapply
, asplit
, or Reduce
:
fun_tau2 <- function(tau, data) pmin(pmax(data, -tau), tau)
acf_fun2 <- function(data, lag = 0L) {
if (lag) {
tcrossprod(data[,-(nrow(data):(nrow(data) - lag))], data[,-(1:lag)])/ncol(data)
} else {
tcrossprod(data)/ncol(data)
}
}
acf_fun_trunc2 <- function(tau, data, lag = 0L) acf_fun2(fun_tau2(tau, data), lag)
# Running job
microbenchmark::microbenchmark(
acf_fun_trunc = acf_fun_trunc(tau, data),
acf_fun_trunc2 = acf_fun_trunc2(tau, t(data)),
check = "equal"
)
#> Unit: microseconds
#> expr min lq mean median uq max neval
#> acf_fun_trunc 8101.801 9283.7015 11591.260 9940.4010 14000.3010 21713.101 100
#> acf_fun_trunc2 379.502 413.2015 537.038 445.6015 473.0505 9398.901 100
Also, I think you want data_1 = data[-(nrow(data):nrow(data)-lag), ]
to be data_1 = data[-(nrow(data):(nrow(data)-lag)), ]
instead.
Upvotes: 1
Reputation: 287
I have got this so far:
Rcpp::cppFunction("
NumericMatrix truncateAndComputeCovariance(NumericMatrix data, NumericVector tau) {
int n = data.nrow(); // Number of rows in the data matrix
int p = data.ncol(); // Number of columns in the data matrix
NumericMatrix covariance(p, p); // Matrix to store the covariance
// Loop through each row of the data matrix
for (int i = 0; i < n; i++) {
// Loop through each column of the data matrix
for (int j = 0; j < p; j++) {
// Truncate the data element using the corresponding tau value
if (std::abs(data(i, j)) <= tau[j]) {
data(i, j) = data(i, j); // No truncation needed
} else {
data(i, j) = tau[j] * std::copysign(1.0, data(i, j)); // Truncate to tau with sign
}
}
}
// Compute the covariance without estimating the sample mean
for (int i = 0; i < p; i++) {
for (int j = i; j < p; j++) { // Only need to compute the upper triangular part
double sum = 0.0;
for (int k = 0; k < n; k++) {
sum += data(k, i) * data(k, j);
}
covariance(i, j) = sum / (n);
covariance(j, i) = covariance(i, j); // Covariance matrix is symmetric
}
}
return covariance;
}")
Upvotes: 0