Reputation: 1211
I'm fiddling around with the Rcpp code below, which works like R's expand.grid(). The goal is to take an unknown number of input vectors of any data class and output all combinations across the inputs. For example, given a<-c(1,2)
and b<-c(3,4,5)
, the function combine_vectors(list(a,b))
returns:
v1 v2
1 1 3
2 1 4
3 1 5
4 2 3
5 2 4
6 2 5
I have it working for numeric input vectors, but for the life of me can't figure out how to handle both strings and numeric vectors. It seems like the crucial change I need to make is to the
std::vector<std::vector<int>*> combo_vectors;
to be either generic (instead of <int>
) or to switch based on case. Ideally, I would like to do the type checking in C++.
#include <Rcpp.h>
using namespace Rcpp;
// [[Rcpp::export]]
DataFrame combine_vectors(List vectors) {
int num_vectors = vectors.length();
std::vector<int> indices(num_vectors, 0);
std::vector<std::vector<int>*> combo_vectors;
for (int i = 0; i < num_vectors; i++) {
combo_vectors.push_back(new std::vector<int>());
}
bool done = false;
while (!done) {
for (int i = 0; i < num_vectors; i++) {
SEXP vector = vectors[i];
switch (TYPEOF(vector)) {
case INTSXP:
combo_vectors[i]->push_back(IntegerVector(vector)[indices[i]]);
break;
case REALSXP:
combo_vectors[i]->push_back(NumericVector(vector)[indices[i]]);
break;
default:
Rcpp::stop("unsupported vector type");
}
}
int i = num_vectors - 1;
while (i >= 0) {
indices[i]++;
if (indices[i] == Rf_length(vectors[i])) {
indices[i] = 0;
i--;
} else {
break;
}
}
if (i < 0) {
done = true;
}
}
int num_combos = combo_vectors[0]->size();
List output(num_vectors);
CharacterVector col_names(num_vectors);
for (int i = 0; i < num_vectors; i++) {
switch (TYPEOF(vectors[i])) {
case INTSXP: {
IntegerVector col(num_combos);
for (int j = 0; j < num_combos; j++) {
col[j] = (*combo_vectors[i])[j];
}
output[i] = col;
break;
}
case REALSXP: {
NumericVector col(num_combos);
for (int j = 0; j < num_combos; j++) {
col[j] = (*combo_vectors[i])[j];
}
output[i] = col;
break;
}
default:
Rcpp::stop("unsupported vector type");
}
col_names[i] = std::string("v") + std::to_string(i + 1);
delete combo_vectors[i];
}
output.attr("names") = col_names;
DataFrame df(output);
return df;
}
Edit: I guess my question is: how do I add string vectors? When I added a case for STRSXP, I got the error "no matching function for call to 'push_back(Rcpp::Vector<16>::Proxy)'". Here is the non-working code.
#include <Rcpp.h>
using namespace Rcpp;
// [[Rcpp::export]]
// Attempt to extend combine_vectors() to character vectors.
// This version does NOT compile: every per-column buffer is a
// std::vector<int>, so there is nowhere to store a string element
// (see the STRSXP case in the first switch below).
DataFrame combine_vectors1(List vectors) {
int num_vectors = vectors.length();
// Current position within each input vector (an "odometer").
std::vector<int> indices(num_vectors, 0);
// One heap-allocated int buffer per input vector, regardless of its type.
std::vector <std::vector<int>*>combo_vectors;
for (int i = 0; i < num_vectors; i++) {
combo_vectors.push_back(new std::vector<int>());
}
bool done = false;
while (!done) {
// Append the currently selected element of every input to its buffer.
for (int i = 0; i < num_vectors; i++) {
SEXP vector = vectors[i];
switch (TYPEOF(vector)) {
case INTSXP:
combo_vectors[i]->push_back(IntegerVector(vector)[indices[i]]);
break;
case REALSXP:
// The double element is converted to int by push_back on a vector<int>.
combo_vectors[i]->push_back(NumericVector(vector)[indices[i]]);
break;
case STRSXP:
// Compile error reported in the post: a CharacterVector element proxy
// cannot be passed to std::vector<int>::push_back.
combo_vectors[i]->push_back(CharacterVector(vector)[indices[i]]);
break;
default:
Rcpp::stop("unsupported vector type");
}
}
// Advance the odometer: bump the last index, carrying leftwards on wrap.
int i = num_vectors - 1;
while (i >= 0) {
indices[i]++;
if (indices[i] == Rf_length(vectors[i])) {
indices[i] = 0;
i--;
} else {
break;
}
}
// Every index wrapped around, so all combinations have been visited.
if (i < 0) {
done = true;
}
}
// Reads the first buffer, so this assumes at least one input vector.
int num_combos = combo_vectors[0]->size();
List output(num_vectors);
CharacterVector col_names(num_vectors);
// Copy each int buffer into an R vector of the input's type and free it.
for (int i = 0; i < num_vectors; i++) {
switch (TYPEOF(vectors[i])) {
case INTSXP: {
IntegerVector col(num_combos);
for (int j = 0; j < num_combos; j++) {
col[j] = (*combo_vectors[i])[j];
}
output[i] = col;
break;
}
case REALSXP: {
NumericVector col(num_combos);
for (int j = 0; j < num_combos; j++) {
col[j] = (*combo_vectors[i])[j];
}
output[i] = col;
break;
}
case STRSXP: {
CharacterVector col(num_combos);
for (int j = 0; j < num_combos; j++) {
// NOTE(review): assigns an int to a CharacterVector element; this is
// probably a second type error once the push_back above is fixed - confirm.
col[j] = (*combo_vectors[i])[j];
}
output[i] = col;
break;
}
default:
Rcpp::stop("unsupported vector type");
}
// Columns are named v1, v2, ...
col_names[i] = std::string("v") + std::to_string(i + 1);
delete combo_vectors[i];
}
output.attr("names") = col_names;
DataFrame df(output);
return df;
// NOTE: the function's closing brace is missing from this snippet.
Upvotes: 1
Views: 127
Reputation: 1211
Here is the solution I came up with this evening. Mine is not quite as compact as @Davis Herring's answer, but it works now, and I'm posting it in case it's helpful to someone else.
#include <Rcpp.h>
using namespace Rcpp;
// [[Rcpp::export]]
List expand(List inputVectors) {
//number of input vectors
int num_vectors = inputVectors.length();
//length of each vector
IntegerVector vectorLengths(num_vectors);
//names of output colums
CharacterVector col_names(num_vectors);
// Vector of vector length multiples
IntegerVector mult_Vector(num_vectors);
//Output
List output(num_vectors);
//get the input vector lengths and running multiples
int mult = 1;
for (int i = 0; i < num_vectors; i++) {
//column names
col_names[i] = std::string("v") + std::to_string(i + 1);
//vector lengths
switch( TYPEOF(inputVectors[i]) ) {
case REALSXP: {
vectorLengths[i] = ((NumericVector)inputVectors[i]).length();
}
case INTSXP: {
vectorLengths[i] = ((IntegerVector)inputVectors[i]).length();
}
case STRSXP: {
vectorLengths[i] = ((CharacterVector)inputVectors[i]).length();
}
}
//running multiples
mult_Vector[i]= vectorLengths[i] * mult;
mult = mult_Vector[i];
}
//product of vector lengths
int product = *(std::end(mult_Vector)-1);
//build the expanded combinations
int l=0;
for( List::iterator it = inputVectors.begin(); it != inputVectors.end(); ++it ) {
switch( TYPEOF(*it) ) {
case VECSXP: {
*it = expand(*it);
break;
}
case REALSXP: {
NumericVector tmp(product);
int combinations = product / vectorLengths[l];
int mult_length = product / mult_Vector[l];
int n = 0;
for (int m = 0; m < combinations; m++) {
for (int k = 0; k < vectorLengths[l]; k++) {
for (int j = 0; j < mult_length && n < product; j++) {
tmp[n] = ((NumericVector)inputVectors[l])[k];
n++;
}
}
}
output[l]= tmp;
l++;
break;
}
case INTSXP: {
if( Rf_isFactor(*it) ) break; // factors have internal type INTSXP too
IntegerVector tmp(product);
int combinations = product / vectorLengths[l];
int mult_length = product / mult_Vector[l];
int n = 0;
for (int m = 0; m < combinations; m++) {
for (int k = 0; k < vectorLengths[l]; k++) {
for (int j = 0; j < mult_length && n < product; j++) {
tmp[n] = ((IntegerVector)inputVectors[l])[k];
n++;
}
}
}
output[l]= tmp;
l++;
break;
}
case STRSXP: {
CharacterVector tmp(product);
int combinations = product / vectorLengths[l];
int mult_length = product / mult_Vector[l];
int n = 0;
for (int m = 0; m < combinations; m++) {
for (int k = 0; k < vectorLengths[l]; k++) {
for (int j = 0; j < mult_length && n < product; j++) {
tmp[n] = ((CharacterVector)inputVectors[l])[k];
n++;
}
}
}
output[l]= tmp;
l++;
break;
}
default: {
stop("incompatible SEXP encountered; only accepts lists containing lists, REALSXPs, and INTSXPs");
}
}
}
output.attr("names") = col_names;
DataFrame df(output);
return df;
}
Upvotes: 0
Reputation: 40023
Not that I know R or Rcpp, but it seems simple enough to skip combo_vectors
and pack sublists (out1
, which must itself be a generic List
to be heterogeneous) directly into output
:
#include <Rcpp.h>
using namespace Rcpp;
// [[Rcpp::export]]
// Returns every combination of the elements of the input vectors as a data
// frame with one column per input vector (named v1, v2, ...) and one row per
// combination. The last input vector varies fastest.
//
// Supports integer, numeric and character vectors; any other element type
// raises "unsupported vector type".
//
// A data frame is a list of columns, so instead of collecting one sublist
// per combination, each value is written straight into a preallocated
// column of the matching type. No intermediate per-row or per-column buffer
// is needed.
DataFrame combine_vectors1(List vectors) {
    const int num_vectors = vectors.length();
    List output(num_vectors);
    CharacterVector col_names(num_vectors);

    // Validate the inputs and count the combinations (product of lengths).
    R_xlen_t num_combos = 1;
    for (int i = 0; i < num_vectors; i++) {
        SEXP vector = vectors[i];
        switch (TYPEOF(vector)) {
        case INTSXP:
        case REALSXP:
        case STRSXP:
            break;
        default:
            Rcpp::stop("unsupported vector type");
        }
        num_combos *= Rf_xlength(vector);
        col_names[i] = std::string("v") + std::to_string(i + 1);
    }

    // One full-length column per input vector, of the same SEXP type.
    for (int i = 0; i < num_vectors; i++) {
        SEXP vector = vectors[i];
        output[i] = Rf_allocVector(TYPEOF(vector), num_combos);
    }

    // indices[i] is the currently selected element of input vector i.
    std::vector<R_xlen_t> indices(num_vectors, 0);
    // If any input is empty, num_combos is 0 and no element is ever read.
    for (R_xlen_t row = 0; row < num_combos; ++row) {
        for (int i = 0; i < num_vectors; i++) {
            SEXP src = vectors[i];
            SEXP dst = output[i];
            switch (TYPEOF(src)) {
            case INTSXP:
                INTEGER(dst)[row] = INTEGER(src)[indices[i]];
                break;
            case REALSXP:
                REAL(dst)[row] = REAL(src)[indices[i]];
                break;
            case STRSXP:
                SET_STRING_ELT(dst, row, STRING_ELT(src, indices[i]));
                break;
            default:
                break;  // unreachable: types were validated above
            }
        }
        // Advance like an odometer: bump the last index and carry leftwards
        // whenever an index runs past the end of its vector.
        for (int i = num_vectors - 1; i >= 0; --i) {
            if (++indices[i] < Rf_xlength(vectors[i])) {
                break;
            }
            indices[i] = 0;
        }
    }

    output.attr("names") = col_names;
    DataFrame df(output);
    return df;
}
Upvotes: 0