Reputation: 528
I'm using LIBSVM for an ML project, and in cross-validation mode (the -v option) I would like to output multiple evaluation metrics such as precision, recall, F-score, etc.
I was following this guide. I made the changes, but when I compile I get errors like:
eval.cpp:32:10: error: redefinition of 'validation_function'
double (*validation_function)(const dvec_t&, const ivec_t&) = recall
Here is the entire eval.cpp file:
#include <iostream>
#include <vector>
#include <algorithm>
#include <errno.h>
#include <cstring>
#include <cstdio>   // printf, fprintf, fgets
#include <cstdlib>  // malloc, realloc, free, exit, rand
#include <cctype>   // isspace
#include "svm.h"
#include "eval.h"
#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
typedef std::vector<double> dvec_t;
typedef std::vector<int> ivec_t;
// prototypes of evaluation functions
double precision(const dvec_t& dec_values, const ivec_t& ty);
double recall(const dvec_t& dec_values, const ivec_t& ty);
double fscore(const dvec_t& dec_values, const ivec_t& ty);
double bac(const dvec_t& dec_values, const ivec_t& ty);
double auc(const dvec_t& dec_values, const ivec_t& ty);
double accuracy(const dvec_t& dec_values, const ivec_t& ty);
double ap(const dvec_t& dec_values, const ivec_t& ty);
// evaluation function pointer
//double eval_func(const dvec_t& dec_values, const ivec_t& ty);
// You can assign this pointer to any above prototype
double (*validation_function)(const dvec_t&, const ivec_t&) = accuracy;
double (*validation_function)(const dvec_t&, const ivec_t&) = precision;
double (*validation_function)(const dvec_t&, const ivec_t&) = recall;
static char *line = NULL;
static int max_line_len;
static char* readline(FILE *input)
{
int len;
if(fgets(line,max_line_len,input) == NULL)
return NULL;
while(strrchr(line,'\n') == NULL)
{
max_line_len *= 2;
line = (char *) realloc(line,max_line_len);
len = (int) strlen(line);
if(fgets(line+len,max_line_len-len,input) == NULL)
break;
}
return line;
}
double precision(const dvec_t& dec_values, const ivec_t& ty){
size_t size = dec_values.size();
size_t i;
int tp, fp;
double precision;
tp = fp = 0;
for(i = 0; i < size; ++i) if(dec_values[i] >= 0){
if(ty[i] == 1) ++tp;
else ++fp;
}
if(tp + fp == 0){
fprintf(stderr, "warning: No positive predict label.\n");
precision = 0;
}else
precision = tp / (double) (tp + fp);
printf("Precision = %g%% (%d/%d)\n", 100.0 * precision, tp, tp + fp);
return precision;
}
double recall(const dvec_t& dec_values, const ivec_t& ty){
size_t size = dec_values.size();
size_t i;
int tp, fn; // true_positive and false negative
double recall;
tp = fn = 0;
for(i = 0; i < size; ++i) if(ty[i] == 1){ // true label is 1
if(dec_values[i] >= 0) ++tp; // predict label is 1
else ++fn; // predict label is -1
}
recall = tp / (double) (tp + fn);
// print result in case of invocation in prediction
printf("Recall = %g%%\n", 100.0 * recall);
return recall; // return the evaluation value
}
double fscore(const dvec_t& dec_values, const ivec_t& ty){
size_t size = dec_values.size();
size_t i;
int tp, fp, fn;
double precision, recall;
double fscore;
tp = fp = fn = 0;
for(i = 0; i < size; ++i)
if(dec_values[i] >= 0 && ty[i] == 1) ++tp;
else if(dec_values[i] >= 0 && ty[i] == -1) ++fp;
else if(dec_values[i] < 0 && ty[i] == 1) ++fn;
if(tp + fp == 0){
fprintf(stderr, "warning: No postive predict label.\n");
precision = 0;
}else
precision = tp / (double) (tp + fp);
if(tp + fn == 0){
fprintf(stderr, "warning: No postive true label.\n");
recall = 0;
}else
recall = tp / (double) (tp + fn);
if(precision + recall == 0){
fprintf(stderr, "warning: precision + recall = 0.\n");
fscore = 0;
}else
fscore = 2 * precision * recall / (precision + recall);
printf("F-score = %g\n", fscore);
return fscore;
}
double bac(const dvec_t& dec_values, const ivec_t& ty){
size_t size = dec_values.size();
size_t i;
int tp, fp, fn, tn;
double specificity, recall;
double bac;
tp = fp = fn = tn = 0;
for(i = 0; i < size; ++i)
if(dec_values[i] >= 0 && ty[i] == 1) ++tp;
else if(dec_values[i] >= 0 && ty[i] == -1) ++fp;
else if(dec_values[i] < 0 && ty[i] == 1) ++fn;
else ++tn;
if(tn + fp == 0){
fprintf(stderr, "warning: No negative true label.\n");
specificity = 0;
}else
specificity = tn / (double)(tn + fp);
if(tp + fn == 0){
fprintf(stderr, "warning: No positive true label.\n");
recall = 0;
}else
recall = tp / (double)(tp + fn);
bac = (specificity + recall) / 2;
printf("BAC = %g\n", bac);
return bac;
}
// for auc and ap
class Comp{
const double *dec_val;
public:
Comp(const double *ptr): dec_val(ptr){}
bool operator()(int i, int j) const{
return dec_val[i] > dec_val[j];
}
};
double auc(const dvec_t& dec_values, const ivec_t& ty){
double roc = 0;
size_t size = dec_values.size();
size_t i;
std::vector<size_t> indices(size);
for(i = 0; i < size; ++i) indices[i] = i;
std::sort(indices.begin(), indices.end(), Comp(&dec_values[0]));
int tp = 0,fp = 0;
for(i = 0; i < size; i++) {
if(ty[indices[i]] == 1) tp++;
else if(ty[indices[i]] == -1) {
roc += tp;
fp++;
}
}
if(tp == 0 || fp == 0)
{
fprintf(stderr, "warning: Too few postive true labels or negative true labels\n");
roc = 0;
}
else
roc = roc / tp / fp;
printf("AUC = %g\n", roc);
return roc;
}
double accuracy(const dvec_t& dec_values, const ivec_t& ty){
int correct = 0;
int total = (int) ty.size();
size_t i;
for(i = 0; i < ty.size(); ++i)
if(ty[i] == (dec_values[i] >= 0? 1: -1)) ++correct;
printf("Accuracy = %g%% (%d/%d)\n",
(double)correct/total*100,correct,total);
return (double) correct / total;
}
double ap(const dvec_t& dec_values, const ivec_t& ty){
size_t size = dec_values.size();
size_t i;
std::vector<size_t> indices(size);
for(i = 0; i < size; ++i) indices[i] = i;
std::sort(indices.begin(), indices.end(), Comp(&dec_values[0]));
int p = 0, tp = 0;
double prev_recall = 0, area = 0;
for(i = 0; i < size; ++i) p += (ty[i] == 1);
if(p == 0) {
fprintf(stderr, "warning: Too few postive labels\n");
return 0;
}
for(i = 0; i < size; ++i) {
tp += (ty[indices[i]] == 1);
if(i+1 < size && dec_values[indices[i]] == dec_values[indices[i+1]])
continue;
double recall = (double)tp/p;
double precision = (double)tp/(double)(i+1);
area += precision*(recall-prev_recall);
prev_recall = recall;
}
printf("AP = %g\n", area);
return area;
}
double binary_class_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold)
{
int i;
int *fold_start = Malloc(int,nr_fold+1);
int l = prob->l;
int *perm = Malloc(int,l);
int *labels;
dvec_t dec_values;
ivec_t ty;
for(i=0;i<l;i++) perm[i]=i;
for(i=0;i<l;i++)
{
int j = i+rand()%(l-i);
std::swap(perm[i],perm[j]);
}
for(i=0;i<=nr_fold;i++)
fold_start[i]=i*l/nr_fold;
for(i=0;i<nr_fold;i++)
{
int begin = fold_start[i];
int end = fold_start[i+1];
int j,k;
struct svm_problem subprob;
subprob.l = l-(end-begin);
subprob.x = Malloc(struct svm_node*,subprob.l);
subprob.y = Malloc(double,subprob.l);
k=0;
for(j=0;j<begin;j++)
{
subprob.x[k] = prob->x[perm[j]];
subprob.y[k] = prob->y[perm[j]];
++k;
}
for(j=end;j<l;j++)
{
subprob.x[k] = prob->x[perm[j]];
subprob.y[k] = prob->y[perm[j]];
++k;
}
struct svm_model *submodel = svm_train(&subprob,param);
int svm_type = svm_get_svm_type(submodel);
if(svm_type == NU_SVR || svm_type == EPSILON_SVR){
fprintf(stderr, "wrong svm type");
exit(1);
}
labels = Malloc(int, svm_get_nr_class(submodel));
svm_get_labels(submodel, labels);
if(svm_get_nr_class(submodel) > 2)
{
fprintf(stderr,"Error: the number of class is not equal to 2\n");
exit(-1);
}
dec_values.resize(end);
ty.resize(end);
for(j=begin;j<end;j++) {
svm_predict_values(submodel,prob->x[perm[j]], &dec_values[j]);
ty[j] = (prob->y[perm[j]] > 0)? 1: -1;
}
if(labels[0] <= 0) {
for(j=begin;j<end;j++)
dec_values[j] *= -1;
}
svm_free_and_destroy_model(&submodel);
free(subprob.x);
free(subprob.y);
free(labels);
}
free(perm);
free(fold_start);
return validation_function(dec_values, ty);
}
void binary_class_predict(FILE *input, FILE *output){
int total = 0;
int *labels;
int max_nr_attr = 64;
struct svm_node *x = Malloc(struct svm_node, max_nr_attr);
dvec_t dec_values;
ivec_t true_labels;
int svm_type=svm_get_svm_type(model);
if (svm_type==NU_SVR || svm_type==EPSILON_SVR){
fprintf(stderr, "wrong svm type.");
exit(1);
}
labels = Malloc(int, svm_get_nr_class(model));
svm_get_labels(model, labels);
max_line_len = 1024;
line = (char *)malloc(max_line_len*sizeof(char));
while(readline(input) != NULL)
{
int i = 0;
double target_label, predict_label;
char *idx, *val, *label, *endptr;
int inst_max_index = -1; // strtol gives 0 if wrong format, and precomputed kernel has <index> start from 0
label = strtok(line," \t");
target_label = strtod(label,&endptr);
if(endptr == label)
exit_input_error(total+1);
while(1)
{
if(i>=max_nr_attr - 2) // need one more for index = -1
{
max_nr_attr *= 2;
x = (struct svm_node *) realloc(x,max_nr_attr*sizeof(struct svm_node));
}
idx = strtok(NULL,":");
val = strtok(NULL," \t");
if(val == NULL)
break;
errno = 0;
x[i].index = (int) strtol(idx,&endptr,10);
if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
exit_input_error(total+1);
else
inst_max_index = x[i].index;
errno = 0;
x[i].value = strtod(val,&endptr);
if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
exit_input_error(total+1);
++i;
}
x[i].index = -1;
predict_label = svm_predict(model,x);
fprintf(output,"%g\n",predict_label);
double dec_value;
svm_predict_values(model, x, &dec_value);
true_labels.push_back((target_label > 0)? 1: -1);
if(labels[0] <= 0) dec_value *= -1;
dec_values.push_back(dec_value);
}
validation_function(dec_values, true_labels);
fscore(dec_values, true_labels);
precision(dec_values, true_labels);
recall(dec_values, true_labels);
free(labels);
free(x);
}
With just one of the evaluation functions assigned, e.g.
double (*validation_function)(const dvec_t&, const ivec_t&) = precision;
everything works fine and I get proper output like this:
optimization finished, #iter = 10012
nu = 0.519566
obj = -121646.218467, rho = -59.755150
nSV = 4299, nBSV = 4010
Total nSV = 4299
Precision = 81.18% (5875/7237)
Cross Validation = 81.18%
However, I would like to have something like:
optimization finished, #iter = 10012
nu = 0.519566
obj = -121646.218467, rho = -59.755150
nSV = 4299, nBSV = 4010
Total nSV = 4299
Precision = 81.18% (5875/7237)
Recall = XX.XX% (5875/7237)
Accuracy = XX.XX% (5875/7237)
Cross Validation = 81.18%
Is that feasible? Has anyone had a similar experience?
Regards
Upvotes: 1
Views: 86
Reputation: 8274
Since each of the "evaluation functions" uses printf to print the value it computes, if you start with the original code from here, it might be easier to simply call them in order before the return validation_function(dec_values, ty); statement in binary_class_cross_validation. Something like this:
double binary_class_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold)
{
    // ...
    precision(dec_values, ty);
    recall(dec_values, ty);
    accuracy(dec_values, ty);
    return validation_function(dec_values, ty);
}
Note that whichever function you set as the validation_function (by default auc) will compute the "Cross Validation" value that gets printed in the main() function of svm-train.c, as described in the tutorial.
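For reference, a rough sketch of how that returned value ends up as the last line of the output shown in the question (this assumes the tutorial's modified svm-train.c; the exact code in your copy may differ):
// Sketch only: the single double returned by binary_class_cross_validation()
// is what svm-train prints as the final "Cross Validation" line.
double cv_result = binary_class_cross_validation(&prob, &param, nr_fold);
printf("Cross Validation = %g%%\n", 100.0 * cv_result);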
Upvotes: 1
Reputation: 632
At a glance this is not really a LIBSVM problem but a C++ one. I haven't run your code, but I think a simple approach would be:
double* binary_class_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold)
{
    ...
    // collect the three metrics instead of returning a single value
    double *eval = Malloc(double, 3);
    eval[0] = precision(dec_values, ty);
    eval[1] = recall(dec_values, ty);
    eval[2] = accuracy(dec_values, ty);
    return eval;
}
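Note that this changes the return type from double to double*, so the caller in svm-train.c needs to be adapted as well, and the Malloc'd array should be freed. A minimal caller-side sketch (hypothetical, assuming the precision/recall/accuracy order used above):
// Hypothetical caller: consume the three metrics returned by the modified
// binary_class_cross_validation() and release the array afterwards.
double *eval = binary_class_cross_validation(&prob, &param, nr_fold);
printf("Cross Validation precision = %g%%\n", 100.0 * eval[0]);
printf("Cross Validation recall    = %g%%\n", 100.0 * eval[1]);
printf("Cross Validation accuracy  = %g%%\n", 100.0 * eval[2]);
free(eval);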
Upvotes: 0