Reputation: 2202
Here is my code. Edit: Caller included at the bottom.
This function reads in a datafile, determines how many rows and columns there are, and then stores the data into data_array.
int getdata(double* *data_array, int* *data_labels, int argc, char *argv[], int *items, int *attr)
{
// filename variables
char *filename; // pointer to a string that will contain the name of the training data file.
char *result = NULL; // used with strtok() to extract each feature value given a line of delimited features.
FILE *fp; // pointer to FILE, we can use this with fgets to access each line
char line[10000]; // array of 1000 chars for storing the raw data for one observation
char delims[4]; // an array of common delimiters for data files
delims[0]='\t';
delims[1]=' ';
delims[2]=',';
delims[3]='\0';
int i, j;
// check that we have the correct number of command line arguments
if (argc < 2)
{
printf("2usage: progname filename\n");
return -1;
}
if (argc < 4)
{
printf("3usage: progname filename num_labels k(nn)\n");
return -1;
}
if (atoi(argv[2]) < 1)
{
printf("num_labels must be a positive integer.\n");
return -1;
}
if (atof(argv[2]) - atoi(argv[2]) > 0)
{
printf("num_labels must be an integer.\n");
return -1;
}
if (atoi(argv[3]) < 1)
{
printf("k must be a positive integer.\n");
return -1;
}
if (atof(argv[3]) - atoi(argv[3]) > 0)
{
printf("k must be an integer.\n");
return -1;
}
// try to open the file
filename = argv[1];
fp = fopen(filename, "r");
if (fp == NULL)
{
printf("could not open file: %s\n", filename);
printf("note: the filename should be the second command line argument, after the .exe file");
return -1;
}
printf("reading file: %s\n", filename);
// GET NUMBER OF ROWS AND COLUMNS OF INPUT FILE
fgets(line, 10000, fp);
result = strtok(line,delims);
while(result != NULL)
{
(*items)++;
result = strtok(NULL, delims);
}
(*attr)++;
while(fgets(line,sizeof(line),fp) != NULL)
(*attr)++;
printf("num items: %d\n", *items);
printf("num attributes: %d\n", *attr);
// NOW THAT YOU HAVE FILE SIZE, ALLOCATE MEMORY TO STORE DATA
*data_array = malloc(*items* *attr*sizeof(double));
//*data_labels = malloc(*items*sizeof(int));
printf("data array size = %d\n\n",*items* *attr);
i=0;
j=0;
fclose(fp);
fp = fopen(filename, "r");
// SAVE DATA INTO DATA_ARRAY
while (fgets(line, 10000, fp) != NULL)
{
// we break line into tokens using our delimeters list declared at the beginning of the function
result = strtok(line,delims);
//printf("%d\n",i);
while(result != NULL) {
(*data_array)[i++] = atof(result);
//printf("%f\n",(*data_array)[i-1]);
result = strtok(NULL, delims);
}
//(*data_labels)[j++] = (int)((*data_array)[--i]);
}
// close the file
fclose(fp);
return 0;
}
int main(int argc, char *argv[])
{
double *data_array = NULL;
int *data_labels = NULL;
int items = 0;
int attr = 0;
if (getdata(&data_array, &data_labels, argc, argv, &items, &attr) == -1)
{
printf("reading input data failed");
return 0;
}
printf("PROCESS COMPLETED WITHOUT SEGFAULT");
#ifdef CROSSVAL
// v-fold cross-validation
if (crossval(data_array, atoi(argv[2]), items, attr, closest_center, centers) == -1)
{
printf("k-means operation failed");
return 0;
}
#else
// coords holds coordinates of one new point
int i;
double coords[attr];
for(i = 0; i<attr;i++)
{
coords[i] = atof(argv[i+4]);
}
/*
// calling knn function
if (knn(data_array, data_labels, atoi(argv[2]), atoi(argv[3]), coords, items, attr) == -1) // use a heap!
{
printf("knn operation failed");
return 0;
}
*/
#endif
free(data_array);
return 0;
}
The weird thing is this function outputs correctly before segfaulting. Given a datafile with 2 columns and 272 rows
, i get "num items: 2, num attributes: 272, data array size = 544"
. But then if I comment out everything after the "printf("num attributes: ...
" line and before the final fclose(fp)
line, it still segfaults. So it's getting through the print statements fine, then segfaulting even though there's no important code after it. So confused.
Upvotes: 0
Views: 104
Reputation: 74078
The program segfaults after the call to getdata()
in the for
loop.
You don't check if there are enough arguments to your program and read attr + 4
arguments, whether they exist or not.
This is also the reason, why it doesn't segfault, when you remove (*attr)++
. Because then, attr
remains 0
and you don't try to read non-existing arguments.
Upvotes: 2