user1956609
user1956609

Reputation: 2202

SegFault with inputting a file

Here is my code. Edit: Caller included at the bottom.

This function reads in a datafile, determines how many rows and columns there are, and then stores the data into data_array.

int getdata(double* *data_array, int* *data_labels, int argc, char *argv[], int *items, int *attr)
{  
// filename variables
char *filename; // pointer to a string that will contain the name of the training data file.
char *result = NULL; // used with strtok() to extract each feature value given a line of delimited features.
FILE *fp; // pointer to FILE, we can use this with fgets to access each line
char line[10000];       // array of 1000 chars for storing the raw data for one observation
char delims[4];         // an array of common delimiters for data files
delims[0]='\t';
delims[1]=' ';
delims[2]=',';
delims[3]='\0';
int i, j;
// check that we have the correct number of command line arguments
if (argc < 2)
{
   printf("2usage: progname filename\n");
   return -1;
}

if (argc < 4)
{
   printf("3usage: progname filename num_labels k(nn)\n");
   return -1;
}

if (atoi(argv[2]) < 1)
{
    printf("num_labels must be a positive integer.\n");
    return -1;
}

if (atof(argv[2]) - atoi(argv[2]) > 0)
{
    printf("num_labels must be an integer.\n");
    return -1;
}

if (atoi(argv[3]) < 1)
{
   printf("k must be a positive integer.\n");
   return -1;
}

if (atof(argv[3]) - atoi(argv[3]) > 0)
{
   printf("k must be an integer.\n");
   return -1;
}

// try to open the file
filename = argv[1];
fp = fopen(filename, "r");
if (fp == NULL)
{
   printf("could not open file: %s\n", filename);
   printf("note: the filename should be the second command line argument, after the .exe file");
    return -1;
 }
 printf("reading file: %s\n", filename);

 // GET NUMBER OF ROWS AND COLUMNS OF INPUT FILE
 fgets(line, 10000, fp);
 result = strtok(line,delims);
 while(result != NULL)
 {
  (*items)++;
  result = strtok(NULL, delims);
 }  
  (*attr)++;
  while(fgets(line,sizeof(line),fp) != NULL)
  (*attr)++;

  printf("num items: %d\n", *items);
  printf("num attributes: %d\n", *attr); 


  // NOW THAT YOU HAVE FILE SIZE, ALLOCATE MEMORY TO STORE DATA
  *data_array = malloc(*items* *attr*sizeof(double));
  //*data_labels = malloc(*items*sizeof(int));
  printf("data array size = %d\n\n",*items* *attr);

  i=0;
  j=0;
  fclose(fp);
  fp = fopen(filename, "r");
  // SAVE DATA INTO DATA_ARRAY
  while (fgets(line, 10000, fp) != NULL) 
  { 

  // we break line into tokens using our delimeters list declared at the beginning of the function
  result = strtok(line,delims);
  //printf("%d\n",i);
   while(result != NULL) {
   (*data_array)[i++] = atof(result);
   //printf("%f\n",(*data_array)[i-1]);
   result = strtok(NULL, delims);
   }
   //(*data_labels)[j++] = (int)((*data_array)[--i]);
   }
   // close the file
   fclose(fp);
   return 0;
    }

    int main(int argc, char *argv[])
    {
      double *data_array = NULL;
      int *data_labels = NULL;
      int items = 0;
      int attr = 0;

      if (getdata(&data_array, &data_labels, argc, argv, &items, &attr) == -1)
      {
        printf("reading input data failed");
        return 0;
      }
      printf("PROCESS COMPLETED WITHOUT SEGFAULT");

      #ifdef CROSSVAL
      // v-fold cross-validation
      if (crossval(data_array, atoi(argv[2]), items, attr, closest_center, centers) == -1)
      {
        printf("k-means operation failed");
        return 0;
      }
      #else

      // coords holds coordinates of one new point
      int i;
      double coords[attr];
      for(i = 0; i<attr;i++)
      {
        coords[i] = atof(argv[i+4]);
      }
    /*
      // calling knn function
      if (knn(data_array, data_labels, atoi(argv[2]), atoi(argv[3]), coords, items, attr) == -1) // use a heap!
      {
        printf("knn operation failed");
        return 0;
      }
    */
      #endif

      free(data_array);
      return 0;
     }

The weird thing is this function outputs correctly before segfaulting. Given a datafile with 2 columns and 272 rows, i get "num items: 2, num attributes: 272, data array size = 544". But then if I comment out everything after the "printf("num attributes: ..." line and before the final fclose(fp) line, it still segfaults. So it's getting through the print statements fine, then segfaulting even though there's no important code after it. So confused.

Upvotes: 0

Views: 104

Answers (1)

Olaf Dietsche
Olaf Dietsche

Reputation: 74078

The program segfaults after the call to getdata() in the for loop.

You don't check if there are enough arguments to your program and read attr + 4 arguments, whether they exist or not.

This is also the reason, why it doesn't segfault, when you remove (*attr)++. Because then, attr remains 0 and you don't try to read non-existing arguments.

Upvotes: 2

Related Questions