Taufik Isyhamudin
Taufik Isyhamudin

Reputation: 1

How to read CSV file into 2d array of struct with each element having a determining character (C, G, M) to sort into the struct?

I am trying to create a 2D array (a map) from the following CSV input:

   5,4
   ,,,C 200
   ,G Vibranium Shield:hands:990,,C 50
   M Healing Potion:85,,M Defence Enchanment:360,
   ,,,
   ,,G Lighsaber:hands:850,5,4

The first row is the size of the array given.

The problem I am having right now is how to still count the empty fields in the CSV (such as ",,,") as rows and columns of the array. In addition, how do I read the determining character (C, G, M) in order to store the element in the struct? For example, in "G Vibranium Shield:hands:990", G is the determining character, which is stored in a char and which I then use in a switch statement to store the other elements into the appropriate struct members.

I tried to use fgets() and strtok(), but I can't read the determining character separately from the other elements in the CSV. The other examples I found seem to need prior knowledge of which elements will be on each line, so the parsing is predetermined rather than driven by the determining character in the CSV. Thus I used fscanf to read:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct map
{
char types;
char geartypes[100];
int coins;
int values;
char items[100];
}map;

struct map **m;

/*
 * Opens "map.csv", reads "height,width" from its first line, allocates a
 * height x width array of `map` cells, attempts to fill each cell by
 * switching on a single character read from the file, then prints every
 * cell whose `types` is 'C', 'G' or 'M'.
 *
 * NOTE(review): as posted, the closing brace of main() is missing.
 */
int main()
{
    FILE* mapz;
    int i,j,h;
    int width,height;
    char a;
    mapz=fopen("map.csv","r");

    if(mapz!=NULL)
    {
        /* NOTE(review): the return value is not checked, and "%d,%d" stops
         * right after the second number, so the newline that ends the
         * header line is still unread when the cell loop starts. */
        fscanf(mapz,"%d,%d",&height,&width);
        /* Array of `height` row pointers; this local `m` shadows the
         * file-scope `m`. NOTE(review): malloc results are not checked and
         * the cells are left uninitialised. */
        map **m=(map **)malloc(height * sizeof(map *)); 
        for(i=0;i<height;i++)
        {
            m[i]=(map*)malloc(width * sizeof(map)); 
        }
        for(h=0;h<height;h++)
        {
            for(j=0;j<width;j++)
            {
                /* "%c" reads exactly one character of any kind, including
                 * ',' and '\n'. Separators and line ends therefore each use
                 * up one (h, j) iteration, which is why the cell count goes
                 * out of step with the file. */
                fscanf(mapz,"%c",&a);
                /* No default case: when `a` is not 'C', 'G' or 'M' the cell
                 * (including its `types` member) is left unset. */
                switch(a)
                {
                case('C'):
                    m[h][j].types=a;
                    fscanf(mapz,"%d",&m[h][j].coins);
                    break;
                case('G'):
                    m[h][j].types=a;
                    /* NOTE(review): in "%[^,:]s" the trailing 's' is a
                     * literal to match, not part of the scanset conversion,
                     * and the conversion has no width limit for the
                     * 100-byte destination. */
                    fscanf(mapz,"%[^,:]s",m[h][j].items);
                    /* NOTE(review): the previous scanset stops AT the ':'
                     * without consuming it, so this scanset (which excludes
                     * ':') and the following "%d" start on that ':' and
                     * cannot match anything. */
                    fscanf(mapz,"%[^,:]s",m[h][j].geartypes);
                    fscanf(mapz,"%d",&m[h][j].values);
                    break;
                case('M'):
                    m[h][j].types=a;
                    /* Same pattern as the 'G' case: the ':' before the
                     * number is never consumed before "%d" is attempted. */
                    fscanf(mapz,"%[^,:]s",m[h][j].items);
                    fscanf(mapz,"%d",&m[h][j].values);
                    break;
                }

            }
        }   
        /* Dump the map: one output line per cell recognised as C, G or M.
         * NOTE(review): this switch reads `types` of cells that the loop
         * above may never have written. */
        for(h=0;h<height;h++)
        {
            for(j=0;j<width;j++)
            {
                switch(m[h][j].types)
                {
                case('C'):
                    printf("%c",m[h][j].types);
                    printf("%d\n",m[h][j].coins);
                    break;
                case('G'):
                    printf("%c",m[h][j].types);
                    printf("%s%s%d\n",m[h][j].items,m[h][j].geartypes,m[h][j].values);
                    break;
                case('M'):
                    printf("%c",m[h][j].types);
                    printf("%s%d\n",m[h][j].items,m[h][j].values);
                    break;
                }
            }
        }   
        /* NOTE(review): the rows and the row-pointer array are never freed. */
    }
    else
    {
        printf("No such file in directory");
    }
    /* NOTE(review): this also runs on the else path, where mapz is NULL. */
    fclose(mapz);
    return 0;

I tried to use fscanf, but it seems to also read the "," characters, which messes up the loop count. When I ran the code, the output came out blank.

Upvotes: 0

Views: 332

Answers (2)

James K. Lowden
James K. Lowden

Reputation: 7837

While I have no qualms with David C. Rankin's answer, here's a different approach that uses regular expressions:

#include <assert.h>
#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <sys/types.h>
#include <regex.h>

/* Scratch buffer for one input line. A physical line longer than this is
 * read in several pieces, each of which is treated as its own line. */
char line[4096];

/*
 * Report a regcomp()/regexec() failure and terminate the program.
 *
 * what: prefix printed before the library's message (may be "").
 * erc:  the non-zero code returned by regcomp() or regexec().
 * reg:  the regex object the failing call was given.
 */
static void regex_die( const char *what, int erc, const regex_t *reg ) {
  char errbuf[128];
  const char *truncated = "";

  /* regerror() returns the size needed for the complete message. */
  if( regerror(erc, reg, errbuf, sizeof(errbuf)) > sizeof(errbuf) )
    truncated = "(truncated)";
  errx(EXIT_FAILURE, "%s%s %s", what, errbuf, truncated);
}

/*
 * Usage: prog FILE
 *
 * Reads "rows,fields" from the first line of FILE, builds one extended
 * regular expression with a capture group per field, then prints each of
 * the next `rows` lines followed by its captured fields (empty fields
 * included). Exits with EXIT_FAILURE and a diagnostic on any error,
 * including a data line that has fewer than `fields` fields.
 */
int main( int argc, char *argv[] ) {
  if( argc < 2 || !argv[1] )
    errx(EXIT_FAILURE, "missing input");

  FILE *input = fopen(argv[1], "r");
  if( !input  )
    err(EXIT_FAILURE, "could not open %s", argv[1]);

  if( NULL == fgets(line, sizeof(line), input) ) {
    /* errno is only meaningful for a real read error, not for plain EOF. */
    if( ferror(input) )
      err(EXIT_FAILURE, "could not read %s", argv[1]);
    errx(EXIT_FAILURE, "could not read %s: file is empty", argv[1]);
  }

  int nr, nfield;
  if( 2 != sscanf(line, "%d,%d", &nr, &nfield) )
    errx(EXIT_FAILURE, "failed to parse first line");
  /* The sizes below are derived from nfield, so reject nonsense early. */
  if( nr < 0 || nfield < 1 )
    errx(EXIT_FAILURE, "invalid dimensions %d,%d", nr, nfield);
  printf( "reading %d lines of %d fields each\n", nr, nfield );

  /* One "(field)separator" unit per column; calloc zero-fills, so the
   * buffer starts out as an empty string for strcat(). */
  const char fmt[] = "([^,\n]*)[,\n]";
  char *regex = calloc( (size_t)nfield, 1 + strlen(fmt) );
  /* Slot 0 is the whole match, slots 1..nfield are the fields. Allocated
   * once on the heap rather than as a VLA sized by file contents. */
  regmatch_t *matches = calloc( 1 + (size_t)nfield, sizeof(*matches) );
  if( !regex || !matches )
    err(EXIT_FAILURE, "out of memory");
  for( int i=0; i < nfield; i++ ) {
    strcat(regex, fmt);
  }

  regex_t reg;
  int erc = regcomp(&reg, regex, REG_EXTENDED);
  if( erc != 0 )
    regex_die("", erc, &reg);

  for( int i=0; i < nr && NULL != fgets(line, sizeof(line), input); i++ ) {
    /* Every field must be followed by ',' or '\n'; give a final line that
     * lacks its newline one, so its last field can still match. */
    size_t len = strlen(line);
    if( (len == 0 || line[len - 1] != '\n') && len + 1 < sizeof(line) ) {
      line[len] = '\n';
      line[len + 1] = '\0';
    }

    printf("%s", line);

    erc = regexec(&reg, line, 1 + (size_t)nfield, matches, 0);
    if( erc != 0 )
      regex_die("regex error: ", erc, &reg);

    for( int nf=1; nf < nfield + 1 && matches[nf].rm_so != -1; nf++ ) {
      assert(matches[nf].rm_so <= matches[nf].rm_eo);
      printf( "%4d: '%.*s'\n",
          nf,
          (int)(matches[nf].rm_eo - matches[nf].rm_so),
          line + matches[nf].rm_so );
    }
  }

  regfree(&reg);
  free(matches);
  free(regex);
  fclose(input);

  return EXIT_SUCCESS;
}

It's only a little longer (mostly to handle errors). What I like is that once regexec(3) is called, the fields are all set up in the matches array.

Upvotes: 0

David C. Rankin
David C. Rankin

Reputation: 84561

Since you are stuck on handling empty fields when you are tokenizing each line, let's look at using strsep to handle that for you. There are a few caveats about using strsep. First note the type of the first parameter. It is char **. That means you cannot read each line into a fixed character array and pass the address of a fixed array (it would not be char**, but instead char (*)[length]). Next, since strsep will update the pointer provided as the first parameter, you cannot simply give it the address of the allocated buffer you are using to store each line you read (you would lose the pointer to the start of the allocated block and be unable to free() the memory or read more than one line).

So, bottom line, you need an allocated buffer to hold the text you are going to pass to strsep, and then you need 2 pointers, one to capture the return from strsep and one to pass the address of to strsep (to allow you to preserve your original buffer pointer).

With that in mind, you can parse your CSV with empty fields similar to:

    /* Fragment of the full program: buf, fp, fields, p, n, MAXC and DELIM
     * are declared outside this loop. `fields` is the cursor handed to
     * strsep (which advances it), `p` receives each field, and `buf` keeps
     * pointing at the start of the line buffer. */
    while (fgets (buf, MAXC, fp)) { /* read each line in file */
        size_t i = 0;       /* counter */
        p = fields = buf;   /* initialize pointers to use with strsep */
        printf ("\nline %2zu:\n", n++ + 1);         /* output heading */
        while ((p = strsep (&fields, DELIM))) {     /* call strsep */
            p[strcspn(p, "\r\n")] = 0;              /* trim '\n' (last) */
            printf ("  field %2zu: '%s'\n", i++ + 1, p); /* output field */
        }
    }

Putting that together in a full example using your data, you can do something similar to:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAXC  1024      /* if you need a constant, #define one (or more) */
#define DELIM ","       /* (numeric or string) */

/*
 * Reads a CSV file (first argument, or stdin when none is given), validates
 * that line 1 has the form "lines,fields", then prints every following line
 * split on DELIM with one field per output line. Empty fields are kept.
 *
 * Returns 0 on success and 1 on any failure (open, allocation, or a missing
 * or malformed first line). All exits after the file is opened go through a
 * single cleanup label so the buffer and the stream are always released.
 */
int main (int argc, char **argv) {

    int rc = 1;             /* exit status; set to 0 only after full success */
    size_t n = 0, lines, nflds;
    char *buf = NULL, *fields, *p;  /* must use 2 pointers for strsep */
    /* use filename provided as 1st argument (stdin by default) */
    FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;

    if (!fp) {  /* validate file open for reading */
        perror ("file open failed");
        return 1;           /* nothing acquired yet, nothing to clean up */
    }

    if (!(buf = malloc (MAXC))) {   /* allocate storage for buffer */
        perror ("malloc-buf");
        goto done;
    }

    if (!fgets (buf, MAXC, fp)) {   /* read/validate 1st line */
        fputs ("error: insufficient input line 1.\n", stderr);
        goto done;
    }
    /* convert to lines and no. of fields; the values are only validated
     * here, the loop below prints however many fields each line has */
    if (sscanf (buf, "%zu,%zu", &lines, &nflds) != 2) {
        fputs ("error: invalid format line 1.\n", stderr);
        goto done;
    }

    while (fgets (buf, MAXC, fp)) { /* read each line in file */
        size_t i = 0;       /* field counter within this line */
        /* strsep advances `fields`; `buf` must keep the block's start */
        p = fields = buf;
        printf ("\nline %2zu:\n", n++ + 1);         /* output heading */
        while ((p = strsep (&fields, DELIM))) {     /* next field, or NULL */
            p[strcspn(p, "\r\n")] = 0;              /* trim line ending (last field) */
            printf ("  field %2zu: '%s'\n", i++ + 1, p); /* output field */
        }
    }
    rc = 0;

done:
    if (fp != stdin) fclose (fp);   /* close file if not stdin */
    free (buf);                     /* free(NULL) is a no-op */

    return rc;
}

Example Input File

$ cat dat/emptyflds.csv
5,4
,,,C 200
,G Vibranium Shield:hands:990,,C 50
M Healing Potion:85,,M Defence Enchanment:360,
,,,
,,G Lighsaber:hands:850,5,4

Example Use/Output

The example simply prints the line number and then each separated field on a separate line below it so you can confirm the separation:

$ ./bin/strcspnsepcsv <dat/emptyflds.csv

line  1:
  field  1: ''
  field  2: ''
  field  3: ''
  field  4: 'C 200'

line  2:
  field  1: ''
  field  2: 'G Vibranium Shield:hands:990'
  field  3: ''
  field  4: 'C 50'

line  3:
  field  1: 'M Healing Potion:85'
  field  2: ''
  field  3: 'M Defence Enchanment:360'
  field  4: ''

line  4:
  field  1: ''
  field  2: ''
  field  3: ''
  field  4: ''

line  5:
  field  1: ''
  field  2: ''
  field  3: 'G Lighsaber:hands:850'
  field  4: '5'
  field  5: '4'

(note: line 5 contains a 5th field that exceeds expected no. of fields)

To handle further separation within the fields on ':' or whatever else you need, you are free to call strtok on the pointer p within the field tokenization while loop.

Upvotes: 1

Related Questions