Jonny93
Jonny93

Reputation: 1

Having trouble reading from text file into a struct array

I recently started at university with C programming (beginner course), and now we are doing our final examination which is about a patients' database.

I'm required to read data from a text file to a struct array (size 10000). The file contains 2 string arrays (personal identification string (10 numbers seperated by a '-') and name string), 1 int array containing photo references and 1 integer containing the amount of photo references per patient. I have tried fscanf but the program just hangs whenever i try to read, when i use fgets, it reads the whole line and stores the integers from the photo reference array into my name array (middle one). I am wondering how I should go about doing this, I've spent days trying to figure out a solution but nothing seems to work. This is what my text file looks like:

123456-1234   Name Name     [1, 2, 3, 4]
234567-2345   Name2 Name2   [1, 2]
345678-3456   Name3 Name3   []

And this is my write_to_file function which writes to the file when the program exits:

void write_to_file(Patient reg[], int *pNr_of_patients){
FILE *fp;
fp=fopen("file.txt","w");
if(*pNr_of_patients>0){
    int i,j;
    for(i=0;i<*pNr_of_patients;i++){
        fprintf(fp,"%s\t%s\t[",reg[i].pers_nr,reg[i].name);
        for(j=0;j<reg[i].nr_of_ref-1;j++){
            fprintf(fp,"%d, ",reg[i].photo_ref[j]);
        }
        if(reg[i].photo_ref[j]==0){
            fprintf(fp,"]");
        }else{
            fprintf(fp,"%d]",reg[i].photo_ref[j]);
        }
        fprintf(fp,"\n");
    }
    fclose(fp);
}
}

This is my read_from_file function, it's missing code for reading the int array values at the end:

Edit: I added a for loop to remove the characters starting at "[" from the name string, now i just need to know how to read the array values at the end into the struct's photo reference array.

void read_from_file(Patient reg[],int *pNr_of_patients){
FILE *fp;
fp=fopen("file.txt","r");
if(fp!=NULL){
    reg[*pNr_of_patients].nr_of_ref=0;
    int i=0, pos;
    while(fgets(reg[*pNr_of_patients].pers_nr,13,fp)!=NULL){
        reg[*pNr_of_patients].pers_nr[strlen(reg[*pNr_of_patients].pers_nr)-1]='\0';
        fgets(reg[*pNr_of_patients].name,31,fp);
        reg[*pNr_of_patients].name[strlen(reg[*pNr_of_patients].name)-1]='\0';
        for(pos=0;pos<30;pos++){
            if(reg[*pNr_of_patients].name[pos]=='['){
                reg[*pNr_of_patients].name[pos]='\0';
            }
        }
        (*pNr_of_patients)++;
    }
    fclose(fp);
}else{
  printf("File does not exist\n");  
}
}

This is what my Patient struct looks like:

struct patient{
char pers_nr[12], name[30];
int photo_ref[10], nr_of_ref;
};
typedef struct patient Patient;

Calling read_from_file in main:

int main(void){
Patient patient_register[10000];
int nr_of_patients=0;
read_from_file(patient_register,&nr_of_patients);
database_management(patient_register,&nr_of_patients); //this is where I fill all the data into the array before writing to the file at the end
write_to_file(patient_register,&nr_of_patients);
return 0;

}

Upvotes: 0

Views: 167

Answers (4)

chux
chux

Reputation: 154173

Divide and Conquer

Break this down into steps. Make a function that populates 1 Patient.

The below is untested code. Consider it a starting point. The deign goal is to make a function that reads 1 line into 1 Patient.


Read in 1 entire line

// return 1: success, 0: failure EOF:end-of-file
int read_once_from_file(FILE *stream, Patient *pat_ptr) {
  Patient pat = { 0 };
  char buffer[100 + 30*13];
  if (fgets(buffer, sizeof buffer, stream) == NULL) {
    return EOF;
  }

Parse the first part. Use "%n" which records the parsing offset. Use width limits on string input.

  int n = 0;
  if (sscanf(buffer, " %11[^\t] %29[^\t] [ %n", pat.pers_nr, pat.name) != 2) {
    return 0; // improper formatted input
  }
  char *p = buffer + n;

Now look for ']' and photo_ref

  if (*p != ']') {
    for (pat.nr_of_ref=0; ; pat.nr_of_ref++) {
      if (sscanf(p, "%d %n", &pat.photo_ref[i], &n) != 1) {
        return 0; // improper formatted input
      }
      p += n;
      if (*p == ']') {
        pat.nr_of_ref++;
        break;
      }
      if (*p != ',' || pat.nr_of_ref + 1 == 10) {
        return 0; // improper formatted input
      }
      p++;
    }
  }

Save result

  *pat_ptr = pat;
  return 1;
}

Call read_once_from_file() as needed

void read_from_file(Patient reg[],int *pNr_of_patients){
  *pNr_of_patients = 0;
  FILE *fp = fopen("file.txt","r");
  if(fp){
    for (int i = 0; i<10000; i++) {
      int count = read_once_from_file(fp, &reg[i]);
      if (count ==  EOF) {
        break;
      }
      if (count != 1) {
        // error
        fprintf(stderr, "Input error\n"); 
        break;
      }
    } 
    *pNr_of_patients = i;
    fclose(fp);
  }
}

Upvotes: 0

G. Sliepen
G. Sliepen

Reputation: 7983

There are some good answers already, but most of them try to use a single method to parse all elements of the line. I would read whole lines into a buffer first, then use sscanf() to parse the patient number and name, but use strtok() to split the array into its individual components:

void read_from_file(Patient reg[], int *pNr_of_patients) {
    FILE *fp = fopen("file.txt", "r");
    if (!fp) {
        fprintf(stderr, "Error opening file: %s\n", strerror(errno));
        *pNr_of_patients = 0;
        return;
    }

    char line[1024];
    int i = 0;

    while (fgets(line, sizeof line, fp)) {
        int offset = 0;
        int refs = 0;

        sscanf(line, "%11s %29[^[] [%n", &reg[i].pers_nr, &reg[i].name, &offset);

        for (char *tok = strtok(line + offset, ","); tok && refs < 10; tok = strtok(NULL, ",")) {
            if (*tok != ']')
                reg[i].photo_ref[refs++] = atoi(tok);
        }
        reg[i].nr_of_ref = refs;
        i++;
    }

    *pNr_of_patients = i;
}

Upvotes: 0

Tano Fotang
Tano Fotang

Reputation: 464

This was meant as a comment but got too long, so I type it here.

read_from_file() appears overly complex. You might consider revisiting fscanf, reading the photo references as a whole string and then parsing into integers which you can assign to the photo_ref array. (While the code below might compile, I haven't verified that it works. It's just an idea of how one might proceed.)

void read_from_file (Patient reg[], int *pNr_of_patients)
{
  FILE *fp;
  fp = fopen ("file.txt", "r");
  if (fp != NULL)
    {
      int n;
      int i = 0;        // position in photo_ref
      char refs[30];
      *pNr_of_patients = 0;
      while (EOF !=
         (n =
          fscanf (fp, "%s %[^[]%[^]]]", reg[*pNr_of_patients].pers_nr,
              reg[*pNr_of_patients].name, refs)))
    {
      // btw, reg[*pNr_of_patients].name may contain terminating blanks. right trim it. that's easy enough.
      if (n > 2)
        { /* found photo refs.Now split the string into integers */
          char *s = refs + 1;   //skip '['
          char *p;
          while (*s && i<10){       // scan for the integers, 10 of them
            while (*s && *s == ' ')
              s++;  // skip blanks
            p = s;  // mark start of number
            while (*p && *p != ',')
              p++;
            if (*p == ',')
              *p = 0;
            reg[*pNr_of_patients].photo_ref[i++] = atoi (s);    //tip: use strtol(3), verify that `i' isnt larger than size of the array
            s = p + 1;  // skip ','. Must Do: verify that `s' hasnt yet moved past the end of `ref'!!
          }
        }
      (*pNr_of_patients)++;
    }
      fclose (fp);
    }
  else
    {
      printf ("File does not exist\n");
    }
}

Upvotes: 0

KamilCuk
KamilCuk

Reputation: 141698

I think that scanning input is one of the hardest in C. That's why libraries like cs50 exists, to ease up reading input for new C users. Anyway, I constructed my solution, but I redesigned your function.

The first solution reads a single Patient from a line. It does not use sscanf the only standard call that set's errno is to strtol, which is used to convert up numbers.
The second function uses sscanf and some crazy format string construction to stay safe of buffer overflow.
It all brings down at to how the input stream is constructed and how much you trust it.

#include <stdio.h>
#include <assert.h>
#include <stddef.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include <stdlib.h>
#include <limits.h>

struct patient{
    char pers_nr[12];
    char name[30];
    int photo_ref[10];
    size_t nr_of_ref;
};

typedef struct patient Patient;

int patient_read_from_line_1(const char line[], Patient *p)
{
    assert(line != NULL);
    assert(p != NULL);

    // check the first 12 characters ----------
    // first 6 chars must be numbers
    for (int i = 0; i < 6; ++i) {
        if (!isdigit(line[i])) {
            return -__LINE__;
        }
    }
    // followed by a single '-'
    if (line[6] != '-') {
        return -__LINE__;
    }
    // followed by 4 numbers
    for (int i = 7; i < 7 + 4; ++i) {
        if (!isdigit(line[i])) {
            return -__LINE__;
        }
    }
    // followed by a space
    if (line[7 + 4] != ' ') {
        return -__LINE__;
    }
    // read up first field ---------------------
    // cool first field checks out
    memcpy(p->pers_nr, line, 11);
    p->pers_nr[11] = '\0';

    line += 12;
    // let's omit spaces
    while (line[0] == ' ') {
        line++;
    }

    // read up second field --------------------------
    // now we should read a two strings separated by a space
    // so we should read up until a second space
    if (!isalpha(*line)) {
        return -__LINE__;
    }
    const char *pnt_first_space = strchr(line, ' ');
    if (pnt_first_space == NULL) {
        return -__LINE__;
    }
    const char *pnt_another_space = strchr(pnt_first_space + 1, ' ');
    if (pnt_another_space == NULL) {
        return -__LINE__;
    }
    const size_t name_to_read_length = pnt_another_space - line;
    if (name_to_read_length > sizeof(p->name)) {
        return -__LINE__;
    }
    memcpy(p->name, line, name_to_read_length);
    p->name[name_to_read_length] = '\0';

    // buh two fields done, now the array
    line += name_to_read_length;
    // let's omit the spaces
    while (line[0] == ' ') {
        line++;
    }

    // read up array -----------------------------------
    // array
    if (line[0] != '[') {
        return -__LINE__;
    }
    line++;
    for (size_t numscnt = 0;; ++numscnt) {
        if (numscnt >= sizeof(p->photo_ref)/sizeof(*p->photo_ref)) {
            return -__LINE__;
        }
        char *pnt;
        errno = 0;
        long num = strtol(line, &pnt, 10);
        if (errno) {
            return -__LINE__;
        }
        if (!(INT_MIN < num && num < INT_MAX)) {
            return -__LINE__;
        }
        p->photo_ref[numscnt] = num;

        line = pnt;
        // omit spaces
        while (*line == ' ') line++;
        // now we should get a comma
        if (line[0] != ',') {
            // if don't get a comma, we need to get a ]
            if (line[0] == ']') {
                // cool
                ++line;
                // but remember to save the count
                p->nr_of_ref = numscnt + 1;
                // cool
                break;
            }
            return -__LINE__;
        }
        ++line;
        // omit spaces
        while (*line == ' ') line++;
        // start again
    }
    // this needs to be end of line or newline
    if (line[0] != '\0' && line[0] != '\n') {
        return -__LINE__;
    }
    // success!
    return 0;
}

// ok, ok, ok, let's use sscanf
int patient_read_from_line_2(const char line[], Patient *p)
{
    assert(line != NULL);
    assert(p != NULL);
    int ret;
    int pos;

    // read up first fiedl and half of the second ------------------
    ret = sscanf(line, "%12s %30[^ ] %n", p->pers_nr, p->name, &pos);
    if (ret != 2) {
        return -__LINE__;
    }
    line += pos;

    // read up another half of the second field -------------------
    const size_t cur_name_len = strlen(p->name);
    p->name[cur_name_len] = ' ';
    char tmp[20];
    ret = snprintf(tmp, 20, "%%%d[^ ] [%%n", (int)(sizeof(p->name) - cur_name_len - 1));
    if (ret < 0) {
        return -__LINE__;
    }
    ret = sscanf(line, tmp, &p->name[cur_name_len + 1], &pos);
    if (ret != 1) {
        return -__LINE__;
    }
    line += pos;

    // read up array *sigh* ------------------------------------------- 
    for (p->nr_of_ref = 0;; ++p->nr_of_ref) {
        if (p->nr_of_ref >= sizeof(p->photo_ref)/sizeof(*p->photo_ref)) {
            return -__LINE__;
        }

        ret = sscanf(line, " %d%1s%n", &p->photo_ref[p->nr_of_ref], tmp, &pos);
        if (ret == 0) {
            // hm...
            if (line[0] == ']') {
                // ach all ok, empty numbers list;
                line++;
                p->nr_of_ref++;
                break;
            }
            return -__LINE__;
        }
        if (ret != 2) {
            return -__LINE__;
        }
        line += pos;
        if (tmp[0] != ',') {
            if (tmp[0] == ']') {
                // whoa!  success
                p->nr_of_ref++;
                // cool
                break;
            }
            return -__LINE__;
        }
    }

    // so what's left? - EOF or newline
    if (line[0] != '\0' && line[0] != '\n') {
        return -__LINE__;
    }

    // success!
    return 0;
}

long patient_read_from_file(FILE *fp, Patient patients[], size_t patients_len)
{
    size_t patients_cnt = 0;

    char line[256];
    // for each line in file
    while (fgets(line, sizeof(line), fp) != NULL) {

        const int ret = patient_read_from_line_2(line, &patients[patients_cnt]);
        if (ret < 0) {
            // hanle reading error 
            return ret;
        }

        patients_cnt++;
        if (patients_cnt > patients_len) {
            // no more memory in patients left
            return -__LINE__;
        }

    }

    return patients_cnt;
}

void patient_fprintln(FILE *f, const Patient *p)
{
    fprintf(f, "%s %s [", p->pers_nr, p->name);
    for (size_t i = 0; i < p->nr_of_ref; ++i) {
        fprintf(f, "%d", p->photo_ref[i]);
        if (i + 1 != p->nr_of_ref) {
            fprintf(f, ",");
        }
    }
    fprintf(f, "]\n");
}

int main()
{
    FILE *fp;
    fp = stdin; // fopen("file.txt","r");
    if (fp == NULL) {
        return -__LINE__;
    }

    Patient patients[3];
    const long patients_cnt = patient_read_from_file(fp, patients, sizeof(patients)/sizeof(*patients));
    if (patients_cnt < 0) {
        fprintf(stderr, "patient_read_from_file error %ld\n", patients_cnt);
        return patients_cnt;
    }

    fclose(fp);

    printf("Readed %d patients:\n", patients_cnt);
    for (size_t i = 0; i < patients_cnt; ++i) {
        patient_fprintln(stdout, &patients[i]);
    }

    return 0;
}

Live version available at onlinedbg.

This can be simplified for 100%. This has bugs for 100%. It is just to show what methods (strtol, memcpy, sscanf, isdigit, isalpha) are sometimes used by people to read from input. Also I specify length modifier to scanf (sscanf(..., "%12s") to handle overflows (hopefully). Try to always check return values from scanf and other standard functions (maybe checking snprintf return value is a little too much, but hey, let's be consistent). Be vary, that on some platforms the %n scanf modifier happens not to work. Also this can be build up to use dynamic allocation using malloc, realloc and free, both on line reading (basically it is equal to writing custom version of GNU getline), reading strings from input, reading int's array from input and dynamic allocations of patients.

Upvotes: 0

Related Questions