kids
kids

Reputation: 57

How to sort lines of file depending on value at end of each line

I'm trying to create a program that takes an input file and sorts it to a new output file in ascending order depending on the number at the end of each line. For example, if the input file contains three lines below:

a good man 50
65
better are 7

The corresponding sorted output file would be three lines but sorted:

better are 7
a good man 50
65

Code I have so far:

/*
 * Read up to MAX_LINES lines from inputFileName and echo them to stdout in
 * file order.
 *
 * outputFileName is opened for writing (created/truncated) but nothing is
 * written to it yet: the sorting and output step is still to be implemented.
 *
 * Lines beyond MAX_LINES are ignored, and lines longer than MAX_LEN - 1
 * characters are split across several slots by fgets.
 *
 * Returns 0 on success; terminates the process with exit(-1) if either file
 * cannot be opened.
 */
int sortLines(char * inputFileName, char * outputFileName)
{
   enum { MAX_LINES = 10, MAX_LEN = 1024 };

   FILE *fpin = fopen(inputFileName, "r");   /* open file to read */
   if (!fpin)
   {
      printf("Error in file opening\n");
      exit (-1);
   }
   FILE *fpout = fopen(outputFileName, "w"); /* open file to write */
   if (!fpout)
   {
      printf("Error in opfile opening\n");
      fclose(fpin);
      exit (-1);
   }

   char file[MAX_LINES][MAX_LEN];
   int total = 0;

   /* Test the index BEFORE calling fgets so that an 11th line is never
      written past the end of the array. */
   while (total < MAX_LINES && fgets(file[total], sizeof(file[total]), fpin))
      total++;

   for (int i = 0; i < total; ++i)
      printf("%s", file[i]);

   fclose(fpin);
   fclose(fpout);
   return 0;
}

Upvotes: 0

Views: 530

Answers (2)

David C. Rankin
David C. Rankin

Reputation: 84551

Continuing on from the comment, you can read the lines into a struct (containing the line and an int), then use strrchr to find the last space in each line (or if null, just take the whole line), and convert the string with strtol or atoi or the like to set the int field of the struct. Then it is a simple matter of sorting the structs based on the int member. I'll leave the reading into the struct to you; the sorting example is:

#include <stdio.h>
#include <stdlib.h>

#define MAXL 32

/* One line of text together with the integer used to order it. */
struct data {
    char line[MAXL];    /* text of the line                      */
    int n;              /* sort key compared by compare_n        */
};

/*
 * qsort comparison callback: orders struct data elements by ascending n.
 *
 * a, b  pointers to the two struct data elements being compared.
 * Returns -1, 0 or 1 when a's key is less than, equal to or greater than
 * b's key.
 *
 * The result is built from two comparisons rather than a subtraction
 * because `ia->n - ib->n` overflows (undefined behavior) when the keys
 * have opposite signs and large magnitudes.
 */
int compare_n (const void *a, const void *b)
{
    const struct data *ia = a;
    const struct data *ib = b;

    return (ia->n > ib->n) - (ia->n < ib->n);
}

int main (void)
{
    struct data lines[] = {{"a good man 50", 50}, {"65", 65}, {"better are 7", 7}};
    size_t nstr = sizeof lines / sizeof *lines;
    size_t i = 0;

    qsort (lines, nstr, sizeof *lines, compare_n);

    for (i = 0; i < nstr; i++)
        printf (" %s\n", lines[i].line);

    return 0;
}

Output

$ ./bin/struct_sort_int
 better are 7
 a good man 50
 65

full example

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAXL 64

/* simple struct holding char array and int */
struct data {
    char line[MAXL];    /* text of the line                      */
    int n;              /* sort key compared by compare_n        */
};

/*
 * qsort comparison function for int 'n': orders struct data elements by
 * ascending n.
 *
 * a, b  pointers to the two struct data elements being compared.
 * Returns -1, 0 or 1 when a's key is less than, equal to or greater than
 * b's key.
 *
 * Two comparisons are used instead of `ia->n - ib->n` because the
 * subtraction overflows (undefined behavior) for keys of opposite sign
 * and large magnitude.
 */
int compare_n (const void *a, const void *b)
{
    const struct data *ia = a;
    const struct data *ib = b;

    return (ia->n > ib->n) - (ia->n < ib->n);
}

/*
 * Read the file named by argv[1], keep up to MAXL non-blank lines of at
 * most MAXL - 1 characters each, sort them in ascending order of the
 * integer that follows the last space on the line (or of the whole line
 * when it contains no space) and print them to stdout.
 *
 * Returns 0 on success, 1 on a usage error or if the file cannot be opened.
 */
int main (int argc, char **argv)
{
    if (argc < 2 ) {    /* validate at least 1 argument provided */
        fprintf (stderr, "error: insufficient input, usage: %s filename\n", argv[0]);
        return 1;
    }

    struct data lines[MAXL] = {{{0}, 0}};   /* array of struct      */
    char *ln = NULL;    /* buffer for getline, getline allocates    */
    size_t n = 0;       /* initial size of buf, 0 getline decides   */
    ssize_t nchr = 0;   /* getline return, no. of chars read        */
    size_t idx = 0;     /* number of lines stored so far            */

    FILE *fp = fopen (argv[1], "r");
    if (!fp) {                              /* validate file open   */
        fprintf (stderr, "error: file open failed. '%s'\n", argv[1]);
        return 1;
    }

    /* read each line in file */
    while ((nchr = getline (&ln, &n, fp)) != -1)
    {
        while (nchr > 0 && (ln[nchr-1] == '\n' || ln[nchr-1] == '\r'))
            ln[--nchr] = 0;     /* strip newline or carriage rtn    */

        if (!nchr)              /* skip blank lines                 */
            continue;

        /* A line that does not fit in struct data.line is skipped rather
           than truncated, because truncation would cut off the number
           at its end. */
        if (nchr > MAXL - 1) {
            fprintf (stderr,
                    "warning: line exceeds %d chars, skipped.\n", MAXL - 1);
            continue;
        }

        strcpy (lines[idx].line, ln);   /* fits: length checked above */

        const char *p = strrchr (ln, ' ');  /* pointer to last space */
        if (!p)
            p = ln;                         /* if no space, then line */

        lines[idx].n = atoi (p);        /* convert string to int    */

        idx++;

        if (idx == MAXL) {              /* array full: stop reading */
            fprintf (stderr, "warning: %d lines read.\n", MAXL);
            break;
        }
    }

    fclose (fp);        /* fp is known to be non-NULL here          */
    free (ln);          /* free(NULL) is a no-op, no guard needed   */

    qsort (lines, idx, sizeof *lines, compare_n);   /* sort struct  */

    for (size_t i = 0; i < idx; i++)    /* print sorted array       */
        printf (" %s\n", lines[i].line);

    return 0;
}

Take a look and let me know if you have questions. Your data was in the file dat/endno.txt for my test. I'll add comments when I get a chance.

note: updated to skip blank lines and to check each line's length against MAXL. This eliminates the possibility of writing beyond the end of the line array, and skips lines that would otherwise be truncated (truncation would render the number at the end invalid).


without struct statically allocated arrays

The following is an example that uses two 2D arrays, one for the lines and then one holding the original line index and number at end of line. Unlike the dynamically allocated example below, this example is limited to reading MAXL lines from the file of no more than MAXS characters each. If a line is exactly MAXS characters long (including the null-terminator), it must be discarded, because there is no way of knowing if the number at end remains valid. The 2D array containing the line index and number at end is sorted based on the number at end, then lines are printed based on the original line index resulting in the lines printing in sorted order by number at end. While this may look simpler, it is far more limited than the method utilizing the struct or the dynamically allocated approach below. This is about all I can think to do to get you going. Good luck. Drop a line if you have questions.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAXL 64
#define MAXS 128

int cmpint (const void *a, const void *b);

/*
 * Read the file named by argv[1] into fixed-size arrays (at most MAXL
 * lines of at most MAXS - 1 characters each), sort by the integer that
 * follows the last space on each line (or the whole line when there is no
 * space) and print the lines to stdout in ascending order.
 *
 * Blank lines are skipped. Lines that do not fit in the buffer are
 * skipped with a warning, since truncation would cut off the number at
 * the end.
 *
 * Returns 0 on success, 1 on a usage error or if the file cannot be opened.
 */
int main (int argc, char **argv) {

    if (argc < 2 ) {    /* validate at least 1 argument provided */
        fprintf (stderr, "error: insufficient input, usage: %s filename\n", argv[0]);
        return 1;
    }

    int numidx[MAXL][2] = {{0}};    /* [i][0] = end number, [i][1] = line index */
    char lines[MAXL][MAXS] = {{0}}; /* array of strings                         */
    char ln[MAXS] = {0};            /* buffer for fgets, MAXS in length         */
    size_t idx = 0;                 /* number of lines stored so far            */
    FILE *fp = NULL;                /* file pointer for input file              */

    if (!(fp = fopen (argv[1], "r"))) {         /* validate file open   */
        fprintf (stderr, "error: file open failed. '%s'\n", argv[1]);
        return 1;
    }

    /* read each line in file */
    while (fgets (ln, sizeof ln, fp) != NULL)
    {
        size_t nchr = strlen (ln);      /* get length of ln         */

        /* A full buffer without a trailing newline means fgets stopped
           early. Consume the rest of the physical line so it is not read
           back as a separate line, and note whether any real text (not
           just a line terminator) was left behind. */
        if (nchr == sizeof ln - 1 && ln[nchr-1] != '\n') {
            int truncated = 0;
            int c;

            while ((c = fgetc (fp)) != EOF && c != '\n')
                if (c != '\r')
                    truncated = 1;

            if (truncated) {            /* end no. would be invalid */
                fprintf (stderr, "warning: line exceeds %d chars, skipped.\n",
                        MAXS - 1);
                continue;
            }
        }

        while (nchr > 0 && (ln[nchr-1] == '\n' || ln[nchr-1] == '\r'))
            ln[--nchr] = 0;     /* strip newline or carriage rtn    */

        if (!nchr)                      /* skip blank lines         */
            continue;

        strcpy (lines[idx], ln);        /* copy ln to lines[idx]    */

        const char *p = strrchr (ln, ' ');  /* pointer to last space */
        if (!p)
            p = ln;                     /* if no space, then line   */

        numidx[idx][0] = atoi (p);      /* save end no. in array    */
        numidx[idx][1] = (int) idx;     /* save line index in array */

        idx++;                          /* increment index          */

        if (idx == MAXL) {              /* if MAXL read, break      */
            fprintf (stderr, "warning: %d lines read.\n", MAXL);
            break;
        }
    }

    fclose (fp);

    /* each element is a pair of ints; cmpint orders by the first one */
    qsort (numidx, idx, sizeof numidx[0], cmpint);

    for (size_t i = 0; i < idx; i++)    /* print in sorted order    */
        printf (" %s\n", lines[numidx[i][1]]);

    return 0;
}

/* qsort callback: compares the first int that each argument points to.
   Returns -1, 0 or 1 for less than, equal to or greater than. */
int cmpint (const void *pa, const void *pb )
{
    const int lhs = *(const int *)pa;
    const int rhs = *(const int *)pb;

    return (lhs > rhs) - (lhs < rhs);
}

without struct, dynamically allocated arrays

To get around using a structure to hold the string and number, you can use 2 arrays. One to hold the strings, and another 2D array holding the original line index and number at end of line (2 integers). You then qsort the integer array on the (number at end) element, then loop through each line printing out the lines in sorted order based on the line index value of the sorted array. This is set to handle lines of any length and reallocate the number of lines (in each array) as needed. Since the dynamic allocation may be a bit much, I'm working on a static array version as well, but it will be tomorrow before I have time. Here is the first version:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAXL 64

int cmpint (const void *a, const void *b);
char **realloc_char (char **sp, size_t *n);
int **realloc_int (int **ip, size_t *n);

/*
 * Read every non-blank line of the file named by argv[1] into dynamically
 * grown arrays, sort by the integer that follows the last space on each
 * line (or the whole line when there is no space) and print the lines to
 * stdout in ascending order.
 *
 * Returns 0 on success, 1 on a usage error, a file-open failure or a
 * failed allocation.
 */
int main (int argc, char **argv) {

    if (argc < 2 ) {    /* validate at least 1 argument provided */
        fprintf (stderr, "error: insufficient input, usage: %s filename\n", argv[0]);
        return 1;
    }

    int **numidx = NULL;    /* array of int*, each -> {end no., line index} */
    char **lines = NULL;    /* array of char*, each -> copy of one line     */
    char *ln = NULL;        /* buffer for getline, getline allocates        */
    size_t n = 0;           /* initial size of buf, 0 getline decides       */
    ssize_t nchr = 0;       /* getline return, no. of chars read            */
    size_t idx = 0;         /* number of lines stored so far                */
    size_t maxl = MAXL;     /* holds current allocation size of arrays      */
    FILE *fp = NULL;        /* file pointer for input file                  */

    if (!(fp = fopen (argv[1], "r"))) {         /* validate file open   */
        fprintf (stderr, "error: file open failed. '%s'\n", argv[1]);
        return 1;
    }

    /* allocate MAXL pointers to int */
    if (!(numidx = calloc (MAXL, sizeof *numidx))) {
        fprintf (stderr, "error: memory allocation failed.\n");
        return 1;
    }

    /* allocate MAXL pointers to char */
    if (!(lines = calloc (MAXL, sizeof *lines))) {
        fprintf (stderr, "error: memory allocation failed.\n");
        return 1;
    }

    /* read each line in file */
    while ((nchr = getline (&ln, &n, fp)) != -1)
    {
        while (nchr > 0 && (ln[nchr-1] == '\n' || ln[nchr-1] == '\r'))
            ln[--nchr] = 0;     /* strip newline or carriage rtn    */

        if (!nchr) continue;            /* skip blank lines         */

        /* strdup allocates and can fail; an unchecked NULL would later
           be handed to printf's %s */
        if (!(lines[idx] = strdup (ln))) {
            fprintf (stderr, "error: memory allocation failed.\n");
            return 1;
        }

        /* allocate space for 2 int at numidx[idx] */
        if (!(numidx[idx] = calloc (2, sizeof **numidx))) {
            fprintf (stderr, "error: memory allocation failed.\n");
            return 1;
        }

        const char *p = strrchr (ln, ' ');  /* pointer to last space */
        if (!p)
            p = ln;                     /* if no space, then line   */

        numidx[idx][0] = atoi (p);      /* save end no. in array    */
        numidx[idx][1] = (int) idx;     /* save line index in array */

        idx++;                          /* increment index          */

        if (idx == maxl) {              /* if idx = maxl reallocate */
            size_t tsz = maxl;          /* tmp var, each get maxl   */
            numidx = realloc_int (numidx, &tsz);
            lines = realloc_char (lines, &maxl);
        }
    }

    free (ln);              /* free(NULL) is a no-op, no guard needed */
    fclose (fp);

    /* sort the int* elements by the end number each one points to */
    qsort (numidx, idx, sizeof *numidx, cmpint);

    for (size_t i = 0; i < idx; i++)    /* print in sorted order    */
        printf (" %s\n", lines[numidx[i][1]]);

    for (size_t i = 0; i < idx; i++) {  /* free allocated memory    */
        free (numidx[i]);
        free (lines[i]);
    }
    free (numidx);
    free (lines);

    return 0;
}

/*
 * qsort integer compare function for an array of int pointers.
 *
 * a, b  pointers to two array elements, each of which is itself a pointer
 *       to int; the first int behind each is the sort key.
 * Returns -1, 0 or 1 when a's key is less than, equal to or greater than
 * b's key.
 *
 * Two comparisons are used instead of subtracting the keys because the
 * subtraction overflows (undefined behavior) for keys of opposite sign
 * and large magnitude.
 */
int cmpint (const void *a, const void *b)
{
    const int *const *ia = a;
    const int *const *ib = b;
    const int lhs = (*ia)[0];
    const int rhs = (*ib)[0];

    return (lhs > rhs) - (lhs < rhs);
}

/** Double the capacity of an array of char pointers.
*  sp is the current array and *n its current element count. The array
*  is reallocated to 2 * *n elements, the newly added half is zeroed
*  byte-wise, and *n is updated to the new count. Returns the (possibly
*  moved) array; on allocation failure a message is printed to stderr
*  and the process exits with EXIT_FAILURE.
*/
char **realloc_char (char **sp, size_t *n)
{
    const size_t old_count = *n;
    char **grown = realloc (sp, 2 * old_count * sizeof *grown);

    if (grown == NULL) {
        fprintf (stderr, "Error: struct reallocation failure.\n");
        exit (EXIT_FAILURE);
    }

    /* zero the second half so unused slots can be recognised */
    memset (&grown[old_count], 0, old_count * sizeof *grown);
    *n = old_count * 2;

    return grown;
}

/** Double the capacity of an array of int pointers.
*  ip is the current array and *n its current element count. The array
*  is reallocated to 2 * *n elements, the newly added half is zeroed
*  byte-wise, and *n is updated to the new count. Returns the (possibly
*  moved) array; on allocation failure a message is printed to stderr
*  and the process exits with EXIT_FAILURE.
*
*  The elements are pointers, so each needs exactly sizeof *ip bytes;
*  the extra factor of 4 previously applied to both sizes only
*  allocated and cleared memory that was never used.
*/
int **realloc_int (int **ip, size_t *n)
{
    int **tmp = realloc (ip, 2 * *n * sizeof *ip);
    if (!tmp) {
        fprintf (stderr, "Error: struct reallocation failure.\n");
        exit (EXIT_FAILURE);
    }
    ip = tmp;
    memset (ip + *n, 0, *n * sizeof *ip); /* memset new ptrs 0 */
    *n *= 2;

    return ip;
}

Upvotes: 3

rcgldr
rcgldr

Reputation: 28828

You could read the entire file into a single buffer, create an array of structures containing pointers to lines and the values at the end of each line (scan for newline characters), then sort the array of structures by the values, and output the data according to the pointers in the sorted array of structures.

Upvotes: 0

Related Questions