Mona Jalal
Mona Jalal

Reputation: 38155

sorting based on key from a file

I need to sort stuff read from a file similar to the following:

Key: 2 rec:1 2 3 4 5 6 ...
Key: 3 rec:7 8 9 10 11 ...
Key: 1 rec:A B C D E F ...

becomes

Key: 1 rec:A B C D E F ...
Key: 2 rec:1 2 3 4 5 6 ...
Key: 3 rec:7 8 9 10 11 ...

If the contents of rec (the record) for a key are unsorted, they must remain unchanged, because sorting is based only on the key. I want to use qsort() from the C standard library for sorting. My idea is to use strtok to break each line read from the file into manageable arrays, but I am not sure it's the best way to find the key number so that I can sort the lines with qsort.

P.S.: Each line of the input file includes one key like Key: 1 rec:A B C D E F ... Also we won't sort records within a key.

Upvotes: 3

Views: 684

Answers (3)

Jonathan Leffler
Jonathan Leffler

Reputation: 753655

If you must write C, it needn't be all that long or complicated. You could simplify it more than this if you skimp on the error checking.

#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

extern void err_exit(const char *fmt, ...);

/* One input record: the text of a line plus the integer key parsed from it. */
struct data
{
    char *line;   /* the complete text of the input line */
    int   key;    /* the number the line is sorted by */
};
typedef struct data data;

/*
 * qsort() comparator for `data` records: orders by ascending key.
 * Returns -1, 0 or +1.  The keys are compared rather than subtracted, so the
 * result cannot overflow.
 */
static int cmp_data(const void *v1, const void *v2)
{
    const int lhs = ((const data *)v1)->key;
    const int rhs = ((const data *)v2)->key;

    return (lhs > rhs) - (lhs < rhs);
}

int main(void)
{
    char buffer[4096];
    data *array = 0;
    size_t array_len = 0;
    size_t array_max = 0;

    while (fgets(buffer, sizeof(buffer), stdin) != 0)
    {
        if (array_len >= array_max)
        {
            size_t new_size = (array_max + 2) * 2;
            void *space = realloc(array, new_size * sizeof(data));
            if (space == 0)
                err_exit("Out of memory (1)");
            array = space;
            array_max = new_size;
        }
        array[array_len].line = strdup(buffer);
        if (array[array_len].line == 0)
            err_exit("Out of memory (2)");
        if (sscanf(array[array_len].line, "%*s %d", &array[array_len].key) != 1)
            err_exit("Format error - no number in right place in: %.20s...\n",
                     array[array_len].line);
        //printf("%3zu:%.10d: %s", array_len, array[array_len].key,
        //       array[array_len].line);
        array_len++;
    }

    qsort(array, array_len, sizeof(data), cmp_data);

    for (size_t i = 0; i < array_len; i++)
        fputs(array[i].line, stdout);

    return 0;
}

/*
 * Print a printf-style message to stderr, append " (errno: description)" when
 * errno was non-zero on entry, finish the line and terminate the program
 * with EXIT_FAILURE.  Does not return.
 */
void err_exit(const char *fmt, ...)
{
    /* Capture errno first: the stdio calls below are free to change it. */
    const int saved_errno = errno;
    va_list ap;

    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);

    if (saved_errno != 0)
    {
        const char *reason = strerror(saved_errno);
        fprintf(stderr, " (%d: %s)", saved_errno, reason);
    }
    fputc('\n', stderr);
    exit(EXIT_FAILURE);
}

keysort — overwriting files as they are sorted

#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void sort_file(const char *i_file, const char *o_file);

/*
 * Sort each file named on the command line in place (input and output are
 * the same file).  With no file arguments, act as a filter from /dev/stdin
 * to /dev/stdout.
 */
int main(int argc, char **argv)
{
    int arg = 1;

    if (argc <= 1)
    {
        sort_file("/dev/stdin", "/dev/stdout");
        return 0;
    }

    while (arg < argc)
    {
        sort_file(argv[arg], argv[arg]);
        arg++;
    }
    return 0;
}

/* A line of the file together with the sort key extracted from it. */
struct data
{
    char *line;   /* text of the line */
    int   key;    /* integer the line is ordered by */
};
typedef struct data data;

/*
 * Comparator for qsort(): ascending order of the `key` member.
 * Yields -1 when the first key is smaller, +1 when it is larger, 0 when equal.
 */
static int cmp_data(const void *v1, const void *v2)
{
    const data *lhs = (const data *)v1;
    const data *rhs = (const data *)v2;

    return (lhs->key < rhs->key) ? -1 : (lhs->key > rhs->key);
}

/*
 * Report a fatal error and stop.  The printf-style message goes to stderr,
 * followed by " (errno: description)" if errno was set when the function was
 * entered, then a newline.  The process exits with EXIT_FAILURE.
 */
static void err_exit(const char *fmt, ...)
{
    const int code = errno;     /* saved before any stdio call can alter it */
    va_list ap;

    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);

    if (code != 0)
        fprintf(stderr, " (%d: %s)", code, strerror(code));

    fputc('\n', stderr);
    exit(EXIT_FAILURE);
}

void sort_file(const char *i_file, const char *o_file)
{
    char buffer[4096];
    data *array = 0;
    size_t array_len = 0;
    size_t array_max = 0;

    FILE *i_fp = fopen(i_file, "r");
    if (i_fp == 0)
        err_exit("Failed to open file %s for reading", i_file);

    while (fgets(buffer, sizeof(buffer), i_fp) != 0)
    {
        if (array_len >= array_max)
        {
            size_t new_size = (array_max + 2) * 2;
            void *space = realloc(array, new_size * sizeof(data));
            if (space == 0)
                err_exit("Out of memory (1)");
            array = space;
            array_max = new_size;
        }
        array[array_len].line = strdup(buffer);
        if (array[array_len].line == 0)
            err_exit("Out of memory (2)");
        if (sscanf(array[array_len].line, "%*s %d", &array[array_len].key) != 1)
            err_exit("Format error - no number in right place in: %.20s...\n",
                     array[array_len].line);
        //printf("%3zu:%.10d: %s", array_len, array[array_len].key,
        //       array[array_len].line);
        array_len++;
    }
    fclose(i_fp);

    qsort(array, array_len, sizeof(data), cmp_data);

    FILE *o_fp = fopen(o_file, "w");
    if (o_fp == 0)
        err_exit("Failed to open file %s for writing", o_file);
    for (size_t i = 0; i < array_len; i++)
        fputs(array[i].line, o_fp);
    fclose(o_fp);
}

If your system doesn't support /dev/stdin and /dev/stdout, then you have to modify the interface to sort_file(), probably to:

void sort_file(const char *i_file, FILE *ifp, const char *o_file, FILE *ofp);

You then decide that if ifp is not null, you use it for input — otherwise you open the file specified by i_file. Similarly for output: if ofp is not null, you use it — otherwise, you open the file specified by o_file. The changes to main() and in the body of sort_file() are trivial.

Upvotes: 1

smac89
smac89

Reputation: 43078

To do this in C, use sscanf: its format string acts as a rough substitute for a regex and extracts the integer you need:

/*
 * qsort() comparator for an array of `char *` lines shaped like
 * "Key: <number> rec:...".  Orders the lines by ascending <number>.
 *
 * str1 and str2 each point at an array element, i.e. at a `char *`.
 * Returns a negative value, zero or a positive value as the first key is
 * less than, equal to or greater than the second.  A line with no parsable
 * number after its first word is treated as having key 0.
 */
int comp(const void *str1, const void *str2) {
    const char *a = *(const char *const *)str1;
    const char *b = *(const char *const *)str2;
    /* Initialised so a failed sscanf() leaves a defined value behind. */
    int key1 = 0, key2 = 0;
    sscanf(a, "%*s%d", &key1);
    sscanf(b, "%*s%d", &key2);
    /* Compare instead of subtracting: key1 - key2 overflows (undefined
     * behaviour, wrong sign) when the keys are far apart. */
    return (key1 > key2) - (key1 < key2);
}

// Call qsort like so: `lines` is the char ** array of line pointers and
// `numElements` is the number of entries in it. qsort takes the element
// count and the element size as size_t values.
qsort(/*char ** */lines, /*size_t*/numElements, /*size_t*/ sizeof (char*), comp);

Don't know how to use the regex library in c++, but sscanf still works. Full working example in c++11:

#include <iostream>
#include <cstdio>
#include <deque>
#include <string>
#include <algorithm>

// Demonstrates sorting "Key: <number> rec:..." lines by their numeric key
// with std::sort, then prints the sorted lines one per line.
int main() {

    //Using fstream, read in each line of the file into a string using getline(...)
    std::deque<std::string> lines = {
        "Key: 2 rec:1 2 3 4 5 6",
        "Key: 3 rec:7 8 9 10 11",
        "Key: 1 rec:A B C D E F",
        "Key: 4 rec:1 2 3 4 5 6"
    }; //Store each in a deque object

    // Extracts the integer that follows the first whitespace-delimited word.
    // The key starts at 0 so that a line without a number still yields a
    // defined value; comparing uninitialized ints is undefined behaviour and
    // can break the strict weak ordering std::sort relies on.
    const auto key_of = [](const std::string &line) {
        int key = 0;
        std::sscanf(line.c_str(), "%*s%d", &key);
        return key;
    };

    //using std::sort
    std::sort(lines.begin(), lines.end(),
              [&key_of](const std::string &lhs, const std::string &rhs) {
                  return key_of(lhs) < key_of(rhs);
              });

    // Iterate by const reference: no need to copy every string to print it.
    for (const auto &line : lines)
        std::cout << line << "\n";
    return 0;
}

Upvotes: 3

Gangadhar
Gangadhar

Reputation: 10516

If the key lengths are not all the same, you should avoid using strncmp. Instead, read the file line by line and extract the key value by looping from line[5] to the next space (or use strtok with a space as the delimiter).

Repeat this until EOF, storing the key values in an array or list.

Next sort array or list.

Now, for each key in the sorted array, find the matching line in your file using strstr and copy that line into a new file. Convert the key to a string before calling strstr.

If you want to avoid copying into a new file, you need to move the file pointer between the lines using fseek and modify the lines in place.

Upvotes: 1

Related Questions