shurrok
shurrok

Reputation: 831

Sort array alphabetically, upper letter always first

I want to sort arrays alphabetically, but I want upper-case letters to always come first. What I have achieved so far is a simple sort which doesn't take letter case into account. Should I add a special condition for it?

EDIT:

This is what I want to achieve: AaaAdDcCFfgGhHI should be sorted like this: AAaaCcDdFfGgHhI

#include <stdio.h>
#include <stdlib.h>

#define N 5

/* Comparison callback handed to qsort() below. */
int compare(const void *w1, const void *w2);

/*
 * Prints the N rows of s1, sorts the 15 characters of every row
 * independently with qsort(), then prints the rows again.
 * Always returns 0.
 */
int main(void) {
    /* Each initializer has exactly 15 letters, so the 15-column rows hold
       no terminating '\0'. */
    char s1[N][15] = {
        { "azghtdffopAsAfp" },
        { "poiuyjklhgADHTp" },
        { "hgjkFfGgBnVUuKk" },
        { "lokijuhygtfrdek" },
        { "AaaAdDcCFfgGhHI" } };

    char *wsk;
    int i, j;
    /* wsk is assigned here but never read afterwards in this function. */
    wsk = s1;

    /* Rows before sorting, one character at a time. */
    for (i = 0; i < N; i++) {
        for (j = 0; j < 15; j++) {
            printf("%c", s1[i][j]);
        }
        printf("\n");
    }
    /* One qsort() call per row: 15 elements of sizeof(char) bytes each. */
    for (i = 0; i < N; i++)
        qsort(s1[i], 15, sizeof(char), compare);

    /* Blank separator line, then the rows after sorting. */
    printf("\n");
    for (i = 0; i < N; i++) {
        for (j = 0; j < 15; j++) {
            printf("%c", s1[i][j]);
        }
        printf("\n");
    }
    return 0;
}

/*
 * Case-insensitive comparison used as the qsort() callback.
 *
 * Starting at the two addresses received, it steps through both character
 * sequences while both current characters are non-zero and returns the
 * first non-zero difference of their lower-cased values. If the loop ends
 * without finding one, it returns the lower-cased difference at the
 * position where it stopped.
 *
 * NOTE(review): there is no tie-break on case; 'A' and 'a' produce a
 * difference of 0 and the loop simply moves on to the following characters.
 */
int compare(const void *w1, const void *w2) {
    char *a1 = w1;
    char *a2 = w2;

    while (*a1 && *a2) {
        /* r is declared without a type specifier */
        register r = tolower(*a1) - tolower(*a2);
        if (r)
            return r;
        /* equal so far: advance both pointers to the next character */
        ++a1;
        ++a2;

    }
    return tolower(*a1) - tolower(*a2);
}

Upvotes: 1

Views: 1338

Answers (5)

chqrlie
chqrlie

Reputation: 144715

Your comparison function is incorrect: it compares multiple characters instead of just the ones pointed to by the arguments.

If you can assume ASCII, here is a much simpler comparison function that solves the problem:

/*
 * qsort() comparator for single characters.
 *
 * Orders primarily by the lower-cased character; when both characters are
 * the same letter, falls back to the raw character codes so that the
 * upper-case form (smaller code in ASCII) comes first.
 */
int compare(const void *w1, const void *w2) {
    const unsigned char *lhs = w1;
    const unsigned char *rhs = w2;
    int first = *lhs;
    int second = *rhs;
    int folded = tolower(first) - tolower(second);

    if (folded != 0) {
        return folded;          /* different letters: alphabetical order */
    }
    return first - second;      /* same letter: decide by case */
}

Also note that the main() function can be simplified too:

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>   /* tolower(), called by the compare() function */

#define N 5
#define LEN 15       /* characters per row; rows carry no '\0' terminator */

int compare(const void *w1, const void *w2);

/*
 * Prints the N rows, sorts the LEN characters of each row independently
 * with qsort() and compare(), then prints the rows again.
 * Always returns 0.
 */
int main(void) {
    char s1[N][LEN] = {
        { "azghtdffopAsAfp" },
        { "poiuyjklhgADHTp" },
        { "hgjkFfGgBnVUuKk" },
        { "lokijuhygtfrdek" },
        { "AaaAdDcCFfgGhHI" } };

    for (int i = 0; i < N; i++) {
        /* the precision stops printf at LEN chars: the rows are unterminated */
        printf("%.*s\n", LEN, s1[i]);
    }
    for (int i = 0; i < N; i++) {
        qsort(s1[i], LEN, sizeof(char), compare);
    }

    printf("\n");
    for (int i = 0; i < N; i++) {
        printf("%.*s\n", LEN, s1[i]);
    }
    return 0;
}

Upvotes: 0

David C. Rankin
David C. Rankin

Reputation: 84551

It is horribly unclear whether you want to sort all the characters in each ROW, or you want to sort the array of strings in the array, (or both). Both can be accomplished, but both have slightly different compare requirements.

Presuming you want to sort the array of arrays (easier if you make them strings), you would expect output like:

$ ./bin/sortcapsfirst
azghtdffopAsAfp
poiuyjklhgADHTp
hgjkFfGgBnVUuKk
lokijuhygtfrdek
AaaAdDcCFfgGhHI

AaaAdDcCFfgGhHI
azghtdffopAsAfp
hgjkFfGgBnVUuKk
lokijuhygtfrdek
poiuyjklhgADHTp

Otherwise, you would need to sort each row first (sorting each upper-case, before the same lower-case), then sort the array. That would result in output as follows:

$ ./bin/sortcapsfirst
azghtdffopAsAfp
poiuyjklhgADHTp
hgjkFfGgBnVUuKk
lokijuhygtfrdek
AaaAdDcCFfgGhHI

AAaaCcDdFfGgHhI
AAadfffghoppstz
ADgHhijkloppTuy
BFfGgghjKkknUuV
defghijkklortuy

You may be making things a bit harder on yourself than they need to be. Generally, a plain comparison by character code sorts capitals first, because in ASCII every upper-case letter has a smaller code than any lower-case letter. To sort the rows of the array s1 so that capitals come before lower-case, you need only make the number of columns 16 (to provide space for a nul-terminating character) and then call strcmp in your compare routine, e.g.:

/*
 * qsort() comparator for rows stored as nul-terminated strings.
 * Returns whatever strcmp() reports for the two rows.
 */
int compare(const void *w1, const void *w2) {
    return strcmp ((const char *)w1, (const char *)w2);
}

Putting it all together in an example, and properly terminating each j loop when the nul-terminating char is encountered, you could do:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#define N 5
#define R 16

int compare(const void *w1, const void *w2);

/*
 * Prints the N rows of s1, sorts the rows (as whole strings) with qsort()
 * and compare(), then prints them again. Always returns 0.
 */
int main(void) {

    /* R = 16 columns: 15 letters plus the nul-terminating character */
    char s1[][R] = {{ "azghtdffopAsAfp" },
                    { "poiuyjklhgADHTp" },
                    { "hgjkFfGgBnVUuKk" },
                    { "lokijuhygtfrdek" },
                    { "AaaAdDcCFfgGhHI" }};
    int i, j;

    for (i = 0; i<N; i++) {
        /* check the bound first so s1[i][R] is never read */
        for (j = 0; j<R && s1[i][j]; j++) {
            putchar(s1[i][j]);  /* don't use printf to print a single-char */
        }
        putchar('\n');
    }

    qsort (s1, N, sizeof *s1, compare);  /* sort array (rows) */

    putchar('\n');
    for (i = 0; i<N; i++) {
        for (j = 0; j<R && s1[i][j]; j++) {
            putchar(s1[i][j]);
        }
        putchar('\n');
    }
    return 0;
}

/*
 * Row comparator for qsort(): each argument points at one nul-terminated
 * row, and the result is the one strcmp() gives for the pair.
 */
int compare(const void *w1, const void *w2) {
    return strcmp ((const char *)w1, (const char *)w2);
}

For the second case where you sort the upper-case in each row before the equivalent lower-case and then sort the array, you simply add a second qsort compare function and call that as you are, before calling qsort on the entire array. e.g. (to sort each upper-case before the corresponding lower-case):

/*
 * qsort() comparator for single characters: letters are ordered
 * alphabetically without regard to case, and an upper-case letter is placed
 * before the same letter in lower case.
 *
 * w1, w2: pointers to the two char elements being compared.
 * Returns a negative, zero or positive value as *w1 sorts before, equal to
 * or after *w2.
 */
int compare (const void *w1, const void *w2) {
    /* unsigned char keeps the values given to tolower() in its valid range */
    const unsigned char *a1 = w1;
    const unsigned char *a2 = w2;

    /* Exactly one element is compared: qsort() passes one char per pointer,
       so looking at the following array elements would make the ordering
       depend on where the elements currently sit. */
    int r = tolower(*a1) - tolower(*a2);
    if (r)
        return r;

    /* Same letter: the upper-case form has the smaller code in ASCII.
       To sort ALLcapsfirst, make the whole body: return *a1 - *a2; */
    return (*a1 > *a2) - (*a1 < *a2);
}

Then call qsort as done in the first example to sort the rows in the array:

/*
 * qsort() comparator for whole rows: treats both arguments as
 * nul-terminated strings and returns the strcmp() result.
 */
int comparestr (const void *w1, const void *w2) {
    return strcmp ((const char *)w1, (const char *)w2);
}

Putting that together in the same example (with nul-terminated rows), you could do:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#define N 5
#define R 16

int compare (const void *w1, const void *w2);
int comparestr (const void *w1, const void *w2);

/*
 * Prints the N rows of s1, sorts the characters inside every row with
 * compare(), sorts the rows themselves with comparestr(), then prints the
 * result. Always returns 0.
 */
int main (void) {

    /* R = 16 columns: 15 letters plus the nul-terminating character */
    char s1[][R] = {{"azghtdffopAsAfp"},
                    {"poiuyjklhgADHTp"},
                    {"hgjkFfGgBnVUuKk"},
                    {"lokijuhygtfrdek"},
                    {"AaaAdDcCFfgGhHI"}};
    int i, j;

    for (i = 0; i < N; i++) {
        /* check the bound first so s1[i][R] is never read */
        for (j = 0; j < R && s1[i][j]; j++)
            putchar(s1[i][j]);
        putchar('\n');
    }

    for (i = 0; i < N; i++)                     /* sort arrays */
        qsort (s1[i], R - 1, sizeof *(s1[i]), compare);  /* R - 1: leave the terminator in place */
    qsort (s1, N, sizeof *s1, comparestr);      /* sort array */

    putchar('\n');
    for (i = 0; i < N; i++) {
        for (j = 0; j < R && s1[i][j]; j++)
            putchar(s1[i][j]);
        putchar('\n');
    }
    return 0;
}

/*
 * qsort() comparator for the characters inside one row: letters are ordered
 * alphabetically without regard to case, and an upper-case letter is placed
 * before the same letter in lower case.
 *
 * w1, w2: pointers to the two char elements being compared.
 * Returns a negative, zero or positive value as *w1 sorts before, equal to
 * or after *w2.
 */
int compare (const void *w1, const void *w2)
{
    /* unsigned char keeps the values given to tolower() in its valid range */
    const unsigned char *a1 = w1;
    const unsigned char *a2 = w2;

    /* Only the two elements pointed at are examined; walking on to their
       neighbours in the array would give qsort() an inconsistent ordering. */
    int r = tolower (*a1) - tolower (*a2);
    if (r)
        return r;

    /* Same letter: the upper-case form has the smaller code in ASCII.
       To sort ALLcapsfirst, make the whole body: return *a1 - *a2; */
    return (*a1 > *a2) - (*a1 < *a2);
}

/*
 * Row comparator for qsort(): both arguments point at nul-terminated rows
 * and the strcmp() result for the pair is returned unchanged.
 */
int comparestr (const void *w1, const void *w2)
{
    return strcmp ((const char *)w1, (const char *)w2);
}

Finally, as noted above, if you want to sort ALLCapsfirst, then simply return the difference between *a1 - *a2 instead of tolower (*a1) - tolower (*a2). e.g. using return *a1 - *a2; the sort would be:

AACDFGHIaacdfgh
AAadfffghoppstz
ADHTghijkloppuy
BFGKUVfgghjkknu
defghijkklortuy

Look things over. I could have misunderstood your goal completely. If so, drop a note and I can help further in a bit.

Upvotes: 1

ad absurdum
ad absurdum

Reputation: 21317

We should start by fixing a few issues in your code. First, you need to add #include <ctype.h>. You have declared char *wsk;, and assigned wsk = s1; for no apparent reason. More importantly, these are incompatible types, since s1 is a pointer to an array of 15 chars. And more important still, s1 should be an array of 16 chars! You have forgotten to include space for the '\0' terminator in your character arrays. So, the declaration of s1 needs to become:

char s1[N][16] = { { "azghtdffopAsAfp" },
                   { "poiuyjklhgADHTp" },
                   { "hgjkFfGgBnVUuKk" },
                   { "lokijuhygtfrdek" },
                   { "AaaAdDcCFfgGhHI" } };

The call to qsort() can be improved. Rather than use the magic number 15, it would be better to store the length of the strings in a variable. Also, sizeof(char) is always 1:

for (i = 0; i<N; i++) {
    size_t s1_len = strlen(s1[i]);
    qsort(s1[i], s1_len, 1, compare);
}

In the compare() function itself, you need to change to:

const unsigned char *a1 = w1;
const unsigned char *a2 = w2;

Declaring the pointers const avoids warnings about discarding const qualifiers. Making them point to unsigned char avoids undefined behavior, since the ctype.h functions expect an int argument whose value is representable as an unsigned char, or equal to EOF. Also, register is a storage-class specifier, not a type: it must accompany a type specifier. So you need register int r = ....

But your function is also relying on a property of the encoding of the execution character set that is not guaranteed by the Standard: that the letters are encoded in alphabetic sequence. You have taken the first step towards portability by using the tolower() function, rather than adding magic numbers to change the case of the characters. By using isupper() and islower() to test the case of characters, and by using strcoll() to test the ordering of characters, we can achieve something approaching maximum portability. strcoll() automatically orders uppercase letters before lowercase if it is appropriate for the locale, but it appears that all uppercase letters precede the lowercase, so an explicit test will be necessary to order two characters that compare equal after conversion to lowercase. One obstacle to overcome is that strcoll() compares strings for lexical ordering. To use it to compare characters we can deploy compound literals:

register int r = strcoll((const char[]){tolower(*c1), '\0'},
                         (const char[]){tolower(*c2), '\0'});

There is a loop in your compare() function that makes no sense to me. The compare() function should just compare two chars; there is no need to loop through anything, so I have removed this loop.

I wrote a new compare() function that uses strcoll() and compound literals to portably compare two chars. If the two characters compare equal (up to case), then their cases are checked. If the cases differ, the uppercase character is taken to come before the lowercase character.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>                 // strlen() here, strcoll() in compare()
#include <ctype.h>                  // required by compare()

#define N 5

int compare(const void *w1, const void *w2);

/*
 * Shows the N rows, sorts the characters of every row with qsort() and
 * compare(), then shows the rows again. Always returns 0.
 */
int main(void) {
    /* 16 columns so that each row keeps its '\0' terminator */
    char s1[N][16] = { { "azghtdffopAsAfp" },
                       { "poiuyjklhgADHTp" },
                       { "hgjkFfGgBnVUuKk" },
                       { "lokijuhygtfrdek" },
                       { "AaaAdDcCFfgGhHI" } };
    int row, col;

    /* no wsk pointer here: it was unneeded and its type was incompatible with s1 */

    /* rows as given */
    for (row = 0; row < N; ++row) {
        col = 0;
        while (col < 15) {
            printf("%c", s1[row][col]);
            ++col;
        }
        printf("\n");
    }

    /* sort each row over its actual string length, one byte per element */
    for (row = 0; row < N; ++row) {
        size_t row_len = strlen(s1[row]);
        qsort(s1[row], row_len, 1, compare);
    }

    /* blank line, then the sorted rows */
    printf("\n");
    for (row = 0; row < N; ++row) {
        col = 0;
        while (col < 15) {
            printf("%c", s1[row][col]);
            ++col;
        }
        printf("\n");
    }
    return 0;
}

/*
 * qsort() comparator for single characters.
 *
 * The lower-cased characters are turned into one-character strings and
 * ordered with strcoll(). When strcoll() reports them equal, an upper-case
 * character is ordered before a lower-case one; otherwise the result is 0.
 */
int compare(const void *a1, const void *a2) {
    const unsigned char lhs = *(const unsigned char *)a1;
    const unsigned char rhs = *(const unsigned char *)a2;

    /* strcoll() works on strings, so wrap each character in a tiny string */
    const char lhs_str[] = { tolower(lhs), '\0' };
    const char rhs_str[] = { tolower(rhs), '\0' };
    int order = strcoll(lhs_str, rhs_str);

    if (order != 0)
        return order;
    if (isupper(lhs) && islower(rhs))
        return -1;
    if (islower(lhs) && isupper(rhs))
        return 1;
    return 0;
}

Program output:

azghtdffopAsAfp
poiuyjklhgADHTp
hgjkFfGgBnVUuKk
lokijuhygtfrdek
AaaAdDcCFfgGhHI

AAadfffghoppstz
ADgHhijkloppTuy
BFfGgghjKkknUuV
defghijkklortuy
AAaaCcDdFfGgHhI

Upvotes: 2

Ingo Leonhardt
Ingo Leonhardt

Reputation: 9894

If you want the upper-/lower-case distinction to be made per character, so that the strings sort like "A", "Aa", "AB", "aa", "B", "b", compare could look like this:

/*
 * String comparator: case-insensitive alphabetical order, where the first
 * position holding the same letter in different case decides the result
 * (upper case first). Gives the order "A", "Aa", "AB", "aa", "B", "b".
 *
 * w1, w2: pointers to the nul-terminated strings to compare.
 * Returns a negative, zero or positive value as w1 sorts before, equal to
 * or after w2.
 */
int compare(const void *w1, const void *w2) {
    /* unsigned char keeps the ctype arguments in their valid range */
    const unsigned char *a1 = w1;
    const unsigned char *a2 = w2;

    while (*a1 && *a2)
    {
        int r = tolower(*a1) - tolower(*a2);
        if (r)
            return r;
        /* same letter: an upper-case character sorts before a lower-case one */
        if (isupper(*a1) && !isupper(*a2))
            return -1;      /* w1 < w2 */
        if (!isupper(*a1) && isupper(*a2))
            return 1;       /* w1 > w2 */

        ++a1;
        ++a2;
    }
    /* at least one string ended: the shorter one sorts first */
    return tolower(*a1) - tolower(*a2);
}

If you want "aa" to be sorted before "AB" it could look like:

/*
 * String comparator: case-insensitive alphabetical order first; only when
 * the strings are equal apart from case does the first case difference
 * decide (upper case first). So "aa" sorts before "AB", and "A" before "a".
 *
 * w1, w2: pointers to the nul-terminated strings to compare.
 * Returns a negative, zero or positive value as w1 sorts before, equal to
 * or after w2.
 */
int compare(const void *w1, const void *w2) {
    /* unsigned char keeps the ctype arguments in their valid range */
    const unsigned char *a1 = w1;
    const unsigned char *a2 = w2;
    int r;
    int caseDifference = 0;     /* outcome of the first case mismatch, if any */

    while (*a1 && *a2)
    {
        r = tolower(*a1) - tolower(*a2);
        if (r)
            return r;
        /* same letter: remember only the first position where the cases differ */
        if (caseDifference == 0) {
            if (isupper(*a1) && !isupper(*a2))
                caseDifference = -1;    /* w1 < w2 */
            else if (!isupper(*a1) && isupper(*a2))
                caseDifference = 1;     /* w1 > w2 */
        }

        ++a1;
        ++a2;
    }
    /* a length difference outranks the remembered case difference */
    r = tolower(*a1) - tolower(*a2);
    if (r != 0)
        return r;
    return caseDifference;
}

Upvotes: 0

Dan
Dan

Reputation: 670

Instead of comparing the lowercase values, compare the characters' ASCII values directly. In the ASCII table the capital letters come first, then the lowercase ones: http://www.asciitable.com/

UPDATE: If you need a bit more platform and character set independent code, just add an extra if, and check the letter case with isupper() and/or islower():

Upvotes: 0

Related Questions