Didar004

Reputation: 85

How do I efficiently check whether two arrays with the same number of elements have the same elements, even if in different positions?

I was trying to solve this problem: given a number N and two arrays A and B of N elements each, determine whether B contains the same elements as A (not necessarily in the same positions).

There will be three lines of input: N, then the N elements of A, then the N elements of B.

Examples:

input:

4
4 2 3 7
2 3 4 9

output: no

input:

5
5 1 1 9 3
1 9 1 5 3

output: yes

My Code:

#include <stdio.h>
#include <stdlib.h>

int main()
{
    int n;
    scanf("%d", &n);
    long int a[n], b[n];
    for (int i = 0; i < n; i++)
    {
        scanf("%ld", &a[i]);
    }
    for (int i = 0; i < n; i++)
    {
        scanf("%ld", &b[i]);
    }
    
    int same;
    for (int j = 0; j < n; j++)
    {
        same = 0;
        for (int k = 0; k < n; k++)
        {
            if (a[j] == b[k])
            {
                same = 1;
            }
        }
        if (same == 0)
        {
            break;
        }
    }
    if (same == 0)
    {
        printf("no\n");
    }
    else
    {
        printf("yes\n");
    }
    return 0;
}

My code takes the first element of array A and compares it with all the elements of array B, then does the same for each following element of A. This approach gives the correct output for the first 3 test cases (only 2 were shown in the question), but it gets a wrong answer on test 4 and I don't know why, as the input of that test case is not shown. So, please help me find the problem in my code.

Upvotes: 4

Views: 261

Answers (5)

Craig Estey

Reputation: 33601

As others have mentioned, sorting the arrays and comparing the result may be the more general solution.

chqrlie's non-sorted solution uses a boolean array seen but [still] runs in quadratic time (i.e. O(n^2)).

If we change seen to be a histogram, we can reduce the check to linear time (three O(n) passes, i.e. O(n) overall) at the expense of using more space for the seen array.

  1. When reading in the a values, we increment seen[a[i]]

  2. When reading in the b values, we decrement seen[b[i]]

  3. In a third [linear] pass through the seen array, we check for a non-zero seen value.

  4. If the arrays match, all seen elements will be zero. If we find any seen value that is non-zero, we have a mismatch.


I've modified chqrlie's code:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main(int argc, char **argv)
{
    int n;

    --argc;
    ++argv;

    FILE *fi = stdin;

    if (argc > 0)
        fi = fopen(*argv, "r");
    if (fi == NULL)
        exit(1);

    if (fscanf(fi, "%d", &n) != 1 || n <= 0)
        return 1;

    long int a[n], b[n];
    long int lval;
    long int minval = 0;
    long int maxval = 0;

    for (int i = 0; i < n; i++) {
        if (fscanf(fi, "%ld", &lval) != 1)
            return 1;
        a[i] = lval;

        if (i == 0) {
            minval = lval;
            maxval = lval;
        }
        else {
            if (lval < minval)
                minval = lval;
            if (lval > maxval)
                maxval = lval;
        }
    }

    for (int i = 0; i < n; i++) {
        if (fscanf(fi, "%ld", &lval) != 1)
            return 1;
        b[i] = lval;
        if (lval < minval)
            minval = lval;
        if (lval > maxval)
            maxval = lval;
    }

    long int seencount = (maxval - minval) + 1;
    int *seen = calloc(seencount, sizeof(*seen));

    if (seen == NULL)
        exit(2);

    for (int i = 0; i < n; i++) {
        seen[a[i] - minval] += 1;
        seen[b[i] - minval] -= 1;
    }

    int same = 1;

    for (long int i = 0; i < seencount; i++) {
        if (seen[i] != 0) {
            same = 0;
            break;
        }
    }

    if (same == 0)
        printf("no\n");
    else
        printf("yes\n");

    free(seen);

    fclose(fi);

    return 0;
}

UPDATE:

Here's a version that first tries the O(n) solution. It falls back to the O(n*log(n)) sorted solution if there would be overflow or not enough memory:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>

int
linear(int n,const long *a,const long *b,long minval,long maxval)
{
    int *seen = NULL;
    int same = -1;

    do {
        // handle overflow
        if ((minval == LONG_MIN) && (maxval == LONG_MAX))
            break;

        long seencount = (maxval - minval) + 1;

        // prevent "large" allocations
        if (seencount >= 2000000000)
            break;

        seen = calloc(seencount, sizeof(*seen));
        if (seen == NULL)
            break;

        for (long i = 0; i < n; i++) {
            seen[a[i] - minval] += 1;
            seen[b[i] - minval] -= 1;
        }

        same = 1;

        for (long i = 0; i < seencount; i++) {
            if (seen[i] != 0) {
                same = 0;
                break;
            }
        }

        free(seen);
    } while (0);

    return same;
}

int
cmp(const void *ap,const void *bp)
{
    long a = *(const long *) ap;
    long b = *(const long *) bp;
    int flg = 0;

    do {
        if (a < b) {
            flg = -1;
            break;
        }

        if (a > b) {
            flg = 1;
            break;
        }
    } while (0);

    return flg;
}

int
sorted(int n,long *a0,long *b0)
{
    long *a = malloc(sizeof(*a) * n);
    long *b = malloc(sizeof(*b) * n);
    int preserve = ((a != NULL) && (b != NULL));

    do {
        if (! preserve) {
            free(a);
            a = a0;

            free(b);
            b = b0;
            break;
        }

        for (int i = 0;  i < n;  ++i) {
            a[i] = a0[i];
            b[i] = b0[i];
        }
    } while (0);

    qsort(a,n,sizeof(*a),cmp);
    qsort(b,n,sizeof(*b),cmp);

    int same = 1;
    for (int i = 0;  i < n;  ++i) {
        if (a[i] != b[i]) {
            same = 0;
            break;
        }
    }

    if (preserve) {
        free(a);
        free(b);
    }

    return same;
}

int
main(int argc, char **argv)
{
    int n;

    --argc;
    ++argv;

    FILE *fi = stdin;

    if (argc > 0)
        fi = fopen(*argv, "r");
    if (fi == NULL)
        exit(1);

    if (fscanf(fi, "%d", &n) != 1 || n <= 0)
        return 1;

    long a[n], b[n];
    long lval;
    long minval = 0;
    long maxval = 0;

    for (int i = 0; i < n; i++) {
        if (fscanf(fi, "%ld", &lval) != 1)
            return 1;
        a[i] = lval;

        if (i == 0) {
            minval = lval;
            maxval = lval;
        }
        else {
            if (lval < minval)
                minval = lval;
            if (lval > maxval)
                maxval = lval;
        }
    }

    for (int i = 0; i < n; i++) {
        if (fscanf(fi, "%ld", &lval) != 1)
            return 1;
        b[i] = lval;
        if (lval < minval)
            minval = lval;
        if (lval > maxval)
            maxval = lval;
    }

    fclose(fi);

    int same;

    do {
        same = linear(n,a,b,minval,maxval);
        if (same >= 0)
            break;

        same = sorted(n,a,b);
    } while (0);

    if (same == 0)
        printf("no\n");
    else
        printf("yes\n");

    return 0;
}

Upvotes: 0

Simon Goater

Reputation: 1908

As an extra to the other answers, if there is a reasonable chance that the arrays do contain different numbers, one can save work by first computing some simple permutation-independent stats of the arrays and comparing those before doing all that costly sorting and comparing business. For example, just compare the max, min, and sum of each array (a usage sketch follows the code below).

#include <stdbool.h>

void arrstats(int n, long int *arr, long int *max, long int *min, unsigned long int *sum) {
  long int max0, min0;
  unsigned long int sum0;
  max0 = arr[0];
  min0 = arr[0];
  sum0 = (unsigned long int)arr[0];
  for (int i=1; i<n; i++) {
    max0 = (arr[i] > max0 ? arr[i] : max0);
    min0 = (arr[i] < min0 ? arr[i] : min0);
    sum0 += (unsigned long int)arr[i];
  }
  *max = max0;
  *min = min0;
  *sum = sum0;
}

_Bool arraysdifferent(int n, long int *a, long int *b) {
// Note : false is inconclusive.
  if (n < 1) return false;
  unsigned long int suma, sumb;
  long int maxa, mina, maxb, minb;
  arrstats(n, a, &maxa, &mina, &suma);
  arrstats(n, b, &maxb, &minb, &sumb);
  return (maxa != maxb) || (mina != minb) || (suma != sumb);
}
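
A hedged usage sketch: reject early when the cheap stats differ, otherwise fall back to one of the exact checks from the other answers. Here sort_and_compare() is a hypothetical stand-in for such a check (e.g. the qsort-based one), not a function defined above.

if (arraysdifferent(n, a, b)) {
  // stats differ, so the arrays cannot contain the same elements
  printf("no\n");
} else {
  // inconclusive: run a full check (hypothetical helper, see the other answers)
  printf("%s\n", sort_and_compare(n, a, b) ? "yes" : "no");
}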

Also, bucket sort (a.k.a. pigeonhole sort) and radix sort can sort integers in linear time and space, O(n), though in practice they may be slower than other methods depending on the number of elements and the bit width of the integers involved.
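
For illustration, here is a minimal counting-sort sketch. It assumes the values are non-negative and that the maximum value maxval is small enough for a (maxval + 1)-element counter array to fit in memory; under those assumptions it runs in O(n + maxval) time.

#include <stdlib.h>

// Counting (pigeonhole) sort: assumes 0 <= arr[i] <= maxval for all i.
void counting_sort(long int *arr, int n, long int maxval) {
  long int *count = calloc(maxval + 1, sizeof(*count));
  if (count == NULL) exit(1);
  for (int i = 0; i < n; i++)
    count[arr[i]] += 1;            // build a histogram of the values
  int k = 0;
  for (long int v = 0; v <= maxval; v++)
    while (count[v]-- > 0)
      arr[k++] = v;                // write each value back in sorted order
  free(count);
}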

Upvotes: 1

William Pursell

Reputation: 212268

Rather than doing a complete sort on the data, you could build heaps and then successively pop off the min value, e.g.:

#include <stdio.h>
#include <stdlib.h>

void
read_into_array(int *d, int siz)
{
    for (int i = 0; i < siz; i += 1) {
        if (scanf("%d", d++) != 1) {
            fprintf(stderr, "invalid input\n");
            exit(1);
        }
    }
}

static void swap(int *a, int *b) { int t = *a; *a = *b; *b = t; }
void
down_heapify(int *h, int i, int end)
{
    int left;  /* index of left child */
    int right;  /* index of right child */
    while(
        left = 2 * i + 1, right = left + 1,
        (left < end  && h[i] > h[left])
        || (right < end && h[i] > h[right])
    ){
        int t = right;
        if( right >= end || h[left] < h[right] ){
            t = left;
        }
        swap(h + i, h + t);
        i = t;
    }
}

void
heapify(int *a, int siz)
{
    for (int i = siz / 2 ; i >= 0; i -= 1) {
        down_heapify(a, i, siz);
    }
}


/* Remove a value from the heap */
static int
pop(int *h, int siz)
{
    int rv = h[0];
    h[0] = h[--siz];

    down_heapify(h, 0, siz);
    return rv;
}

int
compare_inputs(int siz)
{
    int *a, *b;
    a = malloc(siz * sizeof *a);
    b = malloc(siz * sizeof *b);
    if (a == NULL || b == NULL) {
        fprintf(stderr, "out of memory\n");
        exit(1);
    }
    read_into_array(a, siz);
    read_into_array(b, siz);
    heapify(a, siz);

    heapify(b, siz);
    for (; siz > 0; siz -= 1) {
        if (pop(a, siz) != pop(b, siz)) {
            return 0;
        }
    }
    return 1;
}

int
main(void)
{
    int s;
    if (scanf("%d", &s) != 1) {
        fprintf(stderr, "invalid input\n");
        exit(1);
    }
    printf("%s\n", compare_inputs(s) ? "yes" : "no");
}

I haven't done a careful run-time analysis, but I suspect this will be more efficient than pre-sorting the arrays. (Once you've built the heap and popped, you are effectively sorting, but this gives you early termination once you detect a difference.) A few sample runs show performance comparable to chqrlie's hashing solution.

Upvotes: 1

chqrlie

Reputation: 144780

Your approach, which has quadratic time complexity, does not work if the arrays have the same numbers but in differing amounts: e.g. 1 1 2 and 1 2 2 will produce yes even though the arrays do not strictly contain the same numbers in a possibly different order.

You could avoid this problem using an extra array of booleans storing whether b[k] has already been used.

A more efficient approach is to make copies of both arrays, sort them and compare the sorted arrays (complexity O(N.log(N))).

Here is a modified version using the extra array of booleans:

#include <stdio.h>
#include <stdbool.h>

int main(void)
{
    int n;

    if (scanf("%d", &n) != 1 || n <= 0)
        return 1;

    long int a[n], b[n];
    bool seen[n];

    for (int i = 0; i < n; i++) {
        if (scanf("%ld", &a[i]) != 1)
            return 1;
    }
    for (int i = 0; i < n; i++) {
        if (scanf("%ld", &b[i]) != 1)
            return 1;
        seen[i] = false;
    }
    
    int same = 1;
    for (int j = 0; j < n; j++) {
        same = 0;
        for (int k = 0; k < n; k++) {
            if (a[j] == b[k] && seen[k] == false) {
                same = 1;
                seen[k] = true;
                break;
            }
        }
        if (same == 0)
            break;
    }
    if (same == 0) {
        printf("no\n");
    } else {
        printf("yes\n");
    }
    return 0;
}

Here is a more efficient version (for large sets), that sorts the arrays using qsort:

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

int compare_longs(const void *aa, const void *bb) {
    const long int *a = aa;
    const long int *b = bb;
    return (*a > *b) - (*a < *b);
}

long int *read_array(int n) {
    long int *a = calloc(sizeof(*a), n);
    if (a == NULL) {
        fprintf(stderr, "cannot allocate array\n");
        exit(1);
    }
    for (int i = 0; i < n; i++) {
        if (scanf("%ld", &a[i]) != 1) {
            fprintf(stderr, "input error\n");
            free(a);
            exit(1);
        }
    }
    return a;
}

int main(void) {
    int n;

    if (scanf("%d", &n) != 1 || n <= 0)
        return 1;

    long int *a = read_array(n);
    long int *b = read_array(n);

    qsort(a, n, sizeof(*a), compare_longs);
    qsort(b, n, sizeof(*b), compare_longs);

    int same = 1;
    for (int i = 0; i < n; i++) {
        if (a[i] != b[i]) {
            same = 0;
            break;
        }
    }
    if (same == 0) {
        printf("no\n");
    } else {
        printf("yes\n");
    }
    free(a);
    free(b);
    return 0;
}

Finally, here is an alternative using a hash table that should have quasi linear time complexity at the cost of extra memory:

#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

enum hash_state { FREE = 0, USED, DELETED };

typedef struct hash_entry {
    long int key;
    enum hash_state state;
    int count;
} hash_entry;

typedef struct hash_table {
    size_t size;
    hash_entry *arr;
} hash_table;

size_t hash_hash(hash_table *tab, long int key) {
    unsigned long int v = key;
    // rotate left one bit
    v = (v << 1) | (v / (ULONG_MAX ^ (ULONG_MAX >> 1)));
    return v % tab->size;
}

hash_table *hash_init(hash_table *tab, size_t n) {
    if (n > SIZE_MAX / 2)
        return NULL;
    tab->size = (n * 2) | 1;
    tab->arr = calloc(tab->size, sizeof(hash_entry));
    if (!tab->arr)
        return NULL;
    return tab;
}

void hash_free(hash_table *tab) {
    free(tab->arr);
}

hash_entry *hash_add(hash_table *tab, long int key) {
    for (size_t i = hash_hash(tab, key);; i = (i + 1) % tab->size) {
        if (tab->arr[i].state == FREE) {
            tab->arr[i].state = USED;
            tab->arr[i].key = key;
            return &tab->arr[i];
        }
        if (tab->arr[i].state == USED && tab->arr[i].key == key) {
            return &tab->arr[i];
        }
    }
}

hash_entry *hash_find(hash_table *tab, long int key) {
    for (size_t i = hash_hash(tab, key);; i = (i + 1) % tab->size) {
        if (tab->arr[i].state == FREE)
            return NULL;
        if (tab->arr[i].state == USED && tab->arr[i].key == key)
            return &tab->arr[i];
    }
}

long int *read_array(size_t n) {
    long int *a = calloc(sizeof(*a), n);
    if (a == NULL) {
        fprintf(stderr, "cannot allocate arrays\n");
        exit(1);
    }
    for (size_t i = 0; i < n; i++) {
        if (scanf("%ld", &a[i]) != 1) {
            fprintf(stderr, "input error\n");
            free(a);
            exit(1);
        }
    }
    return a;
}

int main(void) {
    int n;

    if (scanf("%d", &n) != 1 || n <= 0)
        return 1;

    long int *a = read_array(n);
    long int *b = read_array(n);

    hash_table tab[1];
    if (!hash_init(tab, n)) {
        fprintf(stderr, "cannot initialize hash table\n");
        return 1;
    }
    for (int i = 0; i < n; i++) {
        hash_add(tab, a[i])->count++;
    }
    int same = 1;
    for (int i = 0; i < n; i++) {
        hash_entry *e = hash_find(tab, b[i]);
        if (!e || !e->count--) {
            same = 0;
            break;
        }
    }
    hash_free(tab);
    if (same == 0) {
        printf("no\n");
    } else {
        printf("yes\n");
    }
    free(a);
    free(b);
    return 0;
}

In practice, the hash version is only slightly faster than the qsort version for large random sets (100k or 1 million random values) because the overhead of hashing is large, the hash function is simplistic and the log factor in sorting is small.

Here is a benchmark for 100000 random numbers (and a shuffled array):

chqrlie ~/dev/stackoverflow > time ./samesets-simplistic < samesets.100000
yes

real    0m2.212s
user    0m2.182s
sys     0m0.023s
chqrlie ~/dev/stackoverflow > time ./samesets-qsort < samesets.100000
yes

real    0m0.096s
user    0m0.080s
sys     0m0.013s
chqrlie ~/dev/stackoverflow > time ./samesets-hash < samesets.100000
yes

real    0m0.072s
user    0m0.052s
sys     0m0.014s

Upvotes: 5

Chris

Reputation: 36581

Not only does sorting the arrays first make the code more performant (a good sort is O(n * log n) rather than O(n^2), after which you only need an O(n) comparison of the arrays), it also makes the code a lot cleaner, helped by breaking out the repeated input code into a separate scan_array function.

#include <stdlib.h>
#include <stdio.h>

size_t scan_array(int *arr, size_t n);
int cmp(const void *a, const void *b);

int main(void) {
    size_t n;

    if (scanf("%zu", &n) != 1 || n <= 0) return 1;

    int a[n], b[n];

    if (scan_array(a, n) != n) return 1;
    if (scan_array(b, n) != n) return 1;

    qsort(a, n, sizeof(int), cmp);
    qsort(b, n, sizeof(int), cmp);

    int same = 1;

    for (size_t i = 0; i < n; i++) {
        if (a[i] != b[i]) {
            same = 0;
            break;
        }
    }

    if (same)
        printf("yes\n");
    else
        printf("no\n");
}

size_t scan_array(int *arr, size_t n) {
    size_t i;

    for (i = 0; i < n; i++) {
        if (scanf("%d", &arr[i]) != 1) return i;
    }

    return i;
}

int cmp(const void *a, const void *b) {
    int c = *(int *)a;
    int d = *(int *)b;

    return c == d ? 0 :
           c < d  ? -1 : 1;
}

Upvotes: 1
