Reputation: 31

checking for duplications in sorted array

I am doing some homework and i stuck in a section where i need to move all the duplicates in a sorted array to the end of the array. I know how to do it with 1 additional array but without additional array i dont know(a bit confusing pointers)

i tried this idea: (but its not working and no idea why)


int main()
{
    int newArr[12] =  {};
    int n = 12;
    int first = 0;
    int last = n - 1;
    int* D = arr + 1;
    int j = 0;
        for (int i = 0; i < n; i++)
    {
        j = i + 1;

        while (j < n)
        {
            if (newArr[i] != *D)
            {
                D++;
            }
            if (newArr[i] == *D)
            {
                swap(&newArr[i], &newArr[last]);
                j++;
                last--;
            }
            j++;
        }
        
            
    }
    for (int i = 0; i < n; i++)
    {
        printf("%d", newArr[i]);
    }
}

the working 2 array version:

int moveDuplicatesV1(int* arr, int n)
{
    int* seen_before = (int*)calloc(n * 2 + 1, sizeof(int));
    assert(seen_before);
    int val = 0, count = 0, flag = 1;
    int j = 0;
    for (int i = 0; i < n; i++)
    {
        val = arr[i];
        if (seen_before[arr[i] + n] == 0)
        {
            seen_before[arr[i] + n]++;
            count++;
            continue;

        }
        else if (flag)
        {
            j = i + 1;
            flag = 0;
        }
        while (j < n)
        {
            if (seen_before[arr[j] + n] == 0)
            {
                count++;
                seen_before[arr[j] + n]++;
                swap(&arr[i], &arr[j]);
                j++;
                if (j == n)
                {
                    free(seen_before);
                    return count;
                }
                break;
            }
            /*break;*/
            j++;
            if (j == n)
            {
                free(seen_before);
                return count;
            }
        }
    }
}

Upvotes: 1

Answers (3)

Bob__

Reputation: 12749

Just for fun (that's unlikely what OP was required to do), the following is an algorithm that doesn't require extra memory to move all the duplicates to the end of the array.

It doesn't use memmove, but a single optional swap at every step. That means that it isn't stable and requires the duplicates to be sorted as a final step.

I'd call it a rolling bubble, for lack of a better analogy:

// It will use qsort, so it needs a comparator function
int cmp_int(void const *a, void const *b)
{
  int x = *(int const *)a;
  int y = *(int const *)b;
  return (y < x) - (x < y);
}

void i_swap(int *a, int *b)
{
  int tmp = *a;
  *a = *b;
  *b = tmp;
}

// The function returns the index of the first duplicate
size_t unique_partition(size_t n, int *arr)
{
  // In general, the passed array could be unsorted.
  qsort(arr, n, sizeof *arr, cmp_int);

  // Find the first duplicate.
  size_t i = 1;
  while ( i < n  &&  arr[i] != arr[i - 1] )
  {
    ++i;
  }
  if ( i == n )
  {
    return n;
  }
  
  // Now it has to move around the duplicates. 
  size_t n_dupes = 1;
  while ( ++i < n )
  {
    // If it's a duplicate of the last unique value, just add it to the "bubble".
    if ( arr[i] == arr[i - n_dupes - 1] )
    {
      ++n_dupes;
    }
    // Otherwise swap it with the first duplicate. It becomes the new last unique.
    else
    {
      i_swap(arr + i, arr + i - n_dupes);
    }
  }

  // As mentioned, the duplicates are shuffled now.
  qsort(arr + n - n_dupes, n_dupes, sizeof *arr, cmp_int);

  return n - n_dupes;
}

Upvotes: 0

user9706

Reputation:

@Chris drew some pretty pictures for you and I wrote the matching implementation. Starting at position 1 instead of 0 is a minor refactoring. My implementation, as noted below, return a pointer to the first duplicate value.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 [0; i[: unique values
 [i; n - d[: possible duplicates (yet to be processed)
 [n-d; n[: duplicates

 return pointer to first duplicate value.  Note this is
 1 past the end of a if there are no duplicates.  This is
 ok just don't dereference it before checking.
*/
int *partition(size_t n, int a[n]) {
    size_t d = 0;
    for(size_t i = 1; i < n-d;) {
        if(a[i-1] == a[i]) {
            int tmp = a[i];
            memmove(a+i, a+i+1, (n-i-1) * sizeof(*a));
            a[n-1] = tmp;
            d++;
            continue;
        }
        i++;
    }
    return a + n - d;
}

void print_a(const char *prefix, size_t n, const int a[n]) {
    printf("%s", prefix);
    for(size_t i = 0; i < n; i++) {
        printf("%d%s", a[i], i + 1 < n ? ", " : "");
    }
    printf("\n");
}

int main() {
    int a[] = {1,2,2,2,3,3,5,6,7,7,9,9};
    size_t n = sizeof a / sizeof *a;
    int *p = partition(n, a);
    print_a("unique   : ", p - a, a);
    print_a("duplicate: ", n - (p - a), p);
}

and the output is:

unique   : 1, 2, 3, 5, 6, 7, 9
duplicate: 2, 2, 3, 7, 9

You could elect to move n - d - i - 2 elements and insert the new duplicate element at a[d] (instead of a[n-1]). This will reverse sort the duplicate partition. If you care then reverse the duplication partition before you return. It will probably be a little faster.

Another optimization would to move a run of (r - 1) duplicate values by copying the first into tmp as above, create a (r - 1) hole with memmove() then write (r-1) copies of tmp in that hole (either at a[d-r-1] or a[n-r-1]).

Upvotes: 1

Chris

Reputation: 36496

Consider your array:

  0   1   2   3   4   5   6   7   8   9   10  11
+---+---+---+---+---+---+---+---+---+---+---+---+
| 1 | 2 | 2 | 2 | 3 | 3 | 5 | 6 | 7 | 7 | 9 | 9 |
+---+---+---+---+---+---+---+---+---+---+---+---+

It sounds like you're trying to get to:

  0   1   2   3   4   5   6   7   8   9   10  11
+---+---+---+---+---+---+---+---+---+---+---+---+
| 1 | 2 | 3 | 5 | 6 | 7 | 9 | 2 | 2 | 3 | 7 | 9 |
+---+---+---+---+---+---+---+---+---+---+---+---+

If we start at index 0 and compare to index 1, they are not the same, so we move on to comparing index 1 and index 2.

They are the same. So we increment a dupes_found variable from 0 to 1. We save arr[i] and move the rest of the array forward 1 position, then copy what was in arr[i] into the last spot in the array.

  0   1   2   3   4   5   6   7   8   9   10  11
+---+---+---+---+---+---+---+---+---+---+---+---+
| 1 | 2 | 2 | 3 | 3 | 5 | 6 | 7 | 7 | 9 | 9 | 2 |
+---+---+---+---+---+---+---+---+---+---+---+---+

Because we've moved the array, we don't move forward. We now compare indices 1 and 2 again. They are still the same, so we repeat, having again incremented dupes_found.

  0   1   2   3   4   5   6   7   8   9   10  11
+---+---+---+---+---+---+---+---+---+---+---+---+
| 1 | 2 | 3 | 3 | 5 | 6 | 7 | 7 | 9 | 9 | 2 | 2 |
+---+---+---+---+---+---+---+---+---+---+---+---+

When we're at the length of the array minus the dupes_found, the job is complete.

An implementation might look something like:

void relocate_duplicates_to_end(int *arr, size_t n) {
    size_t dupes_found = 0;

    for (size_t i = 0; i < n - 1 - dupes_found;) {
        if (arr[i] == arr[i+1]) {
            dupes_found++;
            int temp = arr[i];
            memmove(&arr[i], &arr[i+1], sizeof(int) * (n - i - 1));
            arr[n - 1] = temp;
        }
        else {
            i++;
        }
    }
}

Upvotes: 3

checking for duplications in sorted array

Answers (3)

Related Questions