Reputation: 121

Remove subarray using pointer arithmetic

I need to make a function for removing subarray using pointer arithmetic in C. Function should return number of removed elements. Auxiliary arrays are not allowed.

#include <stdio.h>
int remove_subarray(int * first_start, int * first_end,const int * second_start,const int * second_end) {
  int size_of_second = second_end-second_start;
  int *subarray_start, *last = first_end - 1;
  const int *pok = second_start,*second_start_copy = second_start;
  int number_of_the_same = 0;
  while (first_start != first_end) {
    if ( * first_start == * second_start) {
      if (number_of_the_same == 0)
       subarray_start = first_start;
      first_start++;
      second_start++;
      number_of_the_same++;
      if (number_of_the_same == size_of_second) {
        first_start = subarray_start;
        while (1) {
          if ( *first_start == *last)
            break;
          subarray_start = first_start;
          subarray_start += size_of_second;
          *first_start = *subarray_start;
          first_start++;
        }
        break;
      }
    } else {
      number_of_the_same = 0;
      first_start++;
      second_start = second_start_copy;
    }
  }
  return size_of_second;
}
int main() {
  // This gives correct result
  int niz1[14] = {1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, -1},i;
  int niz2[4] = {2, 3, 4, 5};
  int k1 = remove_subarray(niz1, niz1 + 14, niz2, niz2 + 4);
  for (i = 0; i < 14 - k1; ++i)
    printf("%i ", niz1[i]);
  printf("\n");
  // This gives wrong result
  int niz3[10] = {1, 1, 2, 3, 5, 6, 1, 2, 4, 10};
  int niz4[3] = {1, 2, 3};
  int k2 = remove_subarray(niz3, niz3 + 10, niz4, niz4 + 3);
  for (i = 0; i < 10 - k2; i++) printf("%d ", niz3[i]);
  return 0;
}

My algorithm is the following:

if elements match, save position to pointer start
if number_of_the_same (elements) is equal to number of elements in second array (n) that means subarray is found
if subarray is found, I set all elements to be equal to the elements that are n positions forward them

In the main function I tried with two set of arrays (niz1 and niz2) and for the first set it worked correct. However it didn't work correct for second set of arrays (niz3 and niz4).

Could you help me to fix my code?

Upvotes: 0

Answers (3)

arfneto

Reputation: 1765

The provided code is very hard to read and so is also hard to test. At least for me. May be the author could have used more meaningful names.

I think that the bug in the original code is that, after finding the first number of the sub_array, if the search fails the program must not advance the pointer of the array, because the current value pointed to can be the real start of the sequence and the previous just a false positive. See the pair 1,1 in the second supplied set

I will let an example with 2 options that may help.

A TL;DR section is now at the end with a shorter example

The method in the example

The idea is that

you have an array and if it contains a certain sub_array you need to do something with the sub_array. Like a predicate function in other languages.
the arguments are open-ended segments like the iterators in C++ STL: first argument points to first element, second argument points past the end of the array
two functions are in the code
- remove_subarray() that moves the entire sub_array to the end of the array
- mark_subarray() that replaces all sub_array values by INT_MAX

Making things easier: a few helper functions

`int show_array(const int, const int, const char*);`

This function has 5 lines: just write down the array with an optional title like here

    char buffer[80] = {0};
    sprintf(buffer, "%d elements moved. Resulting array:", res);
    show_array(array, array + sz_arr, buffer);

or here

    show_array(array, array + sz_arr, "Base array:");

sample output:

3 elements moved. Resulting array:    [  1  2  4  10  5  6  1  1  2  3  ]

              Base array:    [  1  1  2  3  5  6  1  2  4  10  ]

`int* find_sub_array(const int, const int, const int, const int);`

returns NULL if the sub_array is not found in the array, or the address of the sub_array.

This type of things are easily expressed by a FSM, a state machine. Here we need a minimalist set of 2 states:

one before find the possible start of a sub_array
one to search for the rest of the sub_array

A possible implementation

int* find_sub_array(
    const int* arr_start, const int* arr_end,
    const int* sa_start, const int* sa_end)
{  
    char st    = 0;
    int* pA    = (int*)arr_start;
    int* pSA   = (int*)sa_start;
    int* sa_ix = 0;  // address of the sub_array in array
    while (1)
    {
        switch (st)
        {
            case 0:
                if (*pA == *pSA)
                {
                    st    = 1;
                    sa_ix = pA;         // found 1st
                    pSA += 1, pA += 1;  // both pointers up
                    break;
                }
                pA += 1;  // array pointer only
                break;
            case 1:
            default:
            {
                if (*pA != *pSA)
                {
                    pSA = (int*)sa_start;  // reset
                    st  = 0;               // back to search
                    break;
                }
                else
                    pSA += 1, pA += 1;  // both goup
                if (pSA == sa_end) return sa_ix;
                break;
            }
        };  // end switch()
        if (pA >= arr_end) return NULL;
    }
    return NULL;
}

`remove_subarray()`

Using these functions one can write remove_subarray() in a compact way

int remove_subarray(
    int* first_start, int* first_end,
    const int* second_start, const int* second_end)
{
    int* pos = find_sub_array(
        first_start, first_end, second_start, second_end);
    if (pos == NULL) return 0;
    int sz_sub_arr = (int)(second_end - second_start);
    for (int ix = 0; ix < sz_sub_arr; ix += 1)
        *pos++ = INT_MAX;  // set all elements to INT_MAX
    return sz_sub_arr;
}

Testing multiple versions

The version above changes sub_array values. Another one in the example code moves the sub_array to the end. It is just an example anyway.

void test_with(
    int array[], int, int sub_arr[], int sz_sub_arr,
    int (*)(int*, int*, const int*, const int*));

This one accepts the name of the function to apply to the parameters, like a for_each() in C++ or java. It is used in the example like this:

    printf("\nUsing 1st provided set\n");
    int niz3[10] = {1, 1, 2, 3, 5, 6, 1, 2, 4, 10};
    int niz4[3]  = {1, 2, 3};
    test_with(niz3, 10, niz4, 3, remove_subarray);

    int niz1[14] = {1, 2, 3, 4, 5, 6, 7,
                    0, 1, 2, 3, 4, 5, -1};
    int niz2[4]  = {2, 3, 4, 5};
    printf( "\
\nUsing 2nd provided set + function to set sub_array elements to "
        "INT_MAX\n\n");
    test_with(
        niz1, sizeof(niz1) / sizeof(niz1[0]), niz2,
        sizeof(niz2) / sizeof(niz2[0]), mark_subarray);

output for the example code

total of 4 basic tests

Test 1 of 4:
              Base array:    [  1  2  3  4  5  6  7  8  9  10  11  12  13  14  15  16  ]
               Sub_array:    [  1  2  3  ]
3 elements moved. Resulting array:    [  14  15  16  4  5  6  7  8  9  10  11  12  13  1  2  3  ]

Test 2 of 4:
              Base array:    [  1  2  3  4  5  6  7  8  9  10  11  12  13  14  15  16  ]
               Sub_array:    [  1  2  4  ]
0 elements moved. Resulting array:    [  1  2  3  4  5  6  7  8  9  10  11  12  13  14  15  16  ]

Test 3 of 4:
              Base array:    [  1  2  3  4  5  6  7  8  9  10  11  12  13  14  15  16  ]
               Sub_array:    [  14  15  16  ]
[nothing to move: subarray found already at the end]
0 elements moved. Resulting array:    [  1  2  3  4  5  6  7  8  9  10  11  12  13  14  15  16  ]

Test 4 of 4:
              Base array:    [  1  2  3  4  5  6  7  8  9  10  11  12  13  14  15  16  ]
               Sub_array:    [  13  14  15  ]
3 elements moved. Resulting array:    [  1  2  3  4  5  6  7  8  9  10  11  12  16  13  14  15  ]

Using 1st provided set
              Base array:    [  1  1  2  3  5  6  1  2  4  10  ]
               Sub_array:    [  1  2  3  ]
3 elements moved. Resulting array:    [  1  2  4  10  5  6  1  1  2  3  ]

Using 2nd provided set + function to set sub_array elements to INT_MAX

              Base array:    [  1  2  3  4  5  6  7  0  1  2  3  4  5  -1  ]
               Sub_array:    [  2  3  4  5  ]
4 elements moved. Resulting array:    [  1  2147483647  2147483647  2147483647  2147483647  6  7  0  1  2  3  4  5  -1  ]

Example code

#include <limits.h>
#include <stdio.h>

int* find_sub_array(
    const int*, const int*, const int*, const int*);
int  mark_subarray(int*, int*, const int*, const int*);
int  remove_subarray(int*, int*, const int*, const int*);
int  set_array(int*, size_t);
int  shift_array(int*, int*, int);
int  show_array(const int*, const int*, const char*);
void test_with(
    int array[], int, int sub_arr[], int sz_sub_arr,
    int (*)(int*, int*, const int*, const int*));

int main(void)
{
    int array[16]    = {0};
    int sz_arr       = sizeof(array) / sizeof(array[0]);
    int sub_arr[][3] = {
        {1, 2, 3}, {1, 2, 4}, {14, 15, 16}, {13, 14, 15}};
    const int sz_sub_arr =
        sizeof(sub_arr[0]) / sizeof(sub_arr[0][0]);
    const n_of_tests = sizeof(sub_arr) / sizeof(sub_arr[0]);

    printf("total of %d basic tests\n", n_of_tests);

    for (int tst = 0; tst < n_of_tests; tst += 1)
    {
        printf("\nTest %d of %d:\n", 1 + tst, n_of_tests);
        set_array(array, sz_arr);
        test_with(
            array, sz_arr, sub_arr[tst], sz_sub_arr,
            remove_subarray);
    }

    printf("\nUsing 1st provided set\n");
    int niz3[10] = {1, 1, 2, 3, 5, 6, 1, 2, 4, 10};
    int niz4[3]  = {1, 2, 3};
    test_with(niz3, 10, niz4, 3, remove_subarray);

    int niz1[14] = {1, 2, 3, 4, 5, 6, 7,
                    0, 1, 2, 3, 4, 5, -1};
    int niz2[4]  = {2, 3, 4, 5};
    printf(
        "\
\nUsing 2nd provided set + function to set sub_array elements to "
        "INT_MAX\n\n");
    test_with(
        niz1, sizeof(niz1) / sizeof(niz1[0]), niz2,
        sizeof(niz2) / sizeof(niz2[0]), mark_subarray);

    return 0;
}

int* find_sub_array(
    const int* arr_start, const int* arr_end,
    const int* sa_start, const int* sa_end)
{
    char st    = 0;
    int* pA    = (int*)arr_start;
    int* pSA   = (int*)sa_start;
    int* sa_ix = 0;  // address of the sub_array in array
    while (1)
    {
        switch (st)
        {
            case 0:
                if (*pA == *pSA)
                {
                    st    = 1;
                    sa_ix = pA;         // found 1st
                    pSA += 1, pA += 1;  // both pointers up
                    break;
                }
                pA += 1;  // array pointer only
                break;
            case 1:
            default:
            {
                if (*pA != *pSA)
                {
                    pSA = (int*)sa_start;  // reset
                    st  = 0;               // back to search
                    break;
                }
                else
                    pSA += 1, pA += 1;  // both goup
                if (pSA == sa_end) return sa_ix;
                break;
            }
        };  // end switch()
        if (pA >= arr_end) return NULL;
    }
    return NULL;
}

int mark_subarray(
    int* first_start, int* first_end,
    const int* second_start, const int* second_end)
{
    int* pos = find_sub_array(
        first_start, first_end, second_start, second_end);
    if (pos == NULL) return 0;
    int sz_sub_arr = (int)(second_end - second_start);
    for (int ix = 0; ix < sz_sub_arr; ix += 1)
        *pos++ = INT_MAX;  // set all elements to INT_MAX
    return sz_sub_arr;
}

int remove_subarray(
    int* first_start, int* first_end,
    const int* second_start, const int* second_end)
{
    int* pos = find_sub_array(
        first_start, first_end, second_start, second_end);
    if (pos == NULL) return 0;
    int sz_sub_arr = (int)(second_end - second_start);
    if ((first_end - pos - sz_sub_arr) == 0)
    {  // sub_array found but already at the end
        fprintf(
            stderr,
            "[nothing to move: subarray found already at "
            "the end]\n");
        return 0;
    }
    return shift_array(pos, first_end - 1, sz_sub_arr);
}

int set_array(int* v, size_t len)
{  // set the array from 1 to len
    for (int i = 0; i < len; v[i] = 1 + i, i += 1)
        ;
    return 0;
};

int shift_array(int* src, int* last, int len)
{  // shift sub_array to the end od the array
    int* l = src + len - 1;
    int* r = last;
    for (int x = 0; x < len; x += 1, --r, --l)
    {
        int temp = *r;
        *r       = *l;
        *l       = temp;
    }
    return (int)len;
}

int show_array(
    const int* vct, const int* past_end, const char* msg)
{
    if (msg != NULL) printf("%25s", msg);
    printf("    [");
    for (const int* p = vct; p != past_end; p += 1)
        printf("  %d", *p);
    printf("  ]\n");
    return 0;
}

void test_with(
    int array[], int sz_arr, int sub_arr[], int sz_sub_arr,
    int (*f)(int*, int*, const int*, const int*))
{
    show_array(array, array + sz_arr, "Base array:");
    show_array(
        sub_arr, sub_arr + sz_sub_arr, " Sub_array:");

    int res = (*f)(
        array, array + sz_arr, sub_arr,
        sub_arr + sz_sub_arr);

    char buffer[80] = {0};
    sprintf(
        buffer, "%d elements moved. Resulting array:", res);
    show_array(array, array + sz_arr, buffer);
    return;
}

TL;DR

The second example has just code for the original test cases and find_array()

Example 2

#include <limits.h>
#include <stdio.h>

int* find_sub_array(
    const int*, const int*, const int*, const int*);
int  remove_subarray(int*, int*, const int*, const int*);
int  show_array(const int*, const int*, const char*);

int main(void)
{
    int niz1[14] = {1, 2, 3, 4, 5, 6, 7,
                    0, 1, 2, 3, 4, 5, -1};
    int niz2[4]  = {2, 3, 4, 5};
    printf("\nUsing 1st provided set\n");
    show_array(
        niz1, niz1 + sizeof(niz1) / sizeof(niz1[0]),
        "Base array:");
    show_array(
        niz2, niz2 + sizeof(niz2) / sizeof(niz2[0]),
        " Sub_array:");

    int res = remove_subarray(
        niz1, niz1 + sizeof(niz1) / sizeof(niz1[0]), 
        niz2, niz2 + sizeof(niz2) / sizeof(niz2[0]));

    char buffer[80] = {0};
    sprintf(
        buffer, "%d elements moved. Resulting array:", res);
    show_array(
        niz1, niz1 + sizeof(niz1) / sizeof(niz1[0]),
        "Resulting array:");

    printf("\nUsing 2nd provided set\n");
    int niz3[10] = {1, 1, 2, 3, 5, 6, 1, 2, 4, 10};
    int niz4[3]  = {1, 2, 3};
    show_array(niz3,niz3 + sizeof(niz3) / sizeof(niz3[0]),
        "Base array:");
    show_array(
        niz4,niz4 + sizeof(niz4) / sizeof(niz4[0]),
        " Sub_array:");

    res = remove_subarray(
       niz3,niz3 + sizeof(niz3) / sizeof(niz3[0]),
        niz4,niz4 + sizeof(niz4) / sizeof(niz4[0]));

    sprintf(
        buffer, "%d elements moved. Resulting array:", res);
    show_array(
        niz3, niz3 + sizeof(niz3) / sizeof(niz3[0]),
        "Resulting array:");

    return 0;
}

int* find_sub_array(
    const int* arr_start, const int* arr_end,
    const int* sa_start, const int* sa_end)
{
    char st    = 0;
    int* pA    = (int*)arr_start;
    int* pSA   = (int*)sa_start;
    int* sa_ix = 0;  // address of the sub_array in array
    while (1)
    {
        switch (st)
        {
            case 0:
                if (*pA == *pSA)
                {
                    st    = 1;
                    sa_ix = pA;         // found 1st
                    pSA += 1, pA += 1;  // both pointers up
                    break;
                }
                pA += 1;  // array pointer only
                break;
            case 1:
            default:
            {
                if (*pA != *pSA)
                {
                    pSA = (int*)sa_start;  // reset
                    st  = 0;               // back to search
                    break;
                }
                else
                    pSA += 1, pA += 1;  // both goup
                if (pSA == sa_end) return sa_ix;
                break;
            }
        };  // end switch()
        if (pA >= arr_end) return NULL;
    }
    return NULL;
}

int remove_subarray(
    int* first_start, int* first_end,
    const int* second_start, const int* second_end)
{
    int* pos = find_sub_array(
        first_start, first_end, second_start, second_end);
    if (pos == NULL) return 0;
    int sz_sub_arr = (int)(second_end - second_start);
    for (int ix = 0; ix < sz_sub_arr; ix += 1)
        *pos++ = INT_MAX;  // set all elements to INT_MAX
    return sz_sub_arr;
}

int show_array(
    const int* vct, const int* past_end, const char* msg)
{
    if (msg != NULL) printf("%25s", msg);
    printf("    [");
    for (const int* p = vct; p != past_end; p += 1)
        printf("  %d", *p);
    printf("  ]\n");
    return 0;
}

Output


Using 1st provided set
              Base array:    [  1  2  3  4  5  6  7  0  1  2  3  4  5  -1  ]
               Sub_array:    [  2  3  4  5  ]
         Resulting array:    [  1  2147483647  2147483647  2147483647  2147483647  6  7  0  1  2  3  4  5  -1  ]

Using 2nd provided set
              Base array:    [  1  1  2  3  5  6  1  2  4  10  ]
               Sub_array:    [  1  2  3  ]
         Resulting array:    [  1  2147483647  2147483647  2147483647  5  6  1  2  4  10  ]

Upvotes: 1

dash-o

Reputation: 14452

You might want to take advantage of memory function (memcmp, memcpy), which can speed up the code, and create more elegant solution. I'm not sure if "using pointer arithmetic" implied that arr_start[i] is not allowed. If this is the case, then every reference to X[i] should be replaced with *(X+i), effectively replacing indexing with the equivalent pointer arithmetic.

int remove_sub(int *arr_start, int *arr_end, int *sub_start, int *sub_end)
{
    const int arr_len = arr_end - arr_start;
    const int sub_len = sub_end - sub_start;

    if (sub_len < 1 || sub_len > arr_len)
        return 0;

    // Search in arr position 0 .. (arr_len - sub_len)
    for (int i=0 ; i<arr_len-sub_len ; i++ ) {
        int *curr_arr = arr_start + i ;

        // Move to next element, if match not found
        if ( memcmp(curr_arr, sub_start, sub_len*sizeof(*sub_start) )
            continue ;

        // Match found: remove it and return
        int arr_tail = arr_len - i - sub_len ;
        memmove(curr_arr, curr_arr+sub_len, arr_tail*sizeof(*arr_start)) ;
        return sub_len ;
     }
     // No match found at all, return 0, no change to array.
     return 0 ;
} ;

Disclaimer: I did not compile/test. Possible that there are typos.

Upvotes: 0

Sidharth Mudgil

Reputation: 1461

There is a little mistake in your algorithm, It only compares the first item of the subarray to the array and if it matches it assumes that this is the starting of the subarray without seeing the following items.

header files

#include <stdio.h>

function to remove sub array

int remove_sub(int *arr_start, int *arr_end, int *sub_start, int *sub_end)
{
    const int arr_len = arr_end - arr_start;
    const int sub_len = sub_end - sub_start;

    if (sub_len > arr_len)
        return 0;

    int *a_ptr = arr_start;
    int *s_ptr = sub_start;

    while (a_ptr != arr_end)
    {
        int count = 0;
        int *ptr = a_ptr;
        while (*ptr == *s_ptr && s_ptr != sub_end && ptr != arr_end)
        {
            ptr++;
            s_ptr++;
            count++;
        }
        if (count == sub_len)
        {
            int *start = a_ptr;
            int *end = arr_end;
            int *temp = a_ptr;

            for (int i = 0; i < sub_len; i++)
            {
                while (start != end)
                {
                    *a_ptr = *(++start);
                    a_ptr++;
                }
                a_ptr = temp;
                start = a_ptr;
                end--;
                arr_end--;
            }
        }
        s_ptr = sub_start;
        a_ptr++;
    }

    int *ptr = arr_start;
    while (ptr != arr_end)
    {
        printf("%d ", *ptr);
        ptr++;
    }

    return arr_end - arr_start;
}

main function

int main()
{
    int arr[] = {1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, -1};
    int sub[] = {2, 3, 4, 5};

    int size_arr = sizeof(arr) / sizeof(arr[0]);
    int size_sub = sizeof(sub) / sizeof(sub[0]);

    int size = remove_sub(arr, arr + size_arr, sub, sub + size_sub);
    printf("size: %d\n", size);

    return 0;
}

Note: Pointer Arithmetic also counts *(arr + i)