Olivia22
Olivia22

Reputation: 151

Optimize Binary Search in sorted array find number of occurences

Im trying to do the smallest number of operations possible to find the number of occurences of an element in the array. Save even 1 operation if possible. So far this is the best binary search version I know. I cant use vectors or any other std:: functions

int modifiedbinsearch_low(int* arr, int low, int high , int key){   
    if(low==high) return high ; 

    int mid = low + (high-low) /2;

    if(key >  arr[mid] ) { modifiedbinsearch_low(arr,mid + 1 , high,key);  } 
    else  { modifiedbinsearch_low(arr,low,mid,key);  }  
}

int modifiedbinsearch_high(int* arr, int low, int high , int key){   
    if(low==high) return high ; 

    int mid = low + (high-low) /2;

    if(key <  arr[mid] ) { modifiedbinsearch_high(arr,low,mid,key);  } 
    else  { modifiedbinsearch_high(arr,mid+1,high,key);  } 

} 

int low = modifiedbinsearch_low( ...)
int high = modifiedbinsearch_high( ...)

This version collapsed both functions into just one but it takes almost double the time. Im wondering the idea is good for it to become the fastest but the implementation is wrong.

#include<stdio.h>
int binarysearch(int a[],int n,int k,bool searchfirst){
    int result=-1;
    int low=0,high=n-1;
    while(low<=high){
        int mid=(low+high)/2;
        if(a[mid]==k)  {
              result=mid; 
           if(searchfirst)
              high=mid-1; 
            else
              low=mid+1;
    }
    else if(k<a[mid])  high=mid-1;
    else low=mid+1;
    }
    return result;
}

int main(){
    int a[]={1,1,1,2,2,3,3,3,6,6,6,6,6,7,7};
    int n=sizeof(a)/sizeof(a[0]);
    int x=6;
    int firstindex=binarysearch(a,n,x,true);
    printf("%d\n",firstindex);
    if(firstindex==-1){
        printf("elment not found in the array:\n ");
    }
    else {
        int lastindex=binarysearch(a,n,x,false);
        printf("%d\n",lastindex);
        printf("count is = %d", lastindex-firstindex+1);
    }

}

Shorter version

      int  binmin(int a[], int start, int end,int val ) {
         if(start<end) {
            int mid = (start+end)/2;
            if(a[mid]>=val) 
                binmin(a,start,mid-1,val);
            else if(a[mid]<val)
                binmin(a,mid+1,end,val);

      }
      else if(start>end)
           return start;
}

Upvotes: 0

Views: 443

Answers (1)

selbie
selbie

Reputation: 104539

Here's a performance issue. In the main while loop, you aren't breaking out fo the loop when you find the target value.

while(low<=high){
    int mid=(low+high)/2;
    if(a[mid]==k)  {
          result=mid;  // you need to break out of the loop here
       if(searchfirst)
          high=mid-1; 
        else
          low=mid+1;
}

But that's not the only issue. The searchfirst value shouldn't be what dictates adjusting high or low as you are doing it. In a classic binary search, adjust the high or low parameter based on how a[mid] compares to k`. Probably closer to this:

while(low<=high) {
    int mid=(low+high)/2;
    if(a[mid]==k)  {
          result=mid;  // you need to break out of the loop here
          break;
    }
    if(a[mid] > k)
        high=mid-1; 
    else
       low=mid+1;
}

You have the right idea. Binary search to find the element. But let me suggest the simpler solution after the initial binary search is to just "scan to the left" and "scan to the right" to count duplicate elements.

Let me know what you think of this:

int binarySearch(int* arr, int start, int end, int value) {
    while (start <= end) {
        int mid = (start + end) / 2;
        if (arr[mid] == value) {
            return mid;
        }
        start = (arr[mid] < value) ? (mid + 1) : start;
        end = (arr[mid] > value) ? (mid - 1) : end;
    }
    return -1;
}

int countSide(int* arr, int length, int index, int value, int step) {
    int count = 0;
    while (index >= 0 && index <= (length - 1) && arr[index] == value) {
        count++;
        index += step;
    }
    return count;
}

int main() {
    int a[] = { 1,1,1,2,2,3,3,3,6,6,6,6,6,7,7 };
    int n = sizeof(a) / sizeof(a[0]);
    int x = 6;
    int firstindex = binarySearch(a, 0, n - 1, x);
    printf("%d\n", firstindex);
    if (firstindex == -1) {
        printf("elment not found in the array:\n ");
    }
    else {
        int count = countSide(a, n, firstindex, x, -1);
        count += countSide(a, n, firstindex, x, 1);
        count--; // because we counted the middle element twice
        printf("count is = %d\n", count);
    }
}

Updated

Here's a solution that does two binary searches to find the lower and upper bounds of the target value in the array and simply measures the distance between the indices to get the count:

int bound(int* arr, int length, int value, bool isUpperBound) {

    int best = -1;
    int start = 0;
    int end = start + length - 1;

    while (start <= end) {
        int mid = (start + end) / 2;
        if (arr[mid] == value) {
            best = mid;

            if (isUpperBound) {
                start = mid + 1;
            }
            else {
                end = mid - 1;
            }
        }
        else if (arr[mid] < value) {
            start = mid + 1;
        }
        else if (arr[mid] > value) {
            end = mid - 1;
        }
    }
    return best;
}


int main() {
    int a[] = { 1,1,1,2,2,3,3,3,6,6,6,6,6,7,7 };
    int n = sizeof(a) / sizeof(a[0]);
    int x = 6;
    int firstindex = bound(a, n, x, false);
    int lastindex = bound(a, n, x, true);
    printf("%d - %d\n", firstindex, lastindex);
    if (firstindex == -1) {
        printf("elment not found in the array:\n ");
    }
    else {
        int count = lastindex-firstindex + 1;
        printf("count is = %d\n", count);
    }
}

Upvotes: 1

Related Questions