Amit Singh Tomar
Amit Singh Tomar

Reputation: 8610

How to optimize this piece of code In c

I have piece of code in C which removes any duplicate character in a string .But i am doing it in two loops and would like it to optimize to one loop .

void removeDuplicates(char* ptr)
{
 int end = 1;
 int length = strlen(ptr);
 int current;
 int i;
 current = 1;
 for(end=1; end<length; end++)
 {
    for(i=0; i<end; i++)
    {
        if(ptr[end] == ptr[i])
        break;
    }
    if(i == end)
    {
      ptr[current] = ptr[i];
      current++;
    }
   }
   ptr[current] = 0;
 }

 int main(int argc, char** argv)
 {
  char str[256] = {0,};
  gets(str);
  removeDuplicates(str);
  printf("%s\n", str);
  return 1;
 }

How can i do this in one loop

Upvotes: 2

Views: 263

Answers (3)

paxdiablo
paxdiablo

Reputation: 881113

You can do this in one pass with something like:

#include <stdio.h>
#include <string.h>

int main (void) {
    char str[] = "p123h12p97h62p32h";    // Input string.
    int found[256];                      // Assumes 8-bit char.
    char *src, *dst;                     // Source and destination pointers.

    // Output initial value and set found flags to false.

    printf ("Before: [%s]\n", str);
    memset (found, 0, sizeof(found));

    // One loop, processing each source character in string.

    for (src = dst = str; *src != '\0'; src++) {
        // If not yet found, flag it found and transfer it, else do nothing.

        if (!found[(unsigned int)(*src)]) {
            found[(unsigned int)(*src)] = 1;
            *dst++ = *src;
        }
    }

    // Insert new end of string, then output it.

    *dst = '\0';
    printf (" After: [%s]\n", str);

    return 0;
}

This removes duplicates in one single pass, using two pointers that advance independently through the string:

src
 |
 v
 p123h12p97h62p32h
 ^
 |
dst

The src pointer advances by one each iteration of the loop. A character is copied from src to dst and the dst pointer is advanced only if the character has not been seen before (using the found array). The output is:

Before: [p123h12p97h62p32h]
 After: [p123h976]

Note the "assumes 8-bit char" comment - this is fine where you know the character set is ASCII (or any other 8-bit character set) but it may not be portable to more exotic implementations. You just have to ensure that there are enough elements in the found array for all of your characters. For example, a 10-bit char type would need 1024 elements (210 = 1024).

Upvotes: 4

Jon
Jon

Reputation: 437336

This is what I initially thought of doing. Micro-optimization to use up less memory for the "char seen" array, otherwise it's the same idea as posted by others.

void removeDuplicates(char* src) {
    // Bitfield to remember which chars we 've seen, assumes 8-bit char type
    char bitfield[32] = { 0 }; // 32 * 8 = 256 bits

    char* dest = src; // initialize "output" ptr

    while(*src) {
        // Bitfield manipulation
        char ch = *src;
        int pos = ch >> 3; // ch / 8
        int bit = ch & 0x7; // ch % 8
        char mask = 1 << bit;

        // If char seen for first time, mark and write to "output"
        if(!(bitfield[pos] & mask)) {
            bitfield[pos] |= mask;
            *dest++ = ch;
        }
        ++src;
    }

    *dest = 0; // null terminator
}

See it in action.

Upvotes: 1

Roee Gavirel
Roee Gavirel

Reputation: 19445

void removeDuplicates(char* ptr)
{
    int exists[256] = {0};
    int end;
    int current = 0;
    int length = strlen(ptr);

    for(end = 0; end < length; end++)
    {
        if (exists[ptr[end]]) break;

        exists[ptr[end]] = 1;
        ptr[current++] = ptr[end];
    }

    ptr[current] = '\0';
}

int main(int argc, char** argv)
{
    char str[256] = {0,};
    gets(str);
    removeDuplicates(str);
    printf("%s\n", str);
    return 1;
}

Upvotes: 1

Related Questions