Reputation: 8610
I have piece of code in C which removes any duplicate character in a string .But i am doing it in two loops and would like it to optimize to one loop .
void removeDuplicates(char* ptr)
{
int end = 1;
int length = strlen(ptr);
int current;
int i;
current = 1;
for(end=1; end<length; end++)
{
for(i=0; i<end; i++)
{
if(ptr[end] == ptr[i])
break;
}
if(i == end)
{
ptr[current] = ptr[i];
current++;
}
}
ptr[current] = 0;
}
int main(int argc, char** argv)
{
char str[256] = {0,};
gets(str);
removeDuplicates(str);
printf("%s\n", str);
return 1;
}
How can i do this in one loop
Upvotes: 2
Views: 263
Reputation: 881113
You can do this in one pass with something like:
#include <stdio.h>
#include <string.h>
int main (void) {
char str[] = "p123h12p97h62p32h"; // Input string.
int found[256]; // Assumes 8-bit char.
char *src, *dst; // Source and destination pointers.
// Output initial value and set found flags to false.
printf ("Before: [%s]\n", str);
memset (found, 0, sizeof(found));
// One loop, processing each source character in string.
for (src = dst = str; *src != '\0'; src++) {
// If not yet found, flag it found and transfer it, else do nothing.
if (!found[(unsigned int)(*src)]) {
found[(unsigned int)(*src)] = 1;
*dst++ = *src;
}
}
// Insert new end of string, then output it.
*dst = '\0';
printf (" After: [%s]\n", str);
return 0;
}
This removes duplicates in one single pass, using two pointers that advance independently through the string:
src
|
v
p123h12p97h62p32h
^
|
dst
The src
pointer advances by one each iteration of the loop. A character is copied from src
to dst
and the dst
pointer is advanced only if the character has not been seen before (using the found
array). The output is:
Before: [p123h12p97h62p32h]
After: [p123h976]
Note the "assumes 8-bit char"
comment - this is fine where you know the character set is ASCII (or any other 8-bit character set) but it may not be portable to more exotic implementations. You just have to ensure that there are enough elements in the found
array for all of your characters. For example, a 10-bit char
type would need 1024 elements (210 = 1024
).
Upvotes: 4
Reputation: 437336
This is what I initially thought of doing. Micro-optimization to use up less memory for the "char seen" array, otherwise it's the same idea as posted by others.
void removeDuplicates(char* src) {
// Bitfield to remember which chars we 've seen, assumes 8-bit char type
char bitfield[32] = { 0 }; // 32 * 8 = 256 bits
char* dest = src; // initialize "output" ptr
while(*src) {
// Bitfield manipulation
char ch = *src;
int pos = ch >> 3; // ch / 8
int bit = ch & 0x7; // ch % 8
char mask = 1 << bit;
// If char seen for first time, mark and write to "output"
if(!(bitfield[pos] & mask)) {
bitfield[pos] |= mask;
*dest++ = ch;
}
++src;
}
*dest = 0; // null terminator
}
Upvotes: 1
Reputation: 19445
void removeDuplicates(char* ptr)
{
int exists[256] = {0};
int end;
int current = 0;
int length = strlen(ptr);
for(end = 0; end < length; end++)
{
if (exists[ptr[end]]) break;
exists[ptr[end]] = 1;
ptr[current++] = ptr[end];
}
ptr[current] = '\0';
}
int main(int argc, char** argv)
{
char str[256] = {0,};
gets(str);
removeDuplicates(str);
printf("%s\n", str);
return 1;
}
Upvotes: 1