New
New

Reputation: 9

Remove adjacent duplicates in linked list in C

As the title mentioned, I have to remove adjacent duplicates in linked list such that if input is 'google', output should be 'le'. I'm supposed to code it in C. I've written 70% of the code, except that I don't know how to continuously loop till all adjacent duplicates are removed. I'm removing adjacent duplicates in remove_adjacent_duplicates() function, and since I don't know how to put terminating condition in loop, I've merely used if-else loop. But my code in remove_adjacent_duplicates() function might contain mistakes, so please rectify it if any and please give solution to looping till all adjacent duplicates are removed. Here's my code-

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct node //node creation
{
  char data;
  struct node *next;
};

void remove_adjacent_duplicates(struct node** head_ref)
{
    struct node* current = *head_ref; 
    struct node* cnext = NULL; //the one next to current one
    int flag=0;
    
    cnext = current->next; //storing next
    //printf("%c %c %d\n",current->data,cnext->data,flag);
    
    if(cnext->data==current->data)
        {
            flag=1;
            while(cnext->data==current->data)
            {
                cnext=cnext->next;
            }
            current=cnext;
            cnext = current->next; //storing next

        }
        
    else
        {
            current=current->next;
            cnext = current->next; //storing next
            
        }
    //printf("%c %c %d\n",current->data,cnext->data,flag);
    if(flag) *head_ref = current;
    
}

void push(struct node** head_ref, char new_data)
{
    struct node* new_node = (struct node*)malloc(sizeof(struct node));
    new_node->data = new_data;
    new_node->next = *head_ref;
    *head_ref = new_node;
}


void printList(struct node* head)
{
    if (head == NULL)
    {
        printf("NULL\n\n");
        return;
    }
    
    printf("%c->",head->data);
    printList(head->next);
}

int main()
{
    char s[100];
    int i;
    struct node* a = NULL;
    
    printf("Enter string: ");
    scanf("%s",s);
    
    for(i=strlen(s)-1;i>-1;i--){
        push(&a, s[i]); //last in first out, so in reverse g is last but first to come out
    }

    printf("\nConverting string to linked list: \n");
    printList(a);

    
    //printf("%c",current->data); prints first letter of a 
    remove_adjacent_duplicates(&a);
    
    printList(a);


    return 0;
}

Upvotes: 0

Views: 164

Answers (2)

Craig Estey
Craig Estey

Reputation: 33666

A few issues ...

  1. The first element of list (e.g. head) can never be a duplicate
  2. The code leaks memory when removing a dup because it doesn't do free
  3. The code only removes the first element.
  4. The code uses next, cur, but not previous, so the algorithm needs refactoring.
  5. Casting the return of malloc is bad. See: Do I cast the result of malloc?
  6. scanf is problematic. %s can overrun the end of the array. Better to use (e.g.) %99s [or better yet: fgets].

Here is the refactored code:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// node definition
struct node {
    char data;
    struct node *next;
};

void
remove_adjacent_duplicates(struct node **head_ref)
{
    struct node *prev = *head_ref;
    struct node *cur;
    struct node *next;

    // NOTE: first node can _never_ be a dup
    if (prev != NULL)
        cur = prev->next;
    else
        cur = NULL;

    for (;  cur != NULL;  cur = next) {
        // remember this in case we remove cur to prevent "use after free" bug
        // when advancing cur
        next = cur->next;

        // remove duplicate
        if (cur->data == prev->data) {
            prev->next = next;
            free(cur);
            continue;
        }

        // point to last non-dup node
        prev = cur;
    }
}

void
push(struct node **head_ref, char new_data)
{
// NOTE/BUG: casting the result of malloc is bad
#if 0
    struct node *new_node = (struct node *) malloc(sizeof(struct node));
#else
    struct node *new_node = malloc(sizeof(*new_node));
#endif

    new_node->data = new_data;
    new_node->next = *head_ref;

    *head_ref = new_node;
}

void
printList(struct node *head)
{
    if (head == NULL) {
        printf("NULL\n\n");
        return;
    }

    printf("%c->", head->data);
    printList(head->next);
}

int
main(void)
{
    char s[100];
    int i;
    struct node *a = NULL;

    printf("Enter string: ");
// NOTE/BUG: scanf is bad -- it can overrun the end of s
#if 0
    scanf("%s", s);
#else
    scanf("%99s", s);
#endif

    // last in first out, so in reverse g is last but first to come out
    for (i = strlen(s) - 1; i > -1; i--) {
        push(&a, s[i]);
    }

    printf("\nConverting string to linked list: \n");
    printList(a);

    // printf("%c",current->data); prints first letter of a
    remove_adjacent_duplicates(&a);

    printList(a);

    return 0;
}

In the above code, I've used cpp conditionals to denote old vs. new code:

#if 0
// old code
#else
// new code
#endif

#if 1
// new code
#endif

Note: this can be cleaned up by running the file through unifdef -k


UPDATE:

From the OP: "[ .. ] 'google', output should be 'le'.". Looks like both nodes are removed, and the effect compounds. – Oka

Yes, it's much more complex. But, here is a version that removes all duplicates. I had some trouble myself, so I left in the debugging code:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#if DEBUG
#define dbgprt(_fmt...) \
    printf(_fmt)
#else
#define dbgprt(_fmt...) \
    do { } while (0)
#endif

// node definition
struct node {
    char data;
#if DEBUG
    int seq;
#endif
    struct node *next;
};

#if DEBUG
int seq = 0;
#endif

void
remove_adjacent_duplicates(struct node **head_ref)
{
    struct node *prev = *head_ref;
    struct node *cur;
    struct node *next;

    // NOTE: first node can _never_ be a dup
    if (prev != NULL)
        cur = prev->next;
    else
        cur = NULL;

    for (;  cur != NULL;  cur = next) {
        // remember this in case we remove cur to prevent "use after free" bug
        // when advancing cur
        next = cur->next;

        // remove duplicate
        if (cur->data == prev->data) {
            prev->next = next;
            free(cur);
            continue;
        }

        // point to last non-dup node
        prev = cur;
    }
}

void
remove_all_duplicates(struct node **head_ref)
{
    struct node *oldhead = *head_ref;

    struct node *addprev = NULL;
    struct node *addnext;

    // loop through all candidate nodes
    for (struct node *addcur = oldhead;  addcur != NULL;  addcur = addnext) {
        int dupflg = 0;

        // start of search for duplicates to the right of the candidate
        struct node *prevdup = NULL;
        struct node *dupcur = addcur->next;

        // find all duplicates to the right [towards tail] of candidate node
        while (1) {
            // find first duplicate to the right of candidate [if one exists]
            struct node *dupnext = NULL;
            for (;  dupcur != NULL;  dupcur = dupnext) {
                dupnext = dupcur->next;
                if (dupcur->data == addcur->data) {
                    dupflg = 1;
                    break;
                }
                prevdup = dupcur;
            }

            // no more duplicates to the right of current candidate
            if (dupcur == NULL)
                break;

            // remove a duplicate on the right
            if (prevdup != NULL)
                prevdup->next = dupnext;
            else
                addcur->next = dupnext;
            free(dupcur);
        }

        addnext = addcur->next;

        // remove candidate because it's a dup
        if (dupflg) {
            if (addprev != NULL)
                addprev->next = addnext;
            else
                oldhead = addnext;
            free(addcur);
            continue;
        }

        // remember last valid non-dup node
        addprev = addcur;
    }

    *head_ref = oldhead;
}

void
push(struct node **head_ref, char new_data)
{
// NOTE/BUG: casting the result of malloc is bad
#if 0
    struct node *new_node = (struct node *) malloc(sizeof(struct node));
#else
    struct node *new_node = malloc(sizeof(*new_node));
#endif

    new_node->data = new_data;
#if DEBUG
    new_node->seq = seq++;
#endif
    new_node->next = *head_ref;

    *head_ref = new_node;
}

void
printList(struct node *head)
{
    if (head == NULL) {
        printf("NULL\n\n");
        return;
    }

#if DEBUG
    printf("%c%d->", head->data, head->seq);
#else
    printf("%c->", head->data);
#endif
    printList(head->next);
}

int
main(int argc,char **argv)
{
    char s[100];
    int opt_a = 0;
    int i;
    struct node *a = NULL;

    --argc;
    ++argv;

    for (;  argc > 0;  --argc, ++argv) {
        char *cp = *argv;
        if (*cp != '-')
            break;

        switch (cp[1]) {
        case 'a':
            opt_a = ! opt_a;
            break;
        }
    }

    printf("Enter string: ");
// NOTE/BUG: scanf is bad -- it can overrun the end of s
#if 0
    scanf("%s", s);
#else
    fflush(stdout);
    if (fgets(s,sizeof(s),stdin) == NULL)
        s[0] = 0;
    s[strcspn(s,"\n")] = 0;
#endif

    // last in first out, so in reverse g is last but first to come out
    for (i = strlen(s) - 1; i > -1; i--)
        push(&a, s[i]);

    printf("\nConverting string to linked list: \n");
    printList(a);

    // printf("%c",current->data); prints first letter of a
    if (opt_a)
        remove_adjacent_duplicates(&a);
    else
        remove_all_duplicates(&a);

    printList(a);

    return 0;
}

UPDATE:

As for your code I can't understand it properly especially those # statements since I'm just an average coder in C with no advanced knowledge. – New

The # lines aren't really advanced coding. They are C preprocessor (i.e. cpp) directives, similar to #define, #ifdef, #ifndef, and #endif. See the compiler manpage and/or man cpp.

They include/eliminate code at compile time in a separate first stage of the compilation process (i.e. the cpp stage).

Otherwise, the code is well commented to explain the intent of what the code is doing.

Side note: When I was first learning to code, in addition to school assignments, I was looking at some complex OS kernel code [in assembly language]. I just kept going over it, sometimes adding my own comments, until I did understand it. I learned more by reading and understanding such code than I did from most assignments.

At the bottom of my answer: What is the error in this code that checks if the linklist is a palindrome or not? is a list of resources I recommend.

Here is a cleaned up version of the my code above that eliminates the conditional cpp directives:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// node definition
struct node {
    char data;
    struct node *next;
};

void
remove_adjacent_duplicates(struct node **head_ref)
{
    struct node *prev = *head_ref;
    struct node *cur;
    struct node *next;

    // NOTE: first node can _never_ be a dup
    if (prev != NULL)
        cur = prev->next;
    else
        cur = NULL;

    for (;  cur != NULL;  cur = next) {
        // remember this in case we remove cur to prevent "use after free" bug
        // when advancing cur
        next = cur->next;

        // remove duplicate
        if (cur->data == prev->data) {
            prev->next = next;
            free(cur);
            continue;
        }

        // point to last non-dup node
        prev = cur;
    }
}

void
remove_all_duplicates(struct node **head_ref)
{
    struct node *oldhead = *head_ref;

    struct node *addprev = NULL;
    struct node *addnext;

    // loop through all candidate nodes
    for (struct node *addcur = oldhead;  addcur != NULL;  addcur = addnext) {
        int dupflg = 0;

        // start of search for duplicates to the right of the candidate
        struct node *prevdup = NULL;
        struct node *dupcur = addcur->next;

        // find all duplicates to the right [towards tail] of candidate node
        while (1) {
            // find first duplicate to the right of candidate [if one exists]
            struct node *dupnext = NULL;
            for (;  dupcur != NULL;  dupcur = dupnext) {
                dupnext = dupcur->next;
                if (dupcur->data == addcur->data) {
                    dupflg = 1;
                    break;
                }
                prevdup = dupcur;
            }

            // no more duplicates to the right of current candidate
            if (dupcur == NULL)
                break;

            // remove a duplicate on the right
            if (prevdup != NULL)
                prevdup->next = dupnext;
            else
                addcur->next = dupnext;
            free(dupcur);
        }

        addnext = addcur->next;

        // remove candidate because it's a dup
        if (dupflg) {
            if (addprev != NULL)
                addprev->next = addnext;
            else
                oldhead = addnext;
            free(addcur);
            continue;
        }

        // remember last valid non-dup node
        addprev = addcur;
    }

    *head_ref = oldhead;
}

void
push(struct node **head_ref, char new_data)
{
    struct node *new_node = malloc(sizeof(*new_node));

    new_node->data = new_data;
    new_node->next = *head_ref;

    *head_ref = new_node;
}

void
printList(struct node *head)
{
    if (head == NULL) {
        printf("NULL\n\n");
        return;
    }

    printf("%c->", head->data);
    printList(head->next);
}

int
main(int argc,char **argv)
{
    char s[100];
    int opt_a = 0;
    int i;
    struct node *a = NULL;

    --argc;
    ++argv;

    for (;  argc > 0;  --argc, ++argv) {
        char *cp = *argv;
        if (*cp != '-')
            break;

        switch (cp[1]) {
        case 'a':
            opt_a = ! opt_a;
            break;
        }
    }

    printf("Enter string: ");
    fflush(stdout);
    if (fgets(s,sizeof(s),stdin) == NULL)
        s[0] = 0;
    s[strcspn(s,"\n")] = 0;

    // last in first out, so in reverse g is last but first to come out
    for (i = strlen(s) - 1; i > -1; i--)
        push(&a, s[i]);

    printf("\nConverting string to linked list: \n");
    printList(a);

    // printf("%c",current->data); prints first letter of a
    if (opt_a)
        remove_adjacent_duplicates(&a);
    else
        remove_all_duplicates(&a);

    printList(a);

    return 0;
}

Upvotes: 1

trincot
trincot

Reputation: 351403

You could use recursion. That way you can check whether before the recursive call or after the recursive call there is something to remove:

void remove_adjacent_duplicates(struct node** head_ref)
{
    struct node* current = *head_ref;
    if (current == NULL || current->next == NULL) return;
    int isEqual = current->data == current->next->data; 
    remove_adjacent_duplicates(&current->next);
    if (current->next != NULL && current->data == current->next->data) {
        // Duplicates! Remove pair
        *head_ref = current->next->next;
        free(current->next);
        free(current);
    } else if (isEqual) {
        // Continue ongoing removal
        *head_ref = current->next;
        free(current);
    }
}

Upvotes: 2

Related Questions