Ryan
Ryan

Reputation: 667

Remove all comments from a C program - any possible improvements to this code?

I'm learning C from the K&R book and for exercise 1.23 in the first chapter, I have to write a program that removes all comments given some C code that the user inputs. This is my completed program so far. Are there any improvements I can make to it?

/**
 Tuesday, 10/07/2013

 Exercise 1.23
 Write a program to remove all comments from a C 
 program. Don't forget to handle quoted strings 
 and character constants properly. C comments   
 don't nest.
**/

#include <stdio.h>
#define MAX_LENGTH 65536
#define NOT_IN_COMMENT 0
#define SINGLE_COMMENT 1
#define MULTI_COMMENT  2

main()
{
    char code[MAX_LENGTH];        /* Buffer that stores the inputted code */
    int size = 0;                 /* Length of the inputted code */
    int loop;                     /* Integer used for the for loop */
    char c;                       /* Character to input into */
    int status = NOT_IN_COMMENT;  /* Are we in a comment? What type? */
    int in_string = 0;            /* Are we inside of a string constant? */
    char last_character;          /* Value of the last character */


    /* Input all code into the buffer until escape sequence pressed */
    while ((c = getchar()) != EOF)
        code[size++] = c; 
    code[size] = '\0'; 


    /* Remove all comments from the code and display results to user */
    for (loop = 0; loop < size; loop++) {
        char current = code[loop]; 

        if (in_string) {
            if (current == '"') in_string = 0; 
            putchar(current);
        }

        else {
            if (status == NOT_IN_COMMENT) {
                if (current == '"') {
                    putchar(current);
                    in_string = 1; 
                    continue; 
                }

                if (current == '/' && last_character == '/') status = SINGLE_COMMENT;
                else if (current == '*' && last_character == '/') status = MULTI_COMMENT; 
                else if (current != '/' || (current == '/' && loop < size-1 && !(code[loop+1] == '/' || code[loop+1] == '*'))) putchar(current); 
            }

            else if (status == SINGLE_COMMENT) {
                if (current == '\n') {
                    status = NOT_IN_COMMENT; 
                    putchar('\n');
                }
            }

            else if (status == MULTI_COMMENT) {
                if (current == '/' && last_character == '*') status = NOT_IN_COMMENT; 
            }
        }

        last_character = current; 
    }
}

Upvotes: 6

Views: 12170

Answers (6)

Prateek Joshi
Prateek Joshi

Reputation: 4067

You can refer the simple code below :

#include <stdio.h>

int main(int argc, char **argv) {
    char code[1000];
    char output[1000];
    char ch;
    int i = 0;
    //store code in array
    while ((ch = getchar()) != EOF) {
        code[i++] = ch;
    }
    code[i] = '\0';
    int index = 0;
    i = 0;
    //store removed comment code in output
    while (code[i] != EOF) {
        if (code[i] == '/' && code[i + 1] == '/') {        //to remove single line comments
            while (code[i] != '\n')
                i++;
        } else if (code[i] == '/' && code[i + 1] == '*') {  //to remove multi line comments
            i = i + 2;
            while (code[i] != '*' && code[i + 1] != '/') {
                i++;
            }
            i = i + 3;
        } else {           //store the rest of the code in output array
            output[index++] = code[i++];
        }

    }
    output[index] = '\0';
    printf("%s", output);
}

INPUT:

#include<stdio.h>
void main()
{
printf("Hello");
/*--------------------------------------------
  ------------------Ignored by compiler-------
  --------------------------------------------
*/
printf("By");
}

OUTPUT:

#include<stdio.h>
void main()
{
printf("Hello");
printf("By");
}

Upvotes: 0

Jerry Jacobs
Jerry Jacobs

Reputation: 315

I like this thread to include a "comment stripper" in my project before handing it to the JSON parser. Only I like a FSM approach better. Hopefully my implementation is understandable and useful to anybody:

#include <stdio.h>
#include <string.h>

void strip(int ch, FILE *stream)
{
    static enum strip_states {
        STRIP_STATE_PUTC = 0,
        STRIP_STATE_SINGLE,
        STRIP_STATE_MULTI,
        STRIP_STATE_STRING,
    } state = STRIP_STATE_PUTC;
    static char _ch = 0;
    static unsigned char _nestlevel = 0;

    /* String */
    if (state == STRIP_STATE_PUTC &&
        ch == '"') {
        state = STRIP_STATE_STRING;
    } else if (state == STRIP_STATE_STRING &&
        ch == '"') {
        state = STRIP_STATE_PUTC;
    /* Multiline */
    } else if (_ch == '/' && ch == '*') {
        if (state == STRIP_STATE_PUTC)
            state = STRIP_STATE_MULTI;
        else if (state == STRIP_STATE_MULTI)
            _nestlevel++;
    } else if (_ch == '*' && ch == '/') {
        if (state == STRIP_STATE_MULTI &&
            _nestlevel > 0)
            _nestlevel--;
        else if (state == STRIP_STATE_MULTI &&
            _nestlevel == 0)
            state = STRIP_STATE_PUTC;
    /* Singleline */
    } else if (state == STRIP_STATE_PUTC &&
            _ch == '/' && ch == '/') {
        state = STRIP_STATE_SINGLE;
    } else if (state == STRIP_STATE_SINGLE &&
            ch == '\n') {
        state = STRIP_STATE_PUTC;
    }

    /* Put character */
    if ((state == STRIP_STATE_PUTC && ch != '/') ||
        state == STRIP_STATE_STRING)
        fputc(ch, stream);

    _ch = ch;
}

int main(void)
{
    int ch;
    while ((ch = fgetc(stdin)) != EOF)
        strip(ch, stdout);

    return 0;
}

What works:

  • Single line comments "xxx // comment"
  • Normal multiline comments "xxx /* comment\n another comment */ yyy"
  • Nested comments "xxx /* comment /* nested comment */ end of comment */ yyy

Currently not implemented and tested:

  • Multiline comment behind single-line comment
  • Line continuation
  • Escaped characters

Kind regards, Jerry

Upvotes: 0

Ankit Ardeshana
Ankit Ardeshana

Reputation: 11

//G H PATEL COLLEGE OF ENGINEERING & TECHNOLOGY.    
//c program to remove comments from given src.txt file, and write back to dest.txt file.
#include <stdio.h>
int main()
{
    FILE *src,*dest;
    char ch,pre,line[100];
    int nflag,qflag,index;

    src=fopen("src.txt","r+");
    dest=fopen("dest.txt","w+");

    nflag=1;

    while(!feof(src))
    {
        index=0;
        for(ch=fgetc(src);ch!=EOF && ch!='\n';)
        {
            if(ch=='"'&&pre!='\\')
            {
                qflag=0;
                for(;ch!='\n' && qflag==0;)
                {
                    line[index++]=ch;
                    pre=ch;
                    ch=fgetc(src);
                    if(ch=='"'&&pre!='\\')
                    {
                        qflag=1;
                        line[index++]=ch;
                        pre=ch;
                        ch=fgetc(src);
                        break;
                    }
                }
            }
            else if(ch=='/')
            {
                pre=ch;
                ch=fgetc(src);
                if(ch=='/')
                {
                    for(;fgetc(src)!='\n';);
                    break;
                }
                if(ch=='*')
                {
                    nflag=1;
                    for(ch=fgetc(src);nflag==1;)
                    {
                        if(ch=='*')
                        {
                            pre=ch;
                            ch=fgetc(src);
                            if(ch=='/')
                            {
                                nflag=0;
                            }
                        }
                        else
                        {
                            pre=ch;
                            ch=fgetc(src);
                        }
                    }
                }
            }
            else
            {
                line[index++]=ch;
                pre=ch;
                ch=fgetc(src);
            }
        }
        line[index]='\0';
        if(index>0)
        {
            line[index] = '\0';
            fprintf(dest,"%s\n",line);
            fflush(stdin);
        }
    }
    getch();
    fclose(src);
    fclose(dest);
    return 0;
}

Upvotes: 0

ChuckCottrill
ChuckCottrill

Reputation: 4444

Move your stripping of comments into a function (more useful), and read one line at a time with fgets(), last_character is ambiguous (does it mean last, or previous?), this uses far fewer calls to putchar(), only one printf (could use puts) per line, preserves most of what you were doing,

#include <stdio.h>
#include <string.h>
#define MAX_LENGTH 65536

#define NOT_IN_COMMENT 0
#define SINGLE_COMMENT 1
#define MULTI_COMMENT  2
int status = NOT_IN_COMMENT;  /* Are we in a comment? What type? */
int in_string = 0;            /* Are we inside of a string constant? */
char* stripcomments(char* stripped,char* code)
{
    int ndx;                      /* index for code[] */
    int ondx;                     /* index for output[] */
    char prevch;                  /* Value of the previous character */
    char ch;                      /* Character to input into */

    /* Remove all comments from the code and display results to user */
    for (ndx=ondx=0; ndx < strlen(code); ndx++)
    {
        char current = code[ndx];

        if (in_string) {
            if (current == '"') in_string = 0;
            stripped[ondx++] = current;
        }
        else {
            if (status == NOT_IN_COMMENT) {
                if (current == '"') {
                    stripped[ondx++] = current;
                    in_string = 1;
                    continue;
                }

                if (current == '/' && prevch == '/') status = SINGLE_COMMENT;
                else if (current == '*' && prevch == '/') status = MULTI_COMMENT;
                else if (current != '/' || (current == '/' && ndx < strlen(code)-1 && !(code[ndx+1] == '/' || code[ndx+1] == '*'))) stripped[ondx++] = current;
            }

            else if (status == SINGLE_COMMENT) {
                if (current == '\n') {
                    status = NOT_IN_COMMENT;
                    stripped[ondx++] = '\n';
                }
            }

            else if (status == MULTI_COMMENT) {
                if (current == '/' && prevch == '*') status = NOT_IN_COMMENT;
            }
        }
        prevch = current;
    }
    stripped[ondx] = '\0';
    return(stripped);
}

int main(void)
{
    char code[MAX_LENGTH];        /* Buffer that stores the inputted code */
    char stripped[MAX_LENGTH];

    while( fgets(code,sizeof(code),stdin) )
    {
        //printf("%s\n",code);
        //strip comments...
        stripcomments(stripped,code);
        if( strlen(stripped) > 0 ) printf("%s",stripped);
    }
}

I'll leave it to you to remove extra blank lines.

Upvotes: 5

Graham Perks
Graham Perks

Reputation: 23400

Looks good to me, well done!

Maybe it could be improved by adding some comments :) As a rough guide, add one for each conditional. You were commenting but stopped just at the juicy part, inside the loop. But the code looks pretty readable as-is.

Does it work? Have you tested it?

Look like it might fail if I have a string containing an escaped double-quote... e.g. "He said, \"Hello, World!\"".

Upvotes: 1

pburka
pburka

Reputation: 1474

When you're handling quoted strings, you should detect escaped quotes (\"). e.g. "\"/* not a comment */\"" is a valid string, but I think your code will strip the false comment from the middle of it.

If you want to be really correct, you should also handle line continuations (a line ending with a \ continues on the next line). For added hairiness, you also ought to handle trigraphs. ??/" is an escaped quote, and ??/ at the end of a line is a continuation.

The style of the code looks pretty good, although main should more properly be declared as int main(void).

Upvotes: 1

Related Questions