204
204

Reputation: 473

Removing Comments from a file and printing it on console

I have written code to remove comments from a C program file, and print the output on the console:

#include <stdio.h>
#include <stdlib.h>

void incomment(FILE *fp);
void rcomment(int c, FILE *fp);
void echo_quote(int c, FILE *fp);

int main() {
    FILE *fp;
    fp = fopen("temp.c", "r");
    int c;

    while ((c = getc(fp)) != EOF) {
        rcomment(c, fp);
    }
    return 0;
}

void incomment(FILE* fp) {
    int c, d;
    c = getc(fp);
    d = getc(fp);
    while (c != '*' && d != '/') {
        c = d;
        d = getc(fp);
    }
}

void echo_quote(int c, FILE *fp) {
    int d;
    putchar(c);
    while ((d = getc(fp)) != c) {
        putchar(d);
        if (d == '\\')
           putchar(getc(fp));
    }
    putchar(d);
}

void rcomment(int c, FILE *fp) {
    int d;

    if (c == '/') {
        if ((d = getc(fp)) == '*')
            incomment(fp);
        else
        if (d == '/') {
            putchar(c);
            rcomment(d, fp);
        } else {
            putchar(c);
            putchar(d);
        }
    } else
    if (c == '\'' || c == '"')
        echo_quote(c, fp);
    else
        putchar(c);
}

However for the following input:

#include<stdio.h>
/* Author : XYZ
* Date : 21/1/2016
*/
int main()
{
 int a; // / variable a
 printf("/*Hi*/");
 return 0;
 }

OUTPUT:

#include<stdio.h>
 Date : 21/1/2016
 */
 int main()
 {
 int a; // / variable a
 printf("/*Hi*/");
 return 0;
 }

Could someone point out the error in the code. It seems to be working fine for comments within quotes. But not for the single line comments.

Upvotes: 1

Views: 93

Answers (1)

chqrlie
chqrlie

Reputation: 144770

The rcomment() function does not parse the single line comments correctly:

  • If you match a '/' for the second character, you should read all remaining characters upto the newline and output just the newline.

  • If the second character is a quote, you fail to output the first character and parse the literal. An easy way to do this is to unget the second character with ungetc(d, fp); and only output c.

There are other special cases you do not handle:

  • Escaped newlines should be handled in literals and single line comments as well as in between the / and the * at the start of a multi-line comment and between the * and the / at the end. You can do this simply by using a utility function to read the bytes from the file that handles escaped newlines but it will be difficult to output them back to the output file to preserve the line counts.

  • You should replace multiline comments with a single space or newline characters to avoid pasting tokens and to preserve the line count.

  • incomment() and echo_quote() should handle a premature end of file. As currently coded, they run indefinitely.

This parsing task is more subtile than it looks. You could try another approach and implement a state machine.

Here is a quick fix for the rcomment() function, but the other issues above remain:

int peekc(FILE *fp) {
    int c = getc(fp);
    if (c != EOF)
        ungetc(c, fp);
    return c;
}

void rcomment(int c, FILE *fp) {
    int d;

    if (c == '/') {
        if ((d = getc(fp)) == '*') {
            incomment(fp);
        } else
        if (d == '/') {
            while ((c = getc(fp)) != EOF && c != '\n') {
                if (c == '\\' && peekc(fp) == '\n') {
                    putchar(getc(fp));
                }
            }
            putchar('\n');
        } else {
            putchar(c);
            ungetc(d, fp);
        }
    } else
    if (c == '\'' || c == '"') {
        echo_quote(c, fp);
    } else {
        putchar(c);
    }
}

Upvotes: 1

Related Questions