Reputation: 667
I'm learning C from the K&R book and for exercise 1.23 in the first chapter, I have to write a program that removes all comments given some C code that the user inputs. This is my completed program so far. Are there any improvements I can make to it?
/**
Tuesday, 10/07/2013
Exercise 1.23
Write a program to remove all comments from a C
program. Don't forget to handle quoted strings
and character constants properly. C comments
don't nest.
**/
#include <stdio.h>
#define MAX_LENGTH 65536
#define NOT_IN_COMMENT 0
#define SINGLE_COMMENT 1
#define MULTI_COMMENT 2
main()
{
char code[MAX_LENGTH]; /* Buffer that stores the inputted code */
int size = 0; /* Length of the inputted code */
int loop; /* Integer used for the for loop */
char c; /* Character to input into */
int status = NOT_IN_COMMENT; /* Are we in a comment? What type? */
int in_string = 0; /* Are we inside of a string constant? */
char last_character; /* Value of the last character */
/* Input all code into the buffer until escape sequence pressed */
while ((c = getchar()) != EOF)
code[size++] = c;
code[size] = '\0';
/* Remove all comments from the code and display results to user */
for (loop = 0; loop < size; loop++) {
char current = code[loop];
if (in_string) {
if (current == '"') in_string = 0;
putchar(current);
}
else {
if (status == NOT_IN_COMMENT) {
if (current == '"') {
putchar(current);
in_string = 1;
continue;
}
if (current == '/' && last_character == '/') status = SINGLE_COMMENT;
else if (current == '*' && last_character == '/') status = MULTI_COMMENT;
else if (current != '/' || (current == '/' && loop < size-1 && !(code[loop+1] == '/' || code[loop+1] == '*'))) putchar(current);
}
else if (status == SINGLE_COMMENT) {
if (current == '\n') {
status = NOT_IN_COMMENT;
putchar('\n');
}
}
else if (status == MULTI_COMMENT) {
if (current == '/' && last_character == '*') status = NOT_IN_COMMENT;
}
}
last_character = current;
}
}
Upvotes: 6
Views: 12170
Reputation: 4067
You can refer the simple code below :
#include <stdio.h>
int main(int argc, char **argv) {
char code[1000];
char output[1000];
char ch;
int i = 0;
//store code in array
while ((ch = getchar()) != EOF) {
code[i++] = ch;
}
code[i] = '\0';
int index = 0;
i = 0;
//store removed comment code in output
while (code[i] != EOF) {
if (code[i] == '/' && code[i + 1] == '/') { //to remove single line comments
while (code[i] != '\n')
i++;
} else if (code[i] == '/' && code[i + 1] == '*') { //to remove multi line comments
i = i + 2;
while (code[i] != '*' && code[i + 1] != '/') {
i++;
}
i = i + 3;
} else { //store the rest of the code in output array
output[index++] = code[i++];
}
}
output[index] = '\0';
printf("%s", output);
}
INPUT:
#include<stdio.h>
void main()
{
printf("Hello");
/*--------------------------------------------
------------------Ignored by compiler-------
--------------------------------------------
*/
printf("By");
}
OUTPUT:
#include<stdio.h>
void main()
{
printf("Hello");
printf("By");
}
Upvotes: 0
Reputation: 315
I like this thread to include a "comment stripper" in my project before handing it to the JSON parser. Only I like a FSM approach better. Hopefully my implementation is understandable and useful to anybody:
#include <stdio.h>
#include <string.h>
void strip(int ch, FILE *stream)
{
static enum strip_states {
STRIP_STATE_PUTC = 0,
STRIP_STATE_SINGLE,
STRIP_STATE_MULTI,
STRIP_STATE_STRING,
} state = STRIP_STATE_PUTC;
static char _ch = 0;
static unsigned char _nestlevel = 0;
/* String */
if (state == STRIP_STATE_PUTC &&
ch == '"') {
state = STRIP_STATE_STRING;
} else if (state == STRIP_STATE_STRING &&
ch == '"') {
state = STRIP_STATE_PUTC;
/* Multiline */
} else if (_ch == '/' && ch == '*') {
if (state == STRIP_STATE_PUTC)
state = STRIP_STATE_MULTI;
else if (state == STRIP_STATE_MULTI)
_nestlevel++;
} else if (_ch == '*' && ch == '/') {
if (state == STRIP_STATE_MULTI &&
_nestlevel > 0)
_nestlevel--;
else if (state == STRIP_STATE_MULTI &&
_nestlevel == 0)
state = STRIP_STATE_PUTC;
/* Singleline */
} else if (state == STRIP_STATE_PUTC &&
_ch == '/' && ch == '/') {
state = STRIP_STATE_SINGLE;
} else if (state == STRIP_STATE_SINGLE &&
ch == '\n') {
state = STRIP_STATE_PUTC;
}
/* Put character */
if ((state == STRIP_STATE_PUTC && ch != '/') ||
state == STRIP_STATE_STRING)
fputc(ch, stream);
_ch = ch;
}
int main(void)
{
int ch;
while ((ch = fgetc(stdin)) != EOF)
strip(ch, stdout);
return 0;
}
What works:
"xxx // comment"
"xxx /* comment\n another comment */ yyy"
"xxx /* comment /* nested comment */ end of comment */ yyy
Currently not implemented and tested:
Kind regards, Jerry
Upvotes: 0
Reputation: 11
//G H PATEL COLLEGE OF ENGINEERING & TECHNOLOGY.
//c program to remove comments from given src.txt file, and write back to dest.txt file.
#include <stdio.h>
int main()
{
FILE *src,*dest;
char ch,pre,line[100];
int nflag,qflag,index;
src=fopen("src.txt","r+");
dest=fopen("dest.txt","w+");
nflag=1;
while(!feof(src))
{
index=0;
for(ch=fgetc(src);ch!=EOF && ch!='\n';)
{
if(ch=='"'&&pre!='\\')
{
qflag=0;
for(;ch!='\n' && qflag==0;)
{
line[index++]=ch;
pre=ch;
ch=fgetc(src);
if(ch=='"'&&pre!='\\')
{
qflag=1;
line[index++]=ch;
pre=ch;
ch=fgetc(src);
break;
}
}
}
else if(ch=='/')
{
pre=ch;
ch=fgetc(src);
if(ch=='/')
{
for(;fgetc(src)!='\n';);
break;
}
if(ch=='*')
{
nflag=1;
for(ch=fgetc(src);nflag==1;)
{
if(ch=='*')
{
pre=ch;
ch=fgetc(src);
if(ch=='/')
{
nflag=0;
}
}
else
{
pre=ch;
ch=fgetc(src);
}
}
}
}
else
{
line[index++]=ch;
pre=ch;
ch=fgetc(src);
}
}
line[index]='\0';
if(index>0)
{
line[index] = '\0';
fprintf(dest,"%s\n",line);
fflush(stdin);
}
}
getch();
fclose(src);
fclose(dest);
return 0;
}
Upvotes: 0
Reputation: 4444
Move your stripping of comments into a function (more useful), and read one line at a time with fgets(), last_character is ambiguous (does it mean last, or previous?), this uses far fewer calls to putchar(), only one printf (could use puts) per line, preserves most of what you were doing,
#include <stdio.h>
#include <string.h>
#define MAX_LENGTH 65536
#define NOT_IN_COMMENT 0
#define SINGLE_COMMENT 1
#define MULTI_COMMENT 2
int status = NOT_IN_COMMENT; /* Are we in a comment? What type? */
int in_string = 0; /* Are we inside of a string constant? */
char* stripcomments(char* stripped,char* code)
{
int ndx; /* index for code[] */
int ondx; /* index for output[] */
char prevch; /* Value of the previous character */
char ch; /* Character to input into */
/* Remove all comments from the code and display results to user */
for (ndx=ondx=0; ndx < strlen(code); ndx++)
{
char current = code[ndx];
if (in_string) {
if (current == '"') in_string = 0;
stripped[ondx++] = current;
}
else {
if (status == NOT_IN_COMMENT) {
if (current == '"') {
stripped[ondx++] = current;
in_string = 1;
continue;
}
if (current == '/' && prevch == '/') status = SINGLE_COMMENT;
else if (current == '*' && prevch == '/') status = MULTI_COMMENT;
else if (current != '/' || (current == '/' && ndx < strlen(code)-1 && !(code[ndx+1] == '/' || code[ndx+1] == '*'))) stripped[ondx++] = current;
}
else if (status == SINGLE_COMMENT) {
if (current == '\n') {
status = NOT_IN_COMMENT;
stripped[ondx++] = '\n';
}
}
else if (status == MULTI_COMMENT) {
if (current == '/' && prevch == '*') status = NOT_IN_COMMENT;
}
}
prevch = current;
}
stripped[ondx] = '\0';
return(stripped);
}
int main(void)
{
char code[MAX_LENGTH]; /* Buffer that stores the inputted code */
char stripped[MAX_LENGTH];
while( fgets(code,sizeof(code),stdin) )
{
//printf("%s\n",code);
//strip comments...
stripcomments(stripped,code);
if( strlen(stripped) > 0 ) printf("%s",stripped);
}
}
I'll leave it to you to remove extra blank lines.
Upvotes: 5
Reputation: 23400
Looks good to me, well done!
Maybe it could be improved by adding some comments :) As a rough guide, add one for each conditional. You were commenting but stopped just at the juicy part, inside the loop. But the code looks pretty readable as-is.
Does it work? Have you tested it?
Look like it might fail if I have a string containing an escaped double-quote... e.g. "He said, \"Hello, World!\""
.
Upvotes: 1
Reputation: 1474
When you're handling quoted strings, you should detect escaped quotes (\"
). e.g. "\"/* not a comment */\""
is a valid string, but I think your code will strip the false comment from the middle of it.
If you want to be really correct, you should also handle line continuations (a line ending with a \
continues on the next line). For added hairiness, you also ought to handle trigraphs. ??/"
is an escaped quote, and ??/
at the end of a line is a continuation.
The style of the code looks pretty good, although main should more properly be declared as int main(void)
.
Upvotes: 1