Reputation: 11
I managed to write code to skip `//` comments in C source:
while (fgets(string, 10000, fin) != NULL)
{
unsigned int i;
for (i = 0; i < strlen(string); i++)
{
if ((string[i] == '/') && (string[i + 1] == '/'))
{
while (string[i += 1] != '\n')
continue;
}
//rest of the code...
I've tried to do a similar thing for `/* */` comments:
if ((string[i] == '/') && (string[i + 1] == '*'))
{
while (string[i += 1] != '/')
continue;
}
if ((string[i] == '*') && (string[i + 1] == '/'))
{
while (string[i -= 1])
continue;
}
But it reads line by line and if I have, for example,
/*
text*/
then it counts the text.
How do I fix this?
Upvotes: 0
Views: 2522
Reputation: 5347
(It is not very clear what your program is trying to do.)
Using flex to count the number of characters outside comments:
/*
 * count-non-com: read C source on stdin and print the number of characters
 * that lie outside comments. String literals and character constants are
 * matched as whole tokens so that comment markers inside them are not
 * mistaken for comments.
 */
%option noyywrap
%%
    /* Indented so flex copies it into yylex() as a local instead of
     * parsing it as a pattern. */
    int i = 0;
\"([^\\"]|\\.)*\"             { i += yyleng; } // string literals
\'([^\\'\n]|\\.)+\'           { i += yyleng; } // character constants such as '"'
\/\/.*                        { }              // C++ comments
\/\*([^*]|\*+[^*/])*\*+\/     { }              // C comments, including ones ending in a run of stars
.|\n                          { i += yyleng; } // normal chars
<<EOF>>                       { printf("%d\n", i); return 0; }
%%
int main(void)
{
    yylex();
    return 0;
}
and
$ flex count-non-com.fl
$ cc -o count-non-com lex.yy.c
$ count-non-com < input
One last example: flex code to remove comments (thanks @LuisColorado)
/*
 * Copy stdin to stdout with C and C++ comments removed. String literals and
 * character constants are echoed as whole tokens so that comment markers
 * inside them are left alone.
 */
%option noyywrap
%%
\"([^\\"]|\\.)*\"             { ECHO; } // string literals
\'([^\\'\n]|\\.)+\'           { ECHO; } // character constants such as '"'
\/\/.*                        { }       // C++ comments
\/\*([^*]|\*+[^*/])*\*+\/     { }       // C comments, including ones ending in a run of stars
.|\n                          { ECHO; } // normal chars
%%
int main(void)
{
    yylex();
    return 0;
}
Upvotes: 0
Reputation: 12668
A simple regular expression for a C comment is:
\/\*([^*]|\*+[^*\/])*\*+\/
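(Sorry for the escape characters.) This allows any sequence inside a comment except `*/`. It translates to the following DFA (four states):

- state 0, input `/`, next state 1, output none
- state 0, input other, next state 0, output read char
- state 1, input `*`, next state 2, no output
- state 1, input `/`, next state 1, output `/`
- state 1, input other, next state 0, output `/` and read char
- state 2, input `*`, next state 3, output none
- state 2, input other, next state 2, output none
- state 3, input `/`, next state 0, output none
- state 3, input `*`, next state 3, output none
- state 3, input other, next state 2, output none

The possible inputs are `/`, `*` and any other character. The possible outputs are output read char, output `/` and output `*`.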
This translates to the following code:
#include <stdio.h>
/*
 * Copy stdin to stdout with C block comments (slash-star ... star-slash)
 * removed, using a four-state DFA:
 *
 *   0  ordinary text
 *   1  a '/' has been read and is being held back (it may open a comment)
 *   2  inside a comment
 *   3  inside a comment, the previous character was '*'
 *
 * Limitations: comment markers inside string literals or character
 * constants are not recognised as such, and "//" comments are copied through.
 *
 * Returns 0.
 */
int main(void)
{
    int c, st = 0;

    while ((c = getchar()) != EOF) {
        switch (st) {
        case 0: /* ordinary text */
            if (c == '/')
                st = 1;
            else
                putchar(c);
            break;
        case 1: /* holding back a '/' */
            switch (c) {
            case '/': /* emit the older slash, keep holding the new one */
                putchar('/');
                break;
            case '*':
                st = 2;
                break;
            default:
                putchar('/');
                putchar(c);
                st = 0;
                break;
            }
            break;
        case 2: /* inside a comment */
            if (c == '*')
                st = 3;
            break;
        case 3: /* inside a comment, just saw '*' */
            switch (c) {
            case '/':
                st = 0;
                break;
            case '*': /* still a candidate for the closing marker */
                break;
            default:
                st = 2;
                break;
            }
            break;
        default:
            break;
        }
    }

    /* Input ended while a '/' was still held back: it was not the start of
     * a comment, so it belongs to the output. */
    if (st == 1)
        putchar('/');

    return 0;
}
In case you want to exclude both types of comments, we need to switch to a fifth state when receiving a second `/`, resulting in the following code:
#include <stdio.h>
/*
 * Copy stdin to stdout with both block comments (slash-star ... star-slash)
 * and line comments (double slash up to end of line) removed, using a
 * five-state DFA:
 *
 *   0  ordinary text
 *   1  a '/' has been read and is being held back (it may open a comment)
 *   2  inside a block comment
 *   3  inside a block comment, the previous character was '*'
 *   4  inside a line comment
 *
 * The newline that ends a line comment is kept in the output.
 *
 * Limitations: comment markers inside string literals or character
 * constants are not recognised as such.
 *
 * Returns 0.
 */
int main(void)
{
    int c, st = 0;

    while ((c = getchar()) != EOF) {
        switch (st) {
        case 0: /* ordinary text */
            if (c == '/')
                st = 1;
            else
                putchar(c);
            break;
        case 1: /* holding back a '/' */
            switch (c) {
            case '/':
                st = 4;
                break;
            case '*':
                st = 2;
                break;
            default:
                putchar('/');
                putchar(c);
                st = 0;
                break;
            }
            break;
        case 2: /* inside a block comment */
            if (c == '*')
                st = 3;
            break;
        case 3: /* inside a block comment, just saw '*' */
            switch (c) {
            case '/':
                st = 0;
                break;
            case '*': /* still a candidate for the closing marker */
                break;
            default:
                st = 2;
                break;
            }
            break;
        // in the next line we put // inside an `old' comment
        // to illustrate this special case. The switch has been put
        // after the comment to show it is not being commented out.
        case 4: /* we have read "// ..." */ switch (c) {
            case '\n': st = 0; putchar('\n'); break;
            default: break;
            } // switch (to illustrate this kind of comment).
            break;
        default:
            break;
        }
    }

    /* Input ended while a '/' was still held back: it was not the start of
     * a comment, so it belongs to the output. */
    if (st == 1)
        putchar('/');

    return 0;
}
Upvotes: 2
Reputation: 418
As user279599 just said, use an integer variable as a flag: whenever you get '/' and '*' consecutively, set the flag (flag = 1); the flag then stays 1 until you get '*' and '/' consecutively. Ignore every character while the flag is 1.
Upvotes: -1
Reputation: 1
Make an int variable. Scan the characters and store the index if you get /*. Continue scanning until you get */. If the variable !=0 at that time, then assume this is the closing comment token and ignore the characters in between.
Upvotes: -1
Reputation: 6421
This simple code can ignore `/* */` comments.
(It doesn't handle all cases; for instance, `/*` written inside a quoted string in C code is still treated as the start of a comment.)
#include <stdio.h>
#include <string.h>
/*
 * Hand-rolled boolean type. Enumerators are numbered in declaration order,
 * so false == 0 and true == 1.
 * NOTE(review): these names collide with <stdbool.h> and with the C23
 * keywords bool/true/false; this typedef only compiles when neither is in
 * effect.
 */
typedef enum bool // false = 0 and true = 1
{ false,true}bool;
/*
 * Print the contents of the file named "file" to stdout with C block
 * comments (slash-star ... star-slash) removed.
 *
 * The input is scanned one character at a time with a single character of
 * lookahead, so a comment marker is recognised wherever it falls in the
 * file, however long the lines are. An opening marker is only looked for
 * outside a comment and a closing marker only inside one.
 *
 * Limitations: comment markers inside string literals or character
 * constants are still treated as comment markers, and "//" comments are
 * copied through unchanged.
 *
 * argc and argv are unused.
 * Returns 0 on success, 1 if the file cannot be opened.
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    FILE *file = fopen("file", "r");
    if (file == NULL) {
        perror("file");
        return 1;
    }

    int in_comment = 0; /* non-zero while between the opening and closing markers */
    int c;

    while ((c = fgetc(file)) != EOF) {
        if (!in_comment && c == '/') {
            int next = fgetc(file);
            if (next == '*') {
                in_comment = 1; /* both marker characters are consumed */
                continue;
            }
            /* Not a comment: give the lookahead back and keep the slash. */
            if (next != EOF)
                ungetc(next, file);
            putchar(c);
        } else if (in_comment && c == '*') {
            int next = fgetc(file);
            if (next == '/') {
                in_comment = 0;
                continue;
            }
            /* A lone '*' inside the comment; the lookahead may itself be
             * the '*' of the real closing marker, so re-read it. */
            if (next != EOF)
                ungetc(next, file);
        } else if (!in_comment) {
            putchar(c);
        }
    }

    fclose(file);
    return 0;
}
Upvotes: 0
Reputation: 180306
Even your supposedly-working code has several problems:
- It treats `//` appearing within a string constant or within a `/* ... */` comment as the beginning of a comment.

In the end, C is a stream-oriented language, not a line-oriented language. It should be parsed that way (character by character). To do the job right, you really need to implement a much more sophisticated parser. If you're up for learning a new tool, then you could consider basing your program on the Flex lexical analyzer.
Upvotes: 3