Reputation: 11
For my computer science class I have to come up with program to tokenize strings for a given input file using the maximal munch algorithm. This is what my friend and I have done so far.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
FILE *
input_from_args(int argc, const char *argv[])
{
if(argc = 1)
{
return stdin;
}
else
{
return fopen(argv[1], "r");
}
}
int main(int argc, const char* argv[])
{
FILE *src = input_from_args(argc, argv);
if(src == NULL)
{
fprintf(stderr, "%s: unable to open %s\n", argv[0], argv[1]);
exit(EXIT_FAILURE);
}
int c, nc;
while((c = getchar()) != EOF || (c = getchar()) != '\n')
{
if(c == ' ' || c == '\t')
{
printf("Blank Space");
}
else if(c == '+' || c == '-' || c == '*' || c == '/' || c == '^')
{
if(c == '+')
{
printf("%s \t addition operator\n", c);
}
else if(c == '-')
{
printf("%s \t subtraction operator\n", c);
}
else if(c == '*')
{
printf("%s \t multiplication operator\n", c);
}
if(c == '/')
{
printf("%s \t division operator\n", c);
}
else if(c == '^')
{
printf("%s \t exponentiation\n", c);
}
}
else if(c == '(' || c == ')')
{
if(c == '(')
{
printf("%s \t open parentheses\n", c);
}
else if(c == ')')
{
printf("%s \t close parentheses\n", c);
}
}
else if(c == '<' || c == '>' || c == '=')
{
if(c == '<' && (nc = getchar()) == '=')
{
printf("%s%s \t comparison operator\n", c, nc);
}
else if(c == '>' && (nc = getchar()) == '=')
{
printf("%s%s \t comparison operator\n", c, nc);
}
else if(c == '>' || c == '<')
{
printf("%s \t comparison operator\n", c);
}
else if(c == '=')
{
printf("%s \t equals operator\n", c);
}
}
else if(isdigit(c))
{
printf("%s \t simple numeral \n", c);
}
else if(isalpha(c))
{
printf("%s \t invalid token\n", c);
}
}
}
When we compile the program we get the following warning:
format argument is not a pointer (arg 2).
Upvotes: 1
Views: 1092
Reputation: 84561
Following up on an earlier comment, your block of code comparing <
, >
and =
is susceptible to error and will never match <=
or >=
as written. As part of your routine, you are reading c
twice at the beginning of your while
loop. Forcing the read of two characters, you will never have nc
read =
in <=
because the second character has already been consumed:
while((c = getchar()) != EOF || (c = getchar()) != '\n')
While the intent may be to strip the newline from the input buffer, the effect is to prevent any character from remaining for nc
to read. A better way to structure the beginning of the loop may be:
while (1)
{
c = fgetc (src);
if (c == 'q' || c == EOF) /* just `q` for quit :) */
break;
...
In addition to this read issue, if the user enters anything other than <=
or >=
you always report comparison operator
. Take for example <h
, you check c
for <
or >
and then read nc
(in this case h
). While nc
does not match =
, the very next condition is satisfied:
else if(c == '>' || c == '<')
This results in your code outputting comparison operator
no matter what the character following the >
or <
sign was. A little rewrite of the logic could prevent the error. For example change:
else if(c == '<' || c == '>' || c == '=')
{
...
}
to:
else if ( c == '=')
{
printf("\t %c \t equals operator\n", c);
}
else if(c == '<' || c == '>')
{
if ((nc = fgetc (src)) == '=')
{
if(c == '<')
{
printf("\t %c= \t comparison equivalence operator\n", c);
}
else if(c == '>')
{
printf("\t %c= \t comparison equivalence operator\n", c);
}
}
else
{
printf("\t %c \t comparison operator\n", c);
ungetc (nc, src); /* the next character may be important */
}
}
Next, if you noticed the use of fgetc
above, it is important. At the beginning of your code you want to use either stdin
or a file for input. However, throughout your code you have used getchar()
for reading the character. This will not work if the user supplies a filename as the first argument. You may open the file, but you will be left with a blinking cursor waiting for input on stdin
. If you intend to have the option of reading from a file, you need to change each:
getchar()
to
fgetc (src)
Lastly, and this is just a pet peeve. I you are asking the user for input, give the user a prompt so they know they are supposed to do something and are not just left wondering if the program is hung. (a simple input:
is fine). Since you have structured the program to read from either stdin
or a file, it may not be apparent how to only prompt in the case the program is reading from stdin
(no prompt is wanted or needed when reading from a file). The solution is to simply pass a pointer to a flag as an argument to your input_from_args
function. You function sets the flag if reading from stdin
or leaves it 0
if reading from a file. That allows you to provide a prompt as follows:
if (stdf) printf (" input : "); /* stdf being the stdinflag */
Putting it all together, an updated version of the code that reads from either a file or stdin
could be similar to:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
FILE *
input_from_args (int argc, const char *argv[], size_t *stdinflag)
{
if (argc == 1)
{
*stdinflag = 1;
return stdin;
}
else
{
*stdinflag = 0;
return fopen(argv[1], "r");
}
}
int main (int argc, const char **argv)
{
size_t stdf = 0;
FILE *src = input_from_args (argc, argv, &stdf);
if(src == NULL)
{
fprintf(stderr, "%s: unable to open %s\n", argv[0], argv[1]);
exit(EXIT_FAILURE);
}
int c = 0;
int nc = 0;
int strip = 0;
while (1)
{
if (stdf) printf (" input : ");
c = fgetc (src);
if(c == ' ' || c == '\t')
{
printf("\t Blank Space\n");
}
else if(c == '+' || c == '-' || c == '*' || c == '/' || c == '^')
{
if(c == '+')
{
printf("\t %c \t addition operator\n", c);
}
else if(c == '-')
{
printf("\t %c \t subtraction operator\n", c);
}
else if(c == '*')
{
printf("\t %c \t multiplication operator\n", c);
}
if(c == '/')
{
printf("\t %c \t division operator\n", c);
}
else if(c == '^')
{
printf("\t %c \t exponentiation\n", c);
}
}
else if(c == '(' || c == ')')
{
if(c == '(')
{
printf("\t %c \t open parentheses\n", c);
}
else if(c == ')')
{
printf("\t %c \t close parentheses\n", c);
}
}
else if ( c == '=')
{
printf("\t %c \t equals operator\n", c);
}
else if(c == '<' || c == '>')
{
if ((nc = fgetc (src)) == '=')
{
if(c == '<')
{
printf("\t %c= \t comparison equivalence operator\n", c);
}
else if(c == '>')
{
printf("\t %c= \t comparison equivalence operator\n", c);
}
}
else
{
printf("\t %c \t comparison operator\n", c);
ungetc (nc, src);
}
}
else if(isdigit(c))
{
printf("\t %c \t simple numeral \n", c);
}
else if(isalpha(c))
{
if (tolower (c) == 'q')
{
printf ("\t\n 'q' received, quitting.\n\n");
break;
}
printf("\t %c \t invalid token\n", c);
}
while ((strip = fgetc (src)) != '\n' && strip != EOF);
}
if (src && !stdf) fclose (src);
return 0;
}
Output - stdin
$ ./bin/getchar_token_calc
input :
Blank Space
input :
Blank Space
input : +
+ addition operator
input : -
- subtraction operator
input : *
* multiplication operator
input : /
/ division operator
input : ^
^ exponentiation
input : (
( open parentheses
input : )
) close parentheses
input : <
< comparison operator
input : >
> comparison operator
input : <=
<= comparison equivalence operator
input : >=
>= comparison equivalence operator
input : 6
6 simple numeral
input : a
a invalid token
input : q
'q' received, quitting.
Output - file
$ ./bin/getchar_token_calc dat/tokens.txt
Blank Space
Blank Space
+ addition operator
- subtraction operator
* multiplication operator
/ division operator
^ exponentiation
( open parentheses
) close parentheses
= equals operator
< comparison operator
> comparison operator
<= comparison equivalence operator
>= comparison equivalence operator
6 simple numeral
'q' received, quitting.
Upvotes: 0
Reputation: 150
One pretty smart trick to more easily spot the (argc = 1) instead of (argc == 1) kind of mistake is to actually get used to put the constant on the left hand side. If you had written if (1 = argc), the compiler would have complained saying you were trying to assign a value to a constant, and you would instantly spot that. This comes from the gnu coding style by the way.
Upvotes: 1