Reputation: 19
I have a function that reads and parses a file, but the valid_lines counter does not increment despite the printf right under it is working as expected. I have tried commenting out different parts of the function and it seems that the if (sscanf...
block might be causing this issue.
void read_file(char *filename, int *city_num_lines, double *city_min, double *city_max, double *city_avg) {
// files are in data_files/ directory
char path[LINE_MAX_LENGTH] = "data_files/";
strcat(path, filename);
FILE *file = fopen(path, "r");
// check if file exists
if (file == NULL) {
printf("Error: Could not open file %s\n", filename);
return;
}
char buffer[LINE_MAX_LENGTH];
int valid_lines = 0;
double min = BIG_NUM;
double max = -BIG_NUM;
double sum = 0;
double avg = 0;
// read line by line
while (fgets(buffer, LINE_MAX_LENGTH, file) != NULL) {
// skip first line
if (buffer[0] == 'm') {
continue;
}
// separate by tab (\t)
double cur_max, cur_min;
char delim;
if (sscanf(buffer, "%lf%[\t]%lf", &cur_max, &delim, &cur_min) != 3) {
// printf("skipped line %s", buffer);
continue;
}
// increment line count
valid_lines++;
printf("num_lines incremented\n");
// update min and max
if (cur_max > max) {
max = cur_max;
printf("max: %lf\n", max);
}
if (cur_min < min) {
min = cur_min;
printf("min: %lf\n", min);
}
sum += (cur_max + cur_min) / 2;
// printf("%lf - %lf\n", min, max);
}
printf("num_lines: %d\n", valid_lines);
// calculate average
avg = sum / valid_lines;
// update city values
*city_num_lines = valid_lines;
*city_min = min;
*city_max = max;
*city_avg = avg;
fclose(file); // close file
}
here is the console output (last few lines as it goes on forever)
...
num_lines incremented
num_lines incremented
num_lines incremented
num_lines incremented
num_lines incremented
num_lines incremented
num_lines incremented
num_lines incremented
num_lines incremented
num_lines incremented
num_lines incremented
num_lines incremented
num_lines incremented
num_lines incremented
num_lines: 1
Does anyone have any idea at all why this is happening?
UPDATE:
The root cause of the problem had been the "%lf%[\t]%lf"
within sscanf, as I had only given it 1 space for the character however it ends with a NUL character, causing it to affect another part of memory, which in this case was the valid_lines
variable.
Upvotes: 1
Views: 84
Reputation:
Check size of filename otherwise strcat()
may cause a buffer overflow. It might be easier to use use snprintf()
then check if the path
generated is too long (defined as LINE_MAX_LENGTH -1 or longer).
fgets()
will return multiple partial read for long lines. The strict parser will catch this.
delim
is 1 byte but you require at least 2. Also, when reading strings always use maximum field width. I suggest you ignore the delimiter with the '*' like this:
sscanf(buffer, "%lf%*[\t]%lf", &cur_max, &cur_min) != 2
scanf()
will ignore leading whitespace so you will never be able get it to parse your input strictly as you said was required.
You might want to guard against valid_line == 0 before doing
avg = sum / valid_lines
.
#include <ctype.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#define BIG_NUM 42
#define LINE_MAX_LENGTH 100
void read_file(char *filename, int *city_num_lines, double *city_min, double *city_max, double *city_avg) {
char path[LINE_MAX_LENGTH];
snprintf(path, LINE_MAX_LENGTH, "data_files/%s", filename);
if(strlen(path) + 1 == LINE_MAX_LENGTH) {
printf("path name too long\n");
return;
}
FILE *file = fopen(path, "r");
if (!file) {
printf("Error: Could not open file %s\n", filename);
return;
}
char buffer[LINE_MAX_LENGTH];
*city_num_lines = 0;
*city_min = BIG_NUM;
*city_max = -BIG_NUM;
*city_avg = 0;
while (fgets(buffer, LINE_MAX_LENGTH, file) != NULL) {
if (buffer[0] == 'm') {
continue;
}
char *p = buffer;
char *p2;
// space prefix
if(isspace(*p)) {
fprintf(stderr, "skipped line %s", buffer);
continue;
}
double max = strtod(p, &p2);
// could not parse double, or field is not tab separated
if(!p2 || (*p2 != '\t' && isspace(*p2))) {
fprintf(stderr, "skipped line %s", buffer);
continue;
}
p = p2 + 1;
// 2nd field is space prefixed
if(isspace(*p)) {
fprintf(stderr, "skipped line %s", buffer);
continue;
}
double min = strtod(p, &p2);
// could not parse double, or we are missing a trailing newline
if(!p2 || *p2 != '\n') {
fprintf(stderr, "skipped line %s", buffer);
continue;
}
(*city_num_lines)++;
if(max > *city_max) *city_max = max;
if(min < *city_min) *city_min = min;
*city_avg += (max + min) / 2;
}
*city_avg = *city_num_lines ? *city_avg / *city_num_lines : 0;
fclose(file);
}
int main() {
int city_num_lines;
double city_min;
double city_max;
double city_avg;
read_file("1.txt", &city_num_lines, &city_min, &city_max, &city_avg);
printf("city_num_lines: %d, city_min: %f, city_max: %f, city_avg: %f\n", city_num_lines, city_min, city_max, city_avg);
}
With a sample input file of 1.txt with only the last line being valid:
m
x
1 1
2 2
3 3
4 4
5 5
example session returns:
skipped line x
skipped line 1 1
skipped line 2 2
skipped line 3 3
skipped line 4 4
city_num_lines: 1, city_min: 5.000000, city_max: 5.000000, city_avg: 5.000000
Upvotes: 1
Reputation: 153457
OP's code incurs buffer overflow as char delim
is too small for even a string of "\t"
.
Do not use "%[...]"
or "%s"
without a width.
// separate by tab (\t)
double cur_max, cur_min;
// char delim;
char delim[2];
//if (sscanf(buffer, "%lf%[\t]%lf", &cur_max, &delim, &cur_min) != 3) {
if (sscanf(buffer, "%lf%1[\t]%lf", &cur_max, delim, &cur_min) != 3) {
// printf("skipped line %s", buffer);
continue;
}
Above will accept 1 or more "\t"
between the numeric text as "%1[\t]"
needs 1 and only 1 '\t'
and the following "%lf"
will consume 0 or more leading white-spaces, including tabs - and then the number.
If code needs to use scanf()
and read 1 and only 1 tab between numbers, consider using "%n"
to record the offset of the scan. With "%n%*1[\t] %n"
, anything other than 1 tab will fail n1 + 1 != n2
.
int n1, n2;
if (sscanf(buffer, "%lf%n%*1[\t] %n%lf", &cur_max, &n1, &n2, &cur_min) != 2 ||
n1 + 1 != n2) {
printf("skipped line <%s>\n", buffer);
}
Alternatively, look to parsing with strtod()
, strchr()
, strtok()
, strcspn()
, etc.
Upvotes: 1
Reputation: 6776
The %[\t]
conversion will write the TAB character, plus a terminating NUL character, but you have only given it space for one character.
Upvotes: 2