Reputation: 311
I'm writing a C program that takes a text file name as an argument and prints its contents.
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
/*
 * Print the contents of a text file to standard output using only the
 * open/read/write/close system calls.
 *
 * The file name is taken from the first command-line argument; when no
 * argument is given, "source.txt" is used.
 *
 * Returns 0 on success, or 1 if the file cannot be opened, read, or written.
 */
int main(int argc, char *argv[])
{
    const char *filepath = (argc > 1) ? argv[1] : "source.txt";

    int fd = open(filepath, O_RDONLY);
    if (fd == -1)
    {
        printf("\n open() failed with error [%s]\n", strerror(errno));
        return 1;
    }

    /*
     * The buffer is only a scratch area: each chunk is written out as soon
     * as it is read, so the file may be of any length. The bytes are never
     * treated as a C string, so no NUL terminator is required.
     */
    char buffer[1000];
    ssize_t nread;
    while ((nread = read(fd, buffer, sizeof buffer)) > 0)
    {
        /* write() may accept fewer bytes than requested; loop until done */
        ssize_t done = 0;
        while (done < nread)
        {
            ssize_t nwritten = write(STDOUT_FILENO, buffer + done,
                                     (size_t)(nread - done));
            if (nwritten == -1)
            {
                printf("\n write() failed with error [%s]\n", strerror(errno));
                close(fd);
                return 1;
            }
            done += nwritten;
        }
    }

    if (nread == -1)
    {
        printf("\n read() failed with error [%s]\n", strerror(errno));
        close(fd);
        return 1;
    }

    close(fd);
    return 0;
}
I don't know how to get the number of characters in a text file, so I just set the array size to 1000. It seems to work, but how can I make this work for a text file with any number of characters? I'm only allowed to use the open, read, write, and close system calls.
Upvotes: 4
Views: 2114
Reputation: 2188
The right answer is "you don't". If you check the size of the file before reading, you may risk that it changes before or during read. The right way is to read the file once, and only once, from beginning to end, and do whatever you want to do with the data while you are reading. (There are naturally exceptions to this "read once from beginning to end", but very unlikely for text files)
If you want to store the file in memory, you have to grow the buffer with realloc
as you are reading from the file:
/*
 * Read the whole file at `path` into a newly allocated, zero-terminated
 * buffer.
 *
 * The file is read once, from beginning to end, in fixed-size chunks; the
 * buffer is grown with realloc before every read, so the file size does not
 * need to be known in advance.
 *
 * Returns a heap-allocated string that the caller must free(). If the file
 * cannot be opened or read, or memory cannot be allocated, a message is
 * printed with perror() and the process exits with EXIT_FAILURE.
 */
char *read_file(const char *path)
{
    enum { CHUNK_SIZE = 1000 };

    char *file_buffer = NULL;
    size_t file_length = 0;

    int fd = open(path, O_RDONLY);
    if (fd == -1)
    {
        perror("Failed to open file");
        exit(EXIT_FAILURE);
    }

    while (1)
    {
        /*
           Make space for one more chunk after the existing content, plus
           one byte so the terminator always fits once EOF is reached
        */
        char *grown = realloc(file_buffer, file_length + CHUNK_SIZE + 1);
        if (!grown)
        {
            perror("Failed to allocate memory");
            exit(EXIT_FAILURE);
        }
        file_buffer = grown;

        /* Read into buffer after existing content */
        ssize_t read_length = read(fd, file_buffer + file_length, CHUNK_SIZE);
        if (read_length == -1)
        {
            perror("Failed to read from file");
            exit(EXIT_FAILURE);
        }
        if (read_length == 0)
        {
            /* End of file is reached */
            break;
        }

        /* some bytes were successfully read, so update file_length */
        file_length += (size_t)read_length;
    }

    close(fd);

    /*
       Since this is a text-file, having a zero-terminated string is
       probably appropriate
    */
    file_buffer[file_length] = '\0';

    /*
       Give back the unused tail of the last chunk. If shrinking fails the
       original (larger) buffer is still valid, so return that instead.
    */
    char *trimmed = realloc(file_buffer, file_length + 1);
    return trimmed ? trimmed : file_buffer;
}
Naturally, if you only want to print the content of the buffer, there is no need for realloc
(or malloc
for that matter), just print out the content as you go:
/*
 * Copy the file at `path` to standard output, one chunk at a time.
 *
 * No heap allocation is needed: each chunk is read into a small scratch
 * buffer and written out before the next one is read.
 *
 * Returns the number of bytes written. If the file cannot be opened or
 * read, or writing fails, a message is printed with perror() and the
 * process exits with EXIT_FAILURE.
 */
size_t print_file(const char *path)
{
    enum { CHUNK_SIZE = 1000 };

    /*
       We keep track of file length, just for the fun of it, and to have
       something to return
    */
    size_t file_length = 0;

    int fd = open(path, O_RDONLY);
    if (fd == -1)
    {
        perror("Failed to open file");
        exit(EXIT_FAILURE);
    }

    while (1)
    {
        /* Temporary scratch buffer */
        char buffer[CHUNK_SIZE];

        ssize_t read_length = read(fd, buffer, sizeof buffer);
        if (read_length == -1)
        {
            perror("Failed to read from file");
            exit(EXIT_FAILURE);
        }
        if (read_length == 0)
        {
            /*
               End of file is reached, and nothing more to do.
               We return file_length, just to return something
            */
            close(fd);
            return file_length;
        }

        /*
           Some bytes were successfully read, so we print them. write() may
           accept fewer bytes than requested, hence the inner loop.
        */
        const char *b = buffer;
        while (read_length)
        {
            /* Standard output is descriptor 1; descriptor 0 is standard input */
            ssize_t write_length = write(STDOUT_FILENO, b, (size_t)read_length);
            if (write_length == -1)
            {
                perror("Failed to write to file");
                exit(EXIT_FAILURE);
            }
            if (write_length == 0)
            {
                /*
                   Nothing could be written to standard output.
                   Handling this better is left as an exercise
                   for the reader
                */
                close(fd);
                return file_length;
            }

            file_length += (size_t)write_length;
            b += write_length;
            read_length -= write_length;
        }
    }
}
This code uses open
/read
/write
to do IO. This is only because that is what the OP asked for. A better program would have used fopen
/fread
/fwrite
or fopen
/fgetc
/fputc
.
Upvotes: 3
Reputation: 1087
To my knowledge there are two options:
Read in steps, for example, 1000 characters at a time. Use fread
.
Determine the file size before opening the file, using the system call stat
. Use the command man 2 stat
for help.
STAT(2) Linux Programmer's Manual STAT(2)
NAME
stat, fstat, lstat, fstatat - get file status
SYNOPSIS
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
int stat(const char *pathname, struct stat *statbuf);
int fstat(int fd, struct stat *statbuf);
int lstat(const char *pathname, struct stat *statbuf);
#include <fcntl.h> /* Definition of AT_* constants */
#include <sys/stat.h>
int fstatat(int dirfd, const char *pathname, struct stat *statbuf,
int flags);
...
struct stat {
dev_t st_dev; /* ID of device containing file */
ino_t st_ino; /* Inode number */
mode_t st_mode; /* File type and mode */
nlink_t st_nlink; /* Number of hard links */
uid_t st_uid; /* User ID of owner */
gid_t st_gid; /* Group ID of owner */
dev_t st_rdev; /* Device ID (if special file) */
off_t st_size; /* Total size, in bytes */
// ^^^^^^^^^^^^^^^^^^ Use this.
blksize_t st_blksize; /* Block size for filesystem I/O */
blkcnt_t st_blocks; /* Number of 512B blocks allocated */
/* Since Linux 2.6, the kernel supports nanosecond
precision for the following timestamp fields.
For the details before Linux 2.6, see NOTES. */
struct timespec st_atim; /* Time of last access */
struct timespec st_mtim; /* Time of last modification */
struct timespec st_ctim; /* Time of last status change */
#define st_atime st_atim.tv_sec /* Backward compatibility */
#define st_mtime st_mtim.tv_sec
#define st_ctime st_ctim.tv_sec
};
...
Option 1 is more reliable, since with option 2 the file size may change between size determination and file reading.
Upvotes: 2
Reputation: 21
// I got this from:
// https://www.geeksforgeeks.org/c-program-to-count-the-number-of-characters-in-a-file/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Count the characters in the file at `path` by reading it to EOF.
 *
 * The file is opened in text mode and read one character at a time.
 * Returns the number of characters read, or 0 (after printing a message)
 * if the file cannot be opened. The count is an int, so it is only
 * meaningful for files with at most INT_MAX characters.
 */
int numChars(const char *path) {
    /* The path is used directly; copying it into a fixed-size array
       would overflow for long paths */
    FILE *fp = fopen(path, "r");
    if (fp == NULL) {
        printf("Could not open file %s", path);
        return 0;
    }

    /* The counter must start at zero; reading it uninitialized is
       undefined behaviour */
    int count = 0;
    while (getc(fp) != EOF) {
        count++;
    }

    fclose(fp);
    return count;
}
/*
 * Entry point: count the characters in the hard-coded file "source.txt"
 * with numChars() and print the result. Command-line arguments are ignored.
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    char *target = "source.txt";
    int total = numChars(target);

    printf("\nThe number of chars in %s is: %d\n", target, total);
    return 0;
}
Upvotes: 1
Reputation: 240759
Write a loop. In the loop, read 1000 bytes (or any number you find convenient) and pay attention to the return value of read
, which is the number of bytes actually read, so you can add it to your total. If read
returns 0, it reached the end of file, so you can stop. If it returns -1, there was an error (in errno
) and you probably want to report it and exit.
Upvotes: 3