Reputation: 11
I'm trying to read a binary file of unknown length into buffer chunks without using functions like lseek(), fseek, or eof, but I am kind of confused about how to implement this. Thanks in advance.
#include <stdio.h>
#include <stdlib.h>
/* Number of payload bytes stored in each list node. */
enum { BUFFER_DATA_SIZE = 1012 };

/*
 * One node of a singly linked list of file chunks.
 *
 * Use sizeof(((Buffer *)0)->data) or BUFFER_DATA_SIZE when a byte count is
 * needed. Do not compute it as sizeof(Buffer) minus a guessed pointer size:
 * the size of a pointer (and any padding) is implementation-defined.
 */
typedef struct Buffer{
    unsigned char data[BUFFER_DATA_SIZE]; /* raw bytes of this chunk */
    struct Buffer *next;                  /* following node, or NULL at the end of the list */
}Buffer;
/*
 * Copy `length` bytes from `src` to `dest`, front to back.
 *
 * dest   start of the destination bytes (at least `length` bytes writable)
 * src    start of the source bytes (at least `length` bytes readable)
 * length number of bytes to copy; 0 copies nothing
 *
 * The copy works on plain bytes, so it accepts any object, not only a Buffer.
 * Passing a Buffer pointer still copies into/out of its `data` array because
 * `data` is the first member of the struct. The index is a size_t so it can
 * never overflow or be compared against `length` with mismatched signedness.
 */
void mymemcpy(void *dest, void *src, size_t length){
    unsigned char *out = dest;
    const unsigned char *in = src;
    for(size_t i = 0; i < length; i++){
        out[i] = in[i];
    }
}
/*
 * Prepend a new node holding a copy of read_buffer's data to the list.
 *
 * head        current first node, or NULL for an empty list
 * read_buffer node whose bytes are copied; it is NOT linked into the list and
 *             remains owned by the caller
 * size        number of bytes to copy; values below 0 are treated as 0 and
 *             values above the node capacity are clamped to it
 *
 * Returns the new head of the list. If the node cannot be allocated an error
 * is reported on stderr and `head` is returned unchanged, so the existing
 * list is never lost.
 */
Buffer* add_buffer_front(Buffer *head, Buffer *read_buffer, int size){
    Buffer *new_buffer = malloc(sizeof *new_buffer);
    if(new_buffer == NULL){
        perror("add_buffer_front: malloc");
        return head;
    }

    size_t count = 0;
    if(size > 0){
        count = (size_t)size;
        if(count > sizeof new_buffer->data){
            count = sizeof new_buffer->data;
        }
    }
    mymemcpy(new_buffer, read_buffer, count);

    /* Always set the link: with an empty list this stores NULL, which is what
       terminates the list for anyone walking it. */
    new_buffer->next = head;
    return new_buffer;
}
/*
 * Print the bytes stored in the list as uppercase hex and free every node.
 *
 * head   first node of the list, or NULL for an empty list
 * length total number of valid bytes stored across the WHOLE list
 *
 * Nodes are consumed in list order. Each node contributes at most
 * sizeof(data) bytes, and printing stops contributing bytes once `length`
 * bytes have been written, so every node except possibly the last one is
 * assumed to be full. All nodes are released; the caller must not use `head`
 * after this call.
 */
void display_List(Buffer *head, size_t length){
    size_t remaining = length;
    Buffer *current = head;
    while(current != NULL){
        size_t chunk = sizeof current->data;
        if(chunk > remaining){
            chunk = remaining;
        }
        for(size_t i = 0; i < chunk; i++){
            printf("%02X",(unsigned)current->data[i]);
        }
        remaining -= chunk;

        /* Fetch the link before freeing: the node must not be read afterwards. */
        Buffer *next = current->next;
        free(current);
        current = next;
    }
}
int main(int argc, char **argv){
FILE *fd;
Buffer *head_buffer = NULL;
int file_length = 0;
int eof_int = 1;
if(argc != 2){
printf("Usage: readFile <filename>\n");
return 1;
}
fd = fopen(argv[1], "rb");
while(eof_int != 0){
Buffer *new_buffer = malloc(sizeof(Buffer));
eof_int = fread(new_buffer, sizeof(Buffer)-12, 1, fd);
if(eof_int == 0){
//size_t length
//
//
head_buffer = add_buffer_front(head_buffer, new_buffer, length);
file_length += length;
}else{
head_buffer = add_buffer_front(head_buffer, new_buffer, (sizeof(new_buffer->data)));
file_length += (sizeof(new_buffer->data));
}
}
display_List(head_buffer, file_length);
fclose(fd);
return 0;
}
Upvotes: 1
Views: 1507
Reputation: 164919
The trick you're looking for is that fread returns the number of items read. You're asking for 1 item the size of a whole buffer, so it can only tell you that it read 0 or 1 buffers. Instead, flip it and reverse it: ask for a buffer's worth of 1-byte items.
// Item size 1, item count = buffer capacity, so the return value counts bytes.
// NOTE: sizeof(Buffer)-12 carries over the question's guess for the size of the `next` pointer.
size_t bytes_read = fread(buffer, 1, sizeof(Buffer)-12, fd);
Now you know how many bytes were read into your buffer. We can add a size field to the Buffer so each buffer remembers how many bytes it read and only prints that many.
// Number of bytes requested per read; passed as buffer_size to Buffer_read.
const size_t BUFFER_SIZE = 1024;
// One node of a singly linked list of file chunks.
typedef struct Buffer {
// I'll explain why I switched to a pointer in a moment
// Heap block that Buffer_read fills with file bytes; NULL in a freshly created buffer.
unsigned char *data;
// Number of valid bytes in data (what fread reported); 0 in a freshly created buffer.
size_t size;
// Next node in the list, or NULL at the end.
struct Buffer *next;
} Buffer;
// Write each of the buffer's `size` valid bytes to stdout as two uppercase
// hex digits followed by a space. Prints nothing when size is 0.
void Buffer_print( Buffer *buffer ) {
    size_t index = 0;
    while( index != buffer->size ) {
        printf("%02hhX ", buffer->data[index]);
        ++index;
    }
}
// Allocate an empty buffer node: no data, size 0, no successor.
// Returns the new node, or NULL if the allocation fails (previously a failed
// malloc was dereferenced). The caller owns the node and must free it.
Buffer *Buffer_new(void) {
    Buffer *buffer = malloc(sizeof *buffer);
    if( buffer == NULL ) {
        return NULL;
    }
    // Initialize every field so no caller ever sees garbage.
    buffer->size = 0;
    buffer->data = NULL;
    buffer->next = NULL;
    return buffer;
}
Note that I'm careful to initialize all the fields of the buffer, else we risk getting garbage.
Now that we've changed our buffer, our earlier assumption about its size and layout is broken. That's ok, we should be reading straight into buffer->data anyway.
// Read up to buffer_size bytes from fp into a freshly allocated buffer->data.
//
// buffer      node to fill; its data pointer is overwritten, so it should not
//             already own an allocation
// buffer_size maximum number of bytes to read
// fp          open input stream
//
// Returns the number of bytes read, which is also stored in buffer->size.
// Returns 0 at end of file, on a read error, or if the data block cannot be
// allocated; in all of those cases buffer->data is left NULL so an empty
// buffer really holds no allocation. Use feof()/ferror() on fp to tell end of
// file from an error.
size_t Buffer_read( Buffer *buffer, size_t buffer_size, FILE* fp ) {
    buffer->size = 0;
    buffer->data = malloc(buffer_size);
    if( buffer->data == NULL ) {
        return 0;
    }

    size_t bytes_read = fread(buffer->data, 1, buffer_size, fp);
    if( bytes_read == 0 ) {
        // Nothing was read: release the unused block instead of keeping it around.
        free(buffer->data);
        buffer->data = NULL;
    }
    buffer->size = bytes_read;
    return bytes_read;
}
Now that the buffer knows how much data it has read, we can allocate whatever size of data we like. There's no need to hard-code that into the struct. This makes the code more flexible and efficient. It lets us cheaply allocate empty buffers, and that will make things much simpler.
We can also get away with using malloc and leaving the contents of buffer->data uninitialized. If fread only partially fills buffer->data, the rest will remain garbage. That's ok: knowing the size of the data we've read means we stop printing before we hit garbage.
Now we can construct our loop. When it's read 0 bytes we know it's done reading.
while( Buffer_read( buffer, BUFFER_SIZE, fp ) > 0 ) {
... now what ...
}
fclose(fp);
The way this linked list works, when you add to the list you attach the new node to tail->next and make that node the new tail. This is often called "pushing".
// Link new_tail after tail and hand it back so the caller can keep it as the
// list's new tail.
Buffer *Buffer_push( Buffer *tail, Buffer *new_tail ) {
    return tail->next = new_tail;
}
// Start with one empty buffer that serves as both head and tail of the list.
Buffer *head = Buffer_new();
Buffer *tail = head;
// Fill the current tail; as long as that read returned at least one byte,
// push a fresh empty buffer and make it the new tail.
while( Buffer_read( tail, BUFFER_SIZE, fp ) > 0 ) {
tail = Buffer_push( tail, Buffer_new() );
}
fclose(fp);
Note that we start with an empty head which is also the tail. Starting with both of these allocated makes the loop much simpler: there's no need to check if( head ) or if( tail ). It does mean that we always have an empty buffer on the end of our list. That's ok. Since we're no longer using a fixed-size buffer->data array inside the struct, empty buffers are now tiny and cheap.
The final step is to print everything. We can already print a single buffer, so we just need to walk the linked list and print each buffer.
// Print every buffer in the list, starting at head and following the next
// links until NULL is reached.
void Buffer_print_all( Buffer *head ) {
    Buffer *node = head;
    while( node != NULL ) {
        Buffer_print(node);
        node = node->next;
    }
}
Buffer_print_all(head);
That final, empty buffer hanging off the end is fine. It knows its size is 0, so Buffer_print won't read anything from its buffer->data.
Upvotes: 0
Reputation: 13757
You have several problems.
(1) fread returns the number of items read, but it does not return an EOF indication. A count smaller than the one you asked for means the stream hit end of file or an error; call feof(stream) (or ferror(stream)) to find out which.
(2) You are saying your next pointer is 12 bytes. This is a very dangerous assumption. Prefer to read exactly the 1012 bytes you've allocated for the data array, straight into that array. In all likelihood you are currently printing stuff that wasn't read in, but is just uninitialized memory.
(3) Use the return value from fread to decide how much memory to copy.
Upvotes: 2
Reputation: 60007
Please see the comments in the code below - also consider changing the 1012 to use a #define.
#include <stdio.h>
#include <stdlib.h>
// Number of payload bytes stored in each list node.
#define BUFFER_DATA_SIZE 1012

// One node of a singly linked list of file chunks.
// Use sizeof on the data member (or BUFFER_DATA_SIZE) when a byte count is
// needed; the size of the next pointer is implementation-defined, so never
// derive the payload size by subtracting a guessed pointer size.
typedef struct Buffer{
    unsigned char data[BUFFER_DATA_SIZE]; // raw bytes of this chunk
    struct Buffer *next;                  // following node, or NULL at the end of the list
}Buffer;
// Bookkeeping for one file held in memory as a linked list of Buffer chunks.
typedef struct {
// First chunk of the list; main initializes it to NULL for an empty list.
Buffer *head;
// Last chunk of the list; main initializes it to NULL for an empty list.
Buffer *tail;
// Running total that add_buffer increases by its extra_length argument.
size_t length;
} MyFile;
/*
void mymemcpy(void *dest, void *src, size_t length){
Buffer *buffer_toFill = (Buffer *)dest;
Buffer *buffer_toAdd = (Buffer *)src;
int a = 0;
for(int i = 0; i < length; i++){
buffer_toFill->data[i] = buffer_toAdd->data[i];
}
}
Buffer* add_buffer_front(Buffer *head, Buffer *read_buffer, int size){
Buffer *new_buffer = malloc(sizeof(Buffer));
mymemcpy(new_buffer, read_buffer, size);
if(head != NULL){
new_buffer->next = head;
}
return new_buffer;
}
*/
// Lets make this easier - The buffer has already been "malloced" once - why do it again
// And why are you reversing the file
// Perhaps
// Append to_be_added to the end of file's list and account for its bytes.
//
// to_be_added  already-allocated node; the list takes it over as its new tail
// file         list bookkeeping (head, tail, running length)
// extra_length number of valid bytes stored in to_be_added
//
// The tail pointer is advanced on every call. Previously it was only set for
// the first node, so each later call overwrote the first node's next pointer
// and orphaned the node added before it.
void add_buffer(Buffer *to_be_added, MyFile *file, size_t extra_length) {
    to_be_added->next = NULL; // This is always the case as it is the last one
    if (file->tail) { // The list already has at least one item
        file->tail->next = to_be_added;
    } else { // First buffer!
        file->head = to_be_added;
    }
    file->tail = to_be_added;
    file->length += extra_length;
}
/*
void display_List(Buffer *head, size_t length){
Buffer *current = head;
while(current != NULL){
for(int i = 0; i < length; i++){
printf("%02X",(unsigned)current->data[i]); //this shows different value compare with xxd <filename>
//printf("%c", current->data[i]);
}
Buffer *prev = current;
free(prev);
current = current->next;
}
}
*/
// Instead pass in the new structure
// Print every valid byte of the file as two uppercase hex digits.
//
// file  list bookkeeping; file->length is the total number of valid bytes
//       across all chunks
//
// Chunks are visited in list order and each one contributes at most the
// capacity of its data array. The remaining byte count is reduced after every
// chunk so the last, partially filled chunk prints only its valid bytes.
// The list is not modified or freed.
void display_list(MyFile *file) {
    size_t contents_left = file->length;
    for (Buffer *current = file->head; current; current = current->next) {
        // At most each chunk holds sizeof(data) bytes - check for that
        size_t chunk_length = contents_left > sizeof current->data
                                  ? sizeof current->data
                                  : contents_left;
        for (size_t i = 0; i < chunk_length; i++) {
            printf("%02X", (unsigned)current->data[i]);
        }
        contents_left -= chunk_length;
    }
}
int main(int argc, char **argv){
FILE *fd;
MyFile read_file;
read_file.head = NULL;
read_file.tail = NULL;
read_file.length = 0;
Buffer *head_buffer = NULL;
int file_length = 0;
int eof_int = 1;
if(argc != 2){
printf("Usage: readFile <filename>\n");
return 1;
}
fd = fopen(argv[1], "rb");
// Check fd
if (fd == NULL) {
// error stuff
return EXIT_FAILURE; // Look up the include for this
}
while(eof_int != 0){
Buffer *new_buffer = malloc(sizeof(Buffer));
eof_int = fread(new_buffer->data, 1012, 1, fd); // Do not make assumptions on the size of a pointer and store it in the correct location
if(eof_int == 0) { // Read nothing
free(new_buffer); // We was too optimistic! Did Not need this in the end
break;
} else {
add_buffer(&read_file, new_buffer, eof_int);
}
}
display_List(&read_file);
fclose(fd);
return 0;
}
Upvotes: 1