Reputation: 1003
I am trying to implement a program that prints all the hole and data segments of a regular sparse file, using lseek(2) with its SEEK_DATA and SEEK_HOLE whence values. The output should look something like this:
$ ./list_hole_and_data_segs sparse_file
This file has 100 bytes
[0, 10]: hole
[11, 99]: data(end)
/*
* list_hole_and_data_segs.c
*/
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
/* Kind of region a byte of the file belongs to. */
enum Type {
    HOLE = 0, /* region reported by lseek(2) SEEK_HOLE */
    DATA = 1, /* region reported by lseek(2) SEEK_DATA */
};
/* Print the size of the file open on `fd`, then its hole and data segments. */
void find_all_holes(int fd);
/*
 * Entry point: open the file named by the first command-line argument
 * read-only and list its hole and data segments on stdout.
 *
 * Exits with EXIT_FAILURE if no file name is given or the file cannot be
 * opened; returns 0 otherwise.
 */
int main(int ac, char *av[])
{
    // av[1] is only valid when at least one argument was supplied
    if (ac < 2) {
        fprintf(stderr, "Usage: %s <file>\n",
                (ac > 0 && av[0]) ? av[0] : "list_hole_and_data_segs");
        exit(EXIT_FAILURE);
    }

    int fd = open(av[1], O_RDONLY);
    if (fd == -1) {
        perror("open");
        exit(EXIT_FAILURE);
    }

    find_all_holes(fd);
    close(fd);
    return 0;
}
void find_all_holes(int fd)
{
off_t cur_offset = 0; // current offset
enum Type cur_type; // current byte type
off_t file_size = lseek(fd, 0, SEEK_END);
off_t index_of_last_byte = file_size - 1;
printf("This file has %ld bytes\n", file_size);
// check the type of byte 0
off_t res = lseek(fd, 0, SEEK_HOLE);
if (res == 0) {
cur_type = HOLE;
} else if (res == file_size) {
printf("[0, %ld]: data(then exit)\n", index_of_last_byte);
exit(0);
} else {
cur_type = DATA;
cur_offset = res;
}
while (cur_offset <= index_of_last_byte) {
off_t new_offset =lseek(fd, cur_offset,
((cur_type == DATA) ? SEEK_HOLE : SEEK_DATA));
if ((cur_type == HOLE && new_offset == -1 && errno == ENXIO) ||
(cur_type == DATA && new_offset == file_size)) {
// from current position to the end of this file: `cur_type`
printf("[%ld, %ld]: %s(end)\n", cur_offset,
index_of_last_byte,
((cur_type == DATA) ? "data" : "hole"));
break; // exit of while loop
} else {
// from current offset to the new offset: `cur_type`
printf("[%ld, %ld]: %s\n", cur_offset, new_offset - 1,
((cur_type == DATA) ? "data" : "hole"));
cur_offset = new_offset;
cur_type = (cur_type == DATA) ? HOLE : DATA;
}
}
}
I use the following code snippet to create a sparse file (error handling is omitted for simplicity):
/*
* create_sparse_file.c
*/
#include <fcntl.h>
#include <unistd.h>
/*
 * Create (or truncate) "sparse_file", skip 10000 bytes from the start
 * without writing anything, then write the 5 bytes "HELLO".
 * No return value is checked here.
 */
int main(void)
{
    static const char payload[] = "HELLO";
    const off_t skipped_bytes = 10000;

    int out = open("sparse_file", O_CREAT | O_WRONLY | O_TRUNC, 0666);

    /* the offset of a freshly opened file is 0, so this lands at byte 10000 */
    lseek(out, skipped_bytes, SEEK_CUR);
    /* sizeof counts the terminating NUL, which must not be written */
    write(out, payload, sizeof payload - 1);

    close(out);
    return 0;
}
$ gcc create_sparse_file.c -o create_sparse_file && ./create_sparse_file
$ stat sparse_file
File: sparse_file
Size: 10005 Blocks: 8 IO Block: 4096 regular file
Device: 803h/2051d Inode: 3556105 Links: 1
# create a normal file for comparison
$ cp sparse_file not_sparse_file --sparse=never
$ stat not_sparse_file
File: not_sparse_file
Size: 10005 Blocks: 24 IO Block: 4096 regular file
Device: 803h/2051d Inode: 3557867 Links: 1
$ gcc list_hole_and_data_segs.c -o list_hole_and_data_segs
$ ./list_hole_and_data_segs sparse_file
This file has 10005 bytes
[0, 8191]: hole
[8192, 10004]: data(end)
As you can see, the output of ./list_hole_and_data_segs sparse_file is:
[0, 8191]: hole
[8192, 10004]: data(end)
But the actual layout of the file is:
[0, 9999]: hole
[10000, 10004]: data(end)
Why is the output of list_hole_and_data_segs inconsistent with the actual layout of the file, and how can I make it correct?
$ uname -a
Linux pop-os 5.17.15-76051715-generic #202206141358~1655919116~22.04~1db9e34 SMP PREEMPT Wed Jun 22 19 x86_64 x86_64 x86_64 GNU/Linux
$ df -hT .
Filesystem Type Size Used Avail Use% Mounted on
/dev/sda3 ext4 103G 54G 44G 56% /
$ stat -f .
File: "."
ID: 4885eb446c106708 Namelen: 255 Type: ext2/ext3
Block size: 4096 Fundamental block size: 4096
Blocks: Total: 26819732 Free: 12805152 Available: 11431226
Inodes: Total: 6856704 Free: 6062138
$ gcc --version
gcc (Ubuntu 11.2.0-19ubuntu1) 11.2.0
$ ldd --version
ldd (Ubuntu GLIBC 2.35-0ubuntu3) 2.35
Upvotes: 0
Views: 302