Reputation: 343
I'm attempting to find all the files in a directory of a certain type (hardcoded here to tif) and copy them into an array. Everything compiles cleanly (gcc -Wall gives no errors or warnings) but there are some memory issues. Though the program I've written seems to run cleanly (no segfaults), some of the file names are weird characters you get when you've got something other than ascii values in your string. This led me to run with valgrind, which shows errors (output below) but I can't track down what the actual problem is. In some directories, valgrind it self segfaults (the program runs clean in the same dir).
#include <sys/types.h>
#include <dirent.h>
#include <stdio.h>
#include <search.h>
#include <string.h>
#include <error.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdbool.h>
#define min(X, Y) ((X) < (Y) ? (X) : (Y))
int exitStatus = 0;
/*------------------------------------------------------------------------------
* array_find
*
* ARGS - Takes a pointer to a string, a pointer to an array of strings, and an
* int representing the length of the array.
*
* RETURN - returns an int indicating the first index of the key in the array,
* or -1 if the key was not found
*-----------------------------------------------------------------------------*/
int array_find(char *key, char *argv[], int argc){
int i;
for (i = 0; i < argc; i++)
{
#ifdef DEBUG_array_find
printf("strncmp(%s, %s, %d) = %d\n", key, argv[i], min(strlen(key), strlen(argv[i])), strncmp(key, argv[i], min(strlen(key), strlen(argv[i]))));
#endif
if (strncmp(key, argv[i], min(strlen(key), strlen(argv[i]))) == 0)
{
return i;
}
}
return -1;
}
/*------------------------------------------------------------------------------
* ends_with
*
* ARGS - str = string to be checked
* sub = string to look for
*
* RETURN - Returns true if str ends with sub or both strings are NULL.
False otherwise.
*-----------------------------------------------------------------------------*/
bool ends_with(char *str, char *sub){
if (str == NULL && sub == NULL)
{
return true;
}
if (str == NULL || sub == NULL)
{
return false;
}
char *last_instance_of_sub = rindex(str, *sub); //Finds the last index of the first char of sub
int sub_len = strlen(sub);
if (last_instance_of_sub == NULL || strlen(last_instance_of_sub) != sub_len)
{
return false;
}
return strncmp(last_instance_of_sub, sub, sub_len) == 0;
}
int main(int argc, char *argv[])
{
/*Parse args*/
DIR *dir;
int index = array_find("-d", argv, argc);
char *dirname;
if (index >= 0)
{
dirname = argv[index + 1];
dir = opendir(dirname);
}
else
{
dirname = getcwd(NULL, 0);
if (dirname == NULL)
{
perror("Error getting current directory name.");
exit(1);
}
dir = opendir(dirname);
}
if (dir == NULL)
{
perror(dirname);
exit(1);
}
#ifdef DEBUG_MAIN
printf("dirname = %s\n", dirname);
#endif
int threads = 1;
index = array_find("-t", argv, argc);
if (index >= 0)
{
threads = atoi(argv[index + 1]);
}
#ifdef DEBUG_MAIN
printf("threads = %d\n", threads);
#endif
struct dirent *entry = readdir(dir);
int num_files = 0;
while (entry != NULL)
{
if (ends_with(entry->d_name, ".tif")){
#ifdef DEBUG_MAIN
printf("%s\n", entry->d_name);
#endif
num_files++;
}
entry = readdir(dir);
}
if (closedir(dir) != 0)
{
perror("Failed to close directory.");
}
#ifdef DEBUG_MAIN
printf("Num files = %d\n", num_files);
#endif
dir = opendir(dirname);
if (dir == NULL)
{
perror(dirname);
exit(1);
}
entry = readdir(dir);
char *file_names[num_files];
int i = 0;
for(; entry != NULL; i++)
{
if (ends_with(entry->d_name, ".tif")){
file_names[i] = strdup(entry->d_name);
if (file_names[i] == NULL)
{
perror("Could not create the filename array.\n");
exit(1);
}
}
entry = readdir(dir);
}
/* #ifdef DEBUG_MAIN*/
for (i = 0; i < num_files; i++)
{
printf("%s\n", file_names[i]);
/* free(file_names[i]);*/
}
/* #endif*/
free(dir);
return exitStatus;
}
Valgrind output:
==24488== Memcheck, a memory error detector
==24488== Copyright (C) 2002-2012, and GNU GPL'd, by Julian Seward et al.
==24488== Using Valgrind-3.8.1 and LibVEX; rerun with -h for copyright info
==24488== Command: ./myprogram -d /home/chris/Pictures/Catalinas\ with\ Christie/Processed/
==24488==
dirname = /home/chris/Pictures/Catalinas with Christie/Processed/
threads = 1
cacti2_lzn.tif
DSC_2139_lzn.tif
DSC_1512_lzn.tif
DSC_1296_lzn.tif
DSC_1577_lzn.tif
DSC_1658_lzn.tif
DSC_1293_lzn.tif
DSC_1631_lzn.tif
DSC_1418_lzn.tif
DSC_1315_2crop_lzn.tif
DSC_1377_lzn2crop.tif
DSC_2167_lzn.tif
1981-1985-HDR3_lzn2.tif
DSC_2129_lzn.tif
DSC_1448_lzn.tif
DSC_1607_lzn.tif
DSC_1564_lzn.tif
DSC_2052-DSC_2072_lzn.tif
DSC_1487_lzn.tif
DSC_1591_2_lzn.tif
DSC_2124_lzn.tif
DSC_1622_lzn.tif
DSC_2157_lzn.tif
DSC_1685_lzn.tif
Num files = 24
cacti2_lzn.tif
DSC_2139_lzn.tif
DSC_1512_lzn.tif
DSC_1296_lzn.tif
DSC_1577_lzn.tif
DSC_1658_lzn.tif
==24488== Use of uninitialised value of size 8
==24488== at 0x4C2D7C2: __GI_strlen (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==24488== by 0x4EA4ECB: puts (ioputs.c:36)
==24488== by 0x400D52: main (batch-convert.c:161)
==24488==
==24488== Invalid read of size 1
==24488== at 0x4C2D7C2: __GI_strlen (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==24488== by 0x4EA4ECB: puts (ioputs.c:36)
==24488== by 0x400D52: main (batch-convert.c:161)
==24488== Address 0x0 is not stack'd, malloc'd or (recently) free'd
==24488==
==24488==
==24488== Process terminating with default action of signal 11 (SIGSEGV)
==24488== Access not within mapped region at address 0x0
==24488== at 0x4C2D7C2: __GI_strlen (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==24488== by 0x4EA4ECB: puts (ioputs.c:36)
==24488== by 0x400D52: main (batch-convert.c:161)
==24488== If you believe this happened as a result of a stack
==24488== overflow in your program's main thread (unlikely but
==24488== possible), you can try to increase the size of the
==24488== main thread stack using the --main-stacksize= flag.
==24488== The main thread stack size used in this run was 8388608.
==24488==
==24488== HEAP SUMMARY:
==24488== in use at exit: 33,243 bytes in 25 blocks
==24488== total heap usage: 26 allocs, 1 frees, 66,051 bytes allocated
==24488==
==24488== LEAK SUMMARY:
==24488== definitely lost: 0 bytes in 0 blocks
==24488== indirectly lost: 0 bytes in 0 blocks
==24488== possibly lost: 0 bytes in 0 blocks
==24488== still reachable: 33,243 bytes in 25 blocks
==24488== suppressed: 0 bytes in 0 blocks
==24488== Rerun with --leak-check=full to see details of leaked memory
==24488==
==24488== For counts of detected and suppressed errors, rerun with: -v
==24488== Use --track-origins=yes to see where uninitialised values come from
==24488== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 2 from 2)
Segmentation fault (core dumped)
It's been a while since I've used C at all, but as I understand it (from the man pages), strdup should use malloc to allocate memory on the heap for the copy of the string. Before I remembered the strdup function, I had tried to do exactly that manually, and had the same errors. I thought maybe my code was flawed, and thought the strdup function would take care of it, but apparently there is some other issue.
Can anyone tell me what I'm doing wrong?
EDIT 1: As per requests, I've added the full source of the program. Also, for those saying to check i against num_files, as you'll see, I count the number of tif files ahead of time, so I know the exact number of files that will be copied into the array, thus checking the index isn't necessary.
Also, as a note, the program was compiled with DEBUG_MAIN defined, so anything in an #ifdef DEBUG_MAIN block does run. No other debug flags were defined.
Upvotes: 3
Views: 1012
Reputation: 754710
The problem is that if you have any entries that don't match your pattern (such as the .
and ..
entries), you skip the corresponding entry in the array. It also means you go writing outside your file_names
array. You should only increment i
when the file name matches.
Using getcwd()
instead of just using .
for the current directory works, but is hardly necessary.
Using free(dir)
instead of closedir(dir)
is an unmitigated disaster.
The command line argument handling is unusual. As originally written, it would accept -delete
as equivalent to -d
. That's not good style.
#include <assert.h>
#include <dirent.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdbool.h>
bool ends_with(char *str, char *sub);
int array_find(char *key, char *argv[], int argc);
int array_find(char *key, char *argv[], int argc)
{
for (int i = 0; i < argc; i++)
{
if (strcmp(key, argv[i]) == 0)
return i;
}
return -1;
}
bool ends_with(char *str, char *sub)
{
if (str == NULL && sub == NULL)
return true;
if (str == NULL || sub == NULL)
return false;
char *last_instance_of_sub = rindex(str, *sub);
size_t sub_len = strlen(sub);
if (last_instance_of_sub == NULL || strlen(last_instance_of_sub) != sub_len)
return false;
return strcmp(last_instance_of_sub, sub) == 0;
}
int main(int argc, char *argv[])
{
int index = array_find("-d", argv, argc);
char *dirname;
if (index >= 0)
{
dirname = argv[index + 1];
}
else
{
dirname = getcwd(NULL, 0);
if (dirname == NULL)
{
perror("Error getting current directory name.");
exit(1);
}
}
DIR *dir = opendir(dirname);
if (dir == NULL)
{
perror(dirname);
exit(1);
}
char suffix[] = ".c";
printf("dirname = %s\n", dirname);
struct dirent *entry;
int num_files = 0;
while ((entry = readdir(dir)) != NULL)
{
if (ends_with(entry->d_name, suffix))
num_files++;
}
if (closedir(dir) != 0)
{
perror("Failed to close directory.");
}
printf("Num files = %d\n", num_files);
dir = opendir(dirname);
if (dir == NULL)
{
perror(dirname);
exit(1);
}
char *file_names[num_files];
int i = 0;
while ((entry = readdir(dir)) != NULL)
{
if (ends_with(entry->d_name, suffix))
{
file_names[i] = strdup(entry->d_name);
if (file_names[i++] == NULL)
{
perror("Could not create the filename array.\n");
exit(1);
}
}
}
assert(i <= num_files);
if (i < num_files)
num_files = i;
for (i = 0; i < num_files; i++)
{
printf("%s\n", file_names[i]);
free(file_names[i]);
}
closedir(dir);
return 0;
}
Upvotes: 0
Reputation: 1667
in your code this part for(; entry != NULL; i++)
is way too dangerous , for example lets say that the value of num_files
is 1000 , what if a given directory contains 1002 entries , then you'll have a problem.
replace it with for(; entry != NULL && i < num_files ; i++)
Upvotes: 1