TheMoop456
TheMoop456

Reputation: 11

Trying to replace text in files throws a error

The point of the script is to take three parameters. Find, replace, prefix. Find being the text to replace, replace being what to replace the text with, and prefix is a special case. If prefix is in the text, you replace the prefix (some text) with prefix+replace. I would like to know why the below code throws a error right after saying opened file. It only seems to throw an error if the text being replaced is repeated like "aaa", "bbb" where "a" is what is being replaced.

 Opened file.txt
 *** Error in `./a.out': malloc(): memory corruption: 0x00005652fbc55980 ***

There's also the occasionally seg fault after printing "Trying to replace for file ...". I'm not fluent in C and GDB on my system resulted in just missing library errors which has nothing to do with this. Here is the code:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dirent.h>

char concat(const char *s1, const char *s2)
{
    char *result = calloc(strlen(s1)+strlen(s2)+1, 1);
    strcpy(result, s1);
    strcat(result, s2);
    printf("Prefix will be replaced with %s.\n", result);
    return result;
}

static int replaceString(char *buf, const char *find, const char *replace, const char *prefix)
{

    int olen, rlen;
    char *s, *d;
    char *tmpbuf;

    if (!buf || !*buf || !find || !*find || !replace)
        return 0;

    tmpbuf = calloc(strlen(buf) + 1, 1);

    if (tmpbuf == NULL)
        return 0;

    olen = strlen(find);
    rlen = strlen(replace);

    s = buf;
    d = tmpbuf;

    while (*s) {
        if (strncmp(s, find, olen) == 0) {
            strcpy(d, replace);
            s += olen;
            d += rlen;
        }
        else
        {
            *d++ = *s++;
        }
    }

    *d = '\0';

   if(strcmp(buf, tmpbuf) == 0)
   {
          free(tmpbuf);
          return 0;
   }
   else
   {
         strcpy(buf, tmpbuf);
         free(tmpbuf);
         printf("%s", buf);
         printf("Replaced!\n");
         return 1;
   }

}

void getAndReplace(char* filename, char* find, char* replace, char* prefix)
{

   long length;
   FILE* f = fopen (filename, "r");
   char* buffer = 0;

   if (f)
   {
      fseek (f, 0, SEEK_END);
      length = ftell (f);
      fseek (f, 0, SEEK_SET);
      buffer = calloc(length+1, 1); //If i use malloc here, any file other than the first has garbage added to it. Why?
      if (buffer)
      {
        fread(buffer, 1, length, f);
      }
      fclose(f);
   }

   if(buffer)// && strlen(buffer) > 1)
   {
      int result = replaceString(buffer, find, replace, prefix);

      if(result == 0)
      {
         printf("Trying to replace prefix.\n");
         replace = concat(prefix, replace);
         result = replaceString(buffer, prefix, replace, "");
      }
      else
      {
         printf("Successfully replaced %s with %s\n", find, replace);
      }

      if(result == 1)
      {
         FILE* fp = fopen(filename, "w+");
         if(fp)
         {
             printf("Opened %s\n", filename);
             fprintf(fp, buffer);
             fclose(fp);
             printf("File %s overwritten with changes.\n", filename);
         }
      }
      else
      {
          printf("Nothing to replace for %s\n", filename);
      }
   }
   else
   {
     printf("Empty file.");
   }
   if(buffer)
   {
      free(buffer);
   }
}

int main(int argc, char **argv)
{

    if(argc < 4)
    {
      printf("Not enough arguments given: ./hw3 <find> <replace> <prefix>\n");
      return 1;
    }

    struct dirent *de;

    DIR *dr = opendir(".");

    if (dr == NULL)
    {
        printf("Could not open current directory\n");
        return 0;
    }

    while ((de = readdir(dr)) != NULL)
    {
       if(strlen(de->d_name) > 4 && !strcmp(de->d_name + strlen(de->d_name) - 4, ".txt"))
       {
            printf("Trying to replace for file %s\n", de->d_name);
            getAndReplace(de->d_name, argv[1], argv[2], argv[3]);
       }
    }

    closedir(dr);
    return 0;
}

Upvotes: 0

Views: 95

Answers (1)

Pablo
Pablo

Reputation: 13580

I hope that you concat function

char concat(const char *s1, const char *s2);

is just a typo and you meant

char *concat(const char *s1, const char *s2);

otherwise the function would be returning a pointer as if it were a char.

Using valgrind would give more details where exactly you are reading/writing where you are not allowed to and where you are leaking memory. Without that it's hard to pinpoint the exact place. One thing I noticed is that depending on the length of find and replace, you might not have enough memory for tmpbuf which would lead to a buffer overflow.

I think that the best way to write the replaceString is by making it allocate the memory it needs itself, rather than providing it a buffer to write into. Because you are getting both find and replace from the user, you don't know how large the resulting buffer will need to be. You could calculate it beforehand, but you don't do that. If you want to pass a pre-allocated buffer to replaceString, I'd pass it as a double pointer, so that replaceString can do realloc on it when needed. Or allocate the memory in the function and return a pointer to the allocated memory.

This would be my version:

char *replaceString(const char *haystack, const char *needle, const char *replace)
{
    if(haystack == NULL || needle == NULL || replace == NULL)
        return NULL;

    char *dest = NULL, *tmp;

    size_t needle_len = strlen(needle);
    size_t replace_len = strlen(replace);
    size_t curr_len = 0;

    while(*haystack)
    {
        char *found = strstr(haystack, needle);

        size_t copy_len1 = 0;
        size_t new_size = 0;
        size_t pre_found_len = 0;

        if(found == NULL)
        {
            copy_len1 = strlen(haystack) + 1;
            new_size = curr_len + copy_len1;
        } else {
            pre_found_len = found - haystack;
            copy_len1 = pre_found_len;
            new_size = curr_len + pre_found_len + replace_len + 1;
        }


        tmp = realloc(dest, new_size);
        if(tmp == NULL)
        {
            free(dest);
            return NULL;
        }

        dest = tmp;

        strncpy(dest + curr_len, haystack, copy_len1);

        if(found == NULL)
            return dest; // last replacement, copied to the end

        strncpy(dest + curr_len + pre_found_len, replace, replace_len + 1);
        curr_len += pre_found_len + replace_len;

        haystack += pre_found_len + needle_len;
    }

    return dest;
}

The idea in this version is similar to yours, but mine reallocates the memory as it goes. I changed the name of the arguments to have the same name as the strstr function does based on my documentation:

man strstr

char *strstr(const char *haystack, const char *needle);

Because I'm going to update haystack to point past the characters copied, I use this loop:

while(*haystack)
{
    ...
}

which means it is going to stop when the '\0'-terminating byte is reached.

The first thing is to use strstr to locate a substring that matches needle. Base on whether a substring is found, I calculate how much bytes I would need to copy until the substring, and the new size of the buffer. After that I reallocate the memory for the buffer and copy everything until the substring, then append the replacement, update the curr_len variable and update the haystack pointer to point past the substring.

If the substring is not found, no more replacements are needed. So we have to copy the string pointed to by haystack and return the constructed string. The new size of the destination is curr_len + strlen(haystack) + 1 (the +1 because I want the strncpy function to also copy the '\0'-terminating byte). And it has to copy strlen(haystack) + 1 bytes. After the first strncpy, the function returns dest.

If the substring is found, then we have to copy everything until the substring, append the replacement and update the current length and the haystack pointer. First I calculate the string until the found substring and save it in pre_found_len. The new size of the destination will be curr_len + pre_found_len + replace_len + 1 (the current length + length of string until substring + the length of the replacement + 1 for the '\0'-terminating byte). Now the first strncpy copies only pre_found_len bytes. Then it copies the replacement.

Now you can call it like this:

int main(void)
{
    const char *orig = "Is this the real life? Is this just fantasy?";
    char *text = replaceString(orig, "a", "_A_");
    if(text)
    {
        puts(orig);
        puts(text);
    }

    free(text);
}

which will output:

Is this the real life? Is this just fantasy?
Is this the re_A_l life? Is this just f_A_nt_A_sy?

Now you can use this function in getAndReplace to replace the prefix:

char *getAndReplace(char* filename, char* find, char* replace, char* prefix)
{
    ...

    char *rep1 = replaceString(buffer, find, replace);

    if(rep1 == NULL)
    {
        // error
        free(buffer);
        return NULL;
    }

    char *prefix_rep = malloc(strlen(replace) + strlen(prefix) + 1);
    if(prefix_rep == NULL)
    {
        // error
        free(buffer);
        free(rep1);
        return NULL;
    }

    sprintf(prefix_rep, "%s%s", replace, prefix);

    char *rep2 = replaceString(rep1, prefix, prefix_rep);

    if(rep2 == NULL)
    {
        // error
        free(buffer);
        free(rep1);
        free(prefix_rep);
        return NULL;
    }

    // rep2 has all the replacements
    ...


    // before leaving
    free(buffer);
    free(rep1);
    free(prefix_rep);

    // returning all replacements
    return rep2;
}

When using malloc & co, don't forget to check if they return NULL and don't forget to free the memory when not needed.

Upvotes: 1

Related Questions