nexfwall
nexfwall

Reputation: 23

Extract the file name and its extension in C

So we have a path string /home/user/music/thomas.mp3.

What is an easy way to extract the file name (without extension, "thomas") and its extension ("mp3") from this string? I would like one function for the file name and one for the extension, using only GNU libc.


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_FILENAME_SIZE 256

/*
 * Return the part of `str` that precedes its last '.'.
 *
 * Directory components are not stripped: "/a/b.c" yields "/a/b".
 *
 * Returns:
 *   - `str` itself when it contains no '.', or when its last '.' is the
 *     very first character (e.g. ".bashrc");
 *   - otherwise a newly malloc()ed, NUL-terminated copy of the text before
 *     the last '.', which the caller must free();
 *   - NULL when `str` is NULL or the allocation fails.
 *
 * Because the result may be `str` itself, only free() it when it differs
 * from the argument.
 */
char *filename(char *str) {
    if (str == NULL)
        return NULL;

    char *last = strrchr(str, '.');
    if (last == NULL || last == str)
        return str;

    /* Size the buffer from the real stem length. Passing `sizeof result`
       (the size of a pointer) to snprintf() cut the stem down to
       sizeof(char *) - 1 characters. */
    size_t stem_len = (size_t)(last - str);
    char *result = malloc(stem_len + 1);
    if (result == NULL)
        return NULL;

    memcpy(result, str, stem_len);
    result[stem_len] = '\0';
    return result;
}

/*
 * Return the text that follows the last '.' in `str` (without the dot).
 *
 * Returns:
 *   - the string literal "" when `str` is NULL, contains no '.', or its
 *     last '.' is the very first character (e.g. ".bashrc"); this value
 *     must not be modified or freed;
 *   - otherwise a newly malloc()ed, NUL-terminated copy of the extension,
 *     which the caller owns;
 *   - NULL when the allocation fails.
 */
char *extname(char *str) {
    if (str == NULL)
        return "";

    char *last = strrchr(str, '.');
    if (last == NULL || last == str)
        return ""; // no extension

    /* Size the buffer from the real extension length. Passing
       `sizeof result` (the size of a pointer) to snprintf() cut the
       extension down to sizeof(char *) - 1 characters. */
    const char *ext = last + 1;
    size_t ext_len = strlen(ext);
    char *result = malloc(ext_len + 1);
    if (result == NULL)
        return NULL;

    memcpy(result, ext, ext_len + 1); /* includes the terminating NUL */
    return result;
}

Upvotes: 0

Views: 14593

Answers (5)

David Lannan
David Lannan

Reputation: 179

I know this is old. But I tend to use strtok for things like this.

/* strtok example */
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#define MAX_TOKENS  20  /* Some reasonable values */
#define MAX_STRING  128 /* Easy enough to make dynamic with mallocs */

/*
 * Split a sample path on '.' and '/' with strtok(), store the tokens in a
 * fixed table and print them one per line.
 *
 * At most MAX_TOKENS tokens are kept; any further tokens are dropped.
 * A token longer than MAX_STRING - 1 characters is truncated.
 */
int main (void)
{
  char str[] = "/home/user/music/thomas.mp3";
  const char sep[] = "./";
  char collect[MAX_TOKENS][MAX_STRING];
  int ccount = 0;

  /* strtok() overwrites each separator in str with '\0', so every token it
     returns is already a NUL-terminated string. */
  for (char *pch = strtok(str, sep);
       pch != NULL && ccount < MAX_TOKENS;
       pch = strtok(NULL, sep)) {
    /* snprintf() limits the copy to one row of the table and always
       terminates it; strncpy(dst, src, strlen(src)) did neither. */
    snprintf(collect[ccount], sizeof collect[ccount], "%s", pch);
    ccount++;
  }

  /* output tokens. */
  for (int i = 0; i < ccount; ++i)
    printf("Token: %s\n", collect[i]);
  return 0;
}

This is a rough example, but it makes it easy to deal with the tokens afterwards: the last token is the extension, the second-to-last is the base name, and so on. I also find it useful for rebuilding paths for different platforms, e.g. replacing / with \.

Upvotes: 0

ryyker
ryyker

Reputation: 23218

Here is a routine I use for that problem:

Separates original string into separate strings of path, file_name and extension.

Will work for Windows and Linux, relative or absolute style paths. Will handle directory names with embedded ".". Will handle file names without extensions.

/////////////////////////////////////////////////////////
//
// Example:
// Given path == "C:\\dir1\\dir2\\dir3\\file.exe"
// will return path_ as   "C:\\dir1\\dir2\\dir3"
// Will return base_ as   "file"
// Will return ext_ as    "exe"
//
/////////////////////////////////////////////////////////
// Splits `path` into its directory, base name and extension.
//
// The separator is chosen from the way `path` begins:
//   "/..."  or "./..."                 -> '/'
//   "\\...", ".\\..." or "X:\\..."     -> '\\'
// Any other input (including NULL, or a string shorter than two
// characters) is ignored and the output buffers are left untouched.
//
// path  : NUL-terminated input. It is only read, never modified.
// path_ : receives everything before the last separator, without the
//         trailing separator (e.g. "C:\\dir1\\dir2\\dir3" or "./music").
// base_ : receives the last component up to, not including, its last '.'.
// ext_  : receives the text after that '.', without the dot.
//
// When the last component contains no '.', the path is treated as a
// directory and all three outputs are set to "".
//
// Each output buffer must be able to hold its result; strlen(path) + 1
// bytes is always enough. The output buffers must not overlap `path`.
void GetFileParts(char *path, char *path_, char *base_, char *ext_)
{
    if (path == NULL || path_ == NULL || base_ == NULL || ext_ == NULL)
        return;
    if (strlen(path) < 2)
        return;

    // Determine the separator from the path prefix (C:\\, \\, /, ./, .\\).
    // path[2] is safe to read: the string has at least two characters, so
    // index 2 is at worst the terminating NUL.
    char delim;
    if (path[0] == '/' || (path[0] == '.' && path[1] == '/'))
        delim = '/';
    else if (path[0] == '\\' ||
             (path[0] == '.' && path[1] == '\\') ||
             (path[1] == ':' && path[2] == '\\'))
        delim = '\\';
    else
        return;

    // Every accepted prefix contains the separator, so sep is never NULL.
    const char *sep  = strrchr(path, delim);
    const char *name = sep + 1;              // last path component
    const char *dot  = strrchr(name, '.');   // dots in directories are ignored

    if (dot == NULL)
    {   // no file name: report empty parts
        path_[0] = '\0';
        base_[0] = '\0';
        ext_[0]  = '\0';
        return;
    }

    size_t dirLen  = (size_t)(sep - path);
    size_t baseLen = (size_t)(dot - name);

    memcpy(path_, path, dirLen);
    path_[dirLen] = '\0';

    memcpy(base_, name, baseLen);
    base_[baseLen] = '\0';

    strcpy(ext_, dot + 1);
}

Upvotes: 2

M.M
M.M

Reputation: 141576

Regarding your actual code (all the other answers so far say to scrap that and do something else, which is good advice, however I am addressing your code as it contains blunders that it'd be good to learn about in advance of next time you try to write something).

Firstly:

strncpy(str, result, (size_t) (last-str) + 1);

is not good. You have dest and src around the wrong way; and further this function does not null-terminate the output (unless the input is short enough, which it isn't). Generally speaking strncpy is almost never a good solution to a problem; either strcpy if you know the length, or snprintf.

Simpler and less error-prone would be:

snprintf(result, sizeof result, "%.*s", (int)(last - str), str);

Similarly, in the other function:

snprintf(result, sizeof result, "%s", last + 1);

The snprintf function never overflows buffer and always produces a null-terminated string, so long as you get the buffer length right!

Now, even if you fixed those then you have another fundamental problem in that you are returning a pointer to a buffer that is destroyed when the function returns. You could fix ext by just returning last + 1, since that is null-terminated anyway. But for filename you have the usual set of options:

  • return a pointer and a length, and treat it as a length-counted string, not a null-terminated one
  • return pointer to mallocated memory
  • return pointer to static buffer
  • expect the caller to pass in a buffer and a buffer length, which you just write into

Finally, returning NULL on failure is probably a bad idea; if there is no . then return the whole string for filename, and an empty string for ext. Then the calling code does not have to contort itself with checks for NULL.

Upvotes: 2

G--
G--

Reputation: 924

Use basename to get the filename and then you can use something like this to get the extension.

/*
 * Return the extension of `filename`: the text after the last '.' in its
 * final '/'-separated component, without the dot.
 *
 * Returns "" when `filename` is NULL, when that component has no '.', or
 * when its only '.' is the first character (e.g. ".bashrc"). A '.' inside
 * a directory name is never taken for the extension separator.
 *
 * The result is either a string literal or a pointer into `filename`;
 * the caller must not free() it.
 */
char *get_filename_ext(const char *filename) {
    if (!filename) return "";

    /* Only look at the last path component, so "dir.d/file" has no extension. */
    const char *name = strrchr(filename, '/');
    name = name ? name + 1 : filename;

    const char *dot = strrchr(name, '.');
    if (!dot || dot == name) return "";

    /* The signature returns a non-const pointer (as strrchr() does), so the
       const qualifier is dropped explicitly rather than implicitly. */
    return (char *)(dot + 1);
}

Edit: Try something like.

#include <libgen.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Print the file name of `path` as "<name>.<extension>" followed by a
 * newline, where <name> is the base name with the extension removed.
 *
 * `path` itself is not modified. If the working copy cannot be allocated,
 * an error is reported on stderr and nothing is printed to stdout.
 */
static void printFileInfo(char *path) {
    /* basename() is allowed to modify its argument, so hand it a copy. */
    char *path2 = strdup(path);
    if (path2 == NULL) {
        perror("strdup");
        return;
    }

    char *bname = basename(path2);
    char *ext = get_filename_ext(bname);

    /* A non-empty extension points into bname, just past the last '.'.
       Overwrite that '.' so bname holds the name alone; otherwise the
       extension would be printed twice ("thomas.mp3.mp3"). */
    if (ext[0] != '\0')
        ext[-1] = '\0';

    printf("%s.%s\n",bname, ext);
    free(path2);
}

Upvotes: 4

Valentin Mercier
Valentin Mercier

Reputation: 5326

Here is an old-school algorithm that will do the trick.

char path[100] = "/home/user/music/thomas.mp3";
int offset_extension, offset_name;
int len = strlen(path);
int i;
for (i = len; i >= 0; i--) {
    if (path[i] == '.')
        break;
    if (path[i] == '/') {
        i = len;
        break;
    }
}
if (i == -1) {
    fprintf(stderr,"Invalid path");
    exit(EXIT_FAILURE);
}
offset_extension = i;
for (; i >= 0; i--)
    if (path[i] == '/')
        break;
if (i == -1) {
    fprintf(stderr,"Invalid path");
    exit(EXIT_FAILURE);
}
offset_name = i;

char *extension, name[100];
extension = &path[offset_extension+1];
memcpy(name, &path[offset_name+1], offset_extension - offset_name - 1);

Then you have both pieces of information in the variables name and extension:

printf("%s %s", name, extension);

This will print:

thomas mp3

Upvotes: 0

Related Questions