luminousmen
luminousmen

Reputation: 2169

Implementation of strtok() function

I need to write my function strtok. Below is my code. The problem is - I can't display result string. In code I use strcpy() and then display new array. Is it possible to display string using just pointer *str?

#include <stdio.h>
#include <string.h>   

char* my_strtok(char* s, char* delm){
    char W[100];
    int i = 0, k = 0, j = 0;
    char *ptr;
    static char *Iterator;
    ptr = s;

    if (s == NULL){
        s = Iterator;
    }
    while (s[i] != '\0'){
        j = 0;
        while (delm[j] != '\0'){
            if (s[i] != delm[j])
                W[k] = s[i];
            else goto It;
            j++;
        }
        ptr++;
        i++;
        k++;
    }
It:
    W[i] = 0;
    Iterator = ++ptr;
    return W;
}

int main(void){
    char s[100];
    char delm[] = " ";
    gets(s);
    char newstr[100];
    char *str = my_strtok(s, delm);
    strcpy(newstr, str);
    printf("%s", newstr);
    return 0;
}

Upvotes: 3

Views: 56239

Answers (8)

Ifiok Ekott
Ifiok Ekott

Reputation: 1

works with string pointers but you must take care to free the returned result,tried the ones on top but still had memory leaks and couldn't understand where was leaking

/**
 * next_delim - finds the next delimiter
 * @string: the string to check
 * @delim: the delimiter to be checked
 * Return: the position of delimiter
 */
int next_delim(char *string, char *delim)
{
    int iter;

    for (iter = 0; string[iter] != '\0'; iter++)
    {
        if (is_delim(string[iter], delim))
            return (iter);
        else if (string[iter + 1] == '\0')
            return (iter + 1);
    }
    return (0);
}

/**
 * _strtok -  works like the popular strtok
 * @source: the source string
 * @delim: the delimiter to be checked
 * Return: the string that has being parsed
 */
char *_strtok(char *source, char *delim)
{
    static char *backup_string;
    char *buffer;
    static int position;
    int iter, previous_position;

    if ((!source && !backup_string))
    {
        return (NULL);
    }
    else if (!source)
    {
        if (backup_string[position - 1] == '\0')
            return (NULL);
        source = backup_string + position;
    }
    else
    {
        backup_string = source;
        position = 0;
    }
    for (; is_delim(*source, delim); *source++, position++)
        ;
    previous_position = position;
    position += next_delim((source), delim);
    buffer = malloc(sizeof(char) * (position - previous_position + 1));
    for (iter = 0; iter < (position - previous_position); iter++)
        buffer[iter] = source[iter];
    buffer[iter] = '\0';
    position++;
    return (buffer);
}

Upvotes: 0

Drowning_Ophelia
Drowning_Ophelia

Reputation: 21

The glibc solution provided by @Enzo Ferber did not work for me (MS Visual Studio 2019, Visual Micro Release 22.02.18.5 --> Arduino(1.8), avr-gcc 5.4.0) at:

/* Terminate the token and make OLDS point past it. */ *s = '\0';

because s is just a shallow copy of the pointer. Changed *s = '\0'; to:

**((char**)&s) = '\0';

and it worked fine. Not sure if this is something specific to the gcc implementation, but seems that trying to modify a shallow copy of a pointer in a function would fail in any ANSI/ISO C implementation.

Upvotes: 0

Rose
Rose

Reputation: 21

Here is the code which explains how strtok works in C.

#include <stdio.h>
#include <string.h>

#define SIZE_OF_STRING 50 
#define SIZE_OF_DELIMITER 5

/* Make the temp_ptr as static, so it will hold the previous pointing address */
static char *temp_ptr = NULL;

char *string_token(char *str, char *delimiter)
{
    char *final_ptr = NULL;  `enter code here`
    /* Flag has been defined as static to avoid the parent function loop
     * runs infinitely after reaching end of string.
     */ 
    static int flag = 0;
    int i, j;

    if (delimiter == NULL) {
        return NULL;
    }

    /* If flag is 1, it will denote the end of the string */
    if (flag == 1) {
        return NULL;
    }

    /* The below condition is to avoid temp_ptr is getting assigned 
     * with NULL string from the parent function main. Without
     * the below condition, NULL will be assigned to temp_ptr 
     * and final_ptr, so accessing these pointers will lead to
     * segmentation fault.
     */
    if (str != NULL) { 
        temp_ptr = str; 
    }

    /* Before function call ends, temp_ptr should move to the next char,
     * so we can't return the temp_ptr. Hence, we introduced a new pointer
     * final_ptr which points to temp_ptr.
     */
    final_ptr = temp_ptr;

    printf("%s %d str: %s delimiter: %s length: %ld temp_ptr: %s strlen: %ld"
           " final_ptr: %s \n",__func__, __LINE__, str, delimiter, 
           strlen(delimiter), temp_ptr, strlen(temp_ptr), final_ptr);

    for (i = 0; i <= strlen(temp_ptr); i++)
    {
        for (j = 0; j < strlen(delimiter); j++) {

            if (temp_ptr[i] == '\0') {
                /* If the flag is not set, both the temp_ptr and flag_ptr 
                 * will be holding string "Jasmine" which will make parent 
                 * to call this function string_token infinitely. 
                 */
                flag = 1;
                return final_ptr;
            }

            if ((temp_ptr[i] == delimiter[j])) {
                /* NULL character is assigned, so that final_ptr will return 
                 * til NULL character. Here, final_ptr points to temp_ptr.
                 */
                temp_ptr[i] = '\0';
                temp_ptr += i+1;
                return final_ptr;
            }
        }
    }
    /* You will somehow end up here if for loop condition fails.
     * If this function doesn't return any char pointer, it will 
     * lead to segmentation fault in the parent function.
     */
    return NULL;
}


int main()
{
    char str[SIZE_OF_STRING] = "shirley|Rose|Jasmine";
    char *token = NULL;
    char del[SIZE_OF_DELIMITER] = "|";

    token = string_token(str, del);
    while (token != NULL) {       
        printf("token %s\n", token);
        token = string_token(NULL, del);
    }
    return 0;
}

https://shirleyanengineer.blogspot.com/2019/11/write-your-own-strtok-in-c.html

Upvotes: 0

coolVick
coolVick

Reputation: 538

Internal implementation of strtok has already been discussed here:

How does strtok() split the string into tokens in C?

In your type of implementation ( calling it your type because it is quite different from the actual one), you have not allocated any memory dynamically to the local variable 'W'. So when you return it, it is not guaranteed that you will receive the string correctly because that memory which was locally allocated to 'W' is not reserved for it anymore in the calling function. Apart from this, there are many unnecessary variables used in the code. And also, it needs proper re-structuring. For your better understanding I have modified your code(keeping it in your style as far as possible) to do the required job:

#include <stdio.h>
#include <string.h>  
#include <malloc.h>

char* my_strtok(char* s, char* delm)
{
    static int currIndex = 0;
    if(!s || !delm || s[currIndex] == '\0')
    return NULL;
    char *W = (char *)malloc(sizeof(char)*100);
    int i = currIndex, k = 0, j = 0;

    while (s[i] != '\0'){
        j = 0;
        while (delm[j] != '\0'){
            if (s[i] != delm[j])
                W[k] = s[i];
            else goto It;
            j++;
        }

        i++;
        k++;
    }
It:
    W[i] = 0;
    currIndex = i+1;
    //Iterator = ++ptr;
    return W;
}

int main(void)
{
    char s[100] = "my name is khan";
    char delm[] = " ";
    //char newstr[100];
    char *str = my_strtok(s, delm);
    while(str){
        printf("%s", str);
        free(str);
        str = my_strtok(s, delm);
    }

    return 0;
}

Upvotes: 6

Amar Nagendra
Amar Nagendra

Reputation: 1

Here is an implementation increase the input buffers as per your needs and increase the no of calls to strtok with str1 as NULL to get back more tokens

#include<stdio.h>
#include<string.h>
#include<stdlib.h>
char *ret_nondelim(char *str1,char *str2);
char *my_strtok(char *str1,char *str2);
char *ret_noofbytes(char *str1,char *str2);
int main()
{
    char str1[20];
    char str2[20];
    char *res;
    printf("enter string one\n");
    scanf("%s",str1);
    printf("enter string two\n");
    scanf("%s",str2);
    res=my_strtok(str1,str2);
    if(res)
        printf("%s\n",res);
    else
        printf("returned NULL\n");
    free(res);
    res=my_strtok(NULL,str2);
    if(res)
        printf("%s\n",res);
    else
        printf("returned NULL\n");
    free(res);
    res=my_strtok(NULL,str2);
    if(res)
        printf("%s\n",res);
    else
        printf("returned NULL\n");
    free(res);
}
char *my_strtok(char *str1,char *str2)
{
    static char *str1_cpy;
    static int i;
    char *ptr;
    int flag=0;
    if(str1!=NULL)
    {
        str1_cpy=ret_nondelim(str1,str2);      //get location of non delimiting byte
        str1=ret_noofbytes(str1_cpy,str2);  //scan and get location of delimitng byte
        if((str1-str1_cpy))         //no of bytes = non delimiting location - delimting location
        {
            ptr=malloc(str1-str1_cpy+1);        //malloc location and and store the string`enter code here`
            int k;
            for(k=0;k<(str1-str1_cpy);k++)
                ptr[k]=str1_cpy[k];
            ptr[k]='\0';
            str1_cpy=str1;              //save pointer for next iteration
            return ptr;             //return pointer
        }
        else
            return NULL;

    }
    else
    {
        str1_cpy=ret_nondelim(str1_cpy,str2);   //same as above but pass saved pointer 
        str1=ret_noofbytes(str1_cpy,str2);
        if((str1-str1_cpy))
        {
            ptr=malloc(str1-str1_cpy+1);
            int k;
            for(k=0;k<(str1-str1_cpy);k++)
                ptr[k]=str1_cpy[k];
            ptr[k]='\0';
            str1_cpy=str1;          //save pointer for next iteration
            return ptr;
        }
        else
            return NULL;
    }
}
char *ret_nondelim(char *str1,char *str2)
{
    int flag=0;
    for(;*(str1);)
    {
        for(int j=0;j<strlen(str2);j++)
        {
            if(*(str1)==*(str2+j))          //check if slected byte is non delimiting byte
            {
                str1++;             //break if true go check next byte
                break;
            }
            else
                flag++;             //shows that selected byte is none of the delimitng byte
        }
        if(flag==strlen(str2))              //(above comment)-so non delimiting byte found return that pointer to caller
            break;
        else
            flag=0;
    }
    return str1;
}

char *ret_noofbytes(char *str1,char *str2)
{
    int flag=0;
    for(;*(str1);)
    {
        for(int k=0;k<strlen(str2);k++)
        {
            if(*(str2+k)==*(str1))          //check if selected bytes is delimiting byte
            {
                flag=1;
                break;              //break twice if true ie .flag==1
            }
        }
        if(flag==1)
            break;
        else
            str1++;                 //if not found go check next byte
    }
    return str1;                        //return the point where delimiting byte was found
}

Upvotes: 0

Kohn1001
Kohn1001

Reputation: 3901

Actually this one is my solution to the general case... when str is char* you can't write to it.

Here it is:

And here is the link to github resource:

#include<stdio.h>
#include<stdlib.h>
#include<string.h>

#define DICT_LEN 256

int *create_delim_dict(char *delim)
{
    int *d = (int*)malloc(sizeof(int)*DICT_LEN);
    memset((void*)d, 0, sizeof(int)*DICT_LEN);

    int i;
    for(i=0; i< strlen(delim); i++) {
        d[delim[i]] = 1;
    }
    return d;
}



char *my_strtok(char *str, char *delim)
{

    static char *last, *to_free;
    int *deli_dict = create_delim_dict(delim);

    if(!deli_dict) {
        return NULL;
    }

    if(str) {
        last = (char*)malloc(strlen(str)+1);
        if(!last) {
            free(deli_dict);
        }
        to_free = last;
        strcpy(last, str);
    }

    while(deli_dict[*last] && *last != '\0') {
        last++;
    }
    str = last;
    if(*last == '\0') {
        free(deli_dict);
        free(to_free);
        return NULL;
    }
    while (*last != '\0' && !deli_dict[*last]) {
        last++;
    }

    *last = '\0';
    last++;

    free(deli_dict);
    return str;
}

int main()
{
    char * str = "- This, a sample string.";
    char *del = " ,.-";
    char *s = my_strtok(str, del);
    while(s) {
        printf("%s\n", s);
        s = my_strtok(NULL, del);
    }
    return 0;
}

Upvotes: 2

Maverick Meerkat
Maverick Meerkat

Reputation: 6404

char * strtok2(char *str, const char *delim)
{
static char *nxt; /* static variable used to advance the string to replace delimiters */
static int size;  /* static variable used to count until the end of the string        */

/* IMPORTANT: any advance to 'nxt' must be followed by a diminution of 'size', and vice verce */

int i; /* counter of delimiter(s) in string */

/* initialize the string when strtok2 is first calles and supplied with a valid string */
if(str != NULL)
{
    nxt = str;
    size = strlen(str);
}

/* if we havn't reached the end of the string, any other call to strtok2 with NULL will come here */
else if(size > 0)
{
    nxt++;      /* last run left nxt on a null terminator, so we have to advance it */
    size--;     /* any advancement must follow diminution of size                   */
    str = nxt;  /* string now points to the first char after the last delimiter     */ 
}

/* if we reached the end of string, return NULL pointer */
else
{
    str = NULL;
}

/* nxt is used to advance the string until a delimiter or a series of delimiters are encountered; 
 * it then stops on the last delimiter which has turned to NULL terminator
 */
while(*nxt)
{
    i = strspn(nxt, delim);
    while(i > 1)        /* turns delimiters to NULL terminator (except the last one) */
    {
        *nxt = '\0';
        nxt++;
        size--;
        i--;
    }                   /* in the last delimiter we have to do something a      */
    if(1 == i)          /* bit different we have to actually take nxt backwards */
    {                   /* one step, to break the while(*nxt) loop              */
        *nxt = '\0';
        if(size > 1)    /* if the delimiter is last char, don't do next lines   */
        {
            nxt--;
            size++;     /* nxt is diminished so size increases                    */
        }
    }
    nxt++;          /* if no delimiter is found, advance nxt                  */
    size--;         /* in case of the last delimiter in a series, we took nxt */
}                   /* a step back, so now moving it a step forward means     */
                    /* it rests on a NULL terminator                          */
return str;
}

Upvotes: 0

Enzo Ferber
Enzo Ferber

Reputation: 3094

Bounty Disclaimer

Bounty: "Looking for an answer drawing from credible and/or official sources."

I think GNU GLibC constitutes a credible and official source, right? You can download GLibC 2.22 here as .tar.gz (25mb).


After you extract the sources:

string/strtok.c

#include <string.h>


static char *olds;

#undef strtok

#ifndef STRTOK
# define STRTOK strtok
#endif

/* Parse S into tokens separated by characters in DELIM.
   If S is NULL, the last string strtok() was called with is
   used.  For example:
    char s[] = "-abc-=-def";
    x = strtok(s, "-");     // x = "abc"
    x = strtok(NULL, "-=");     // x = "def"
    x = strtok(NULL, "=");      // x = NULL
        // s = "abc\0=-def\0"
*/
char *
STRTOK (char *s, const char *delim)
{
  char *token;

  if (s == NULL)
    s = olds;

  /* Scan leading delimiters.  */
  s += strspn (s, delim);
  if (*s == '\0')
    {
      olds = s;
      return NULL;
    }

  /* Find the end of the token.  */
  token = s;
  s = strpbrk (token, delim);
  if (s == NULL)
    /* This token finishes the string.  */
    olds = __rawmemchr (token, '\0');
  else
    {
      /* Terminate the token and make OLDS point past it.  */
      *s = '\0';
      olds = s + 1;
    }
  return token;
}


string/strspn.c

#include <string.h>

#undef strspn
#ifndef STRSPN
#define STRSPN strspn
#endif

/* Return the length of the maximum initial segment
   of S which contains only characters in ACCEPT.  */
size_t
STRSPN (const char *s, const char *accept)
{
  const char *p;
  const char *a;
  size_t count = 0;

  for (p = s; *p != '\0'; ++p)
    {
      for (a = accept; *a != '\0'; ++a)
    if (*p == *a)
      break;
      if (*a == '\0')
    return count;
      else
    ++count;
    }

  return count;
}
libc_hidden_builtin_def (strspn)


string/strpbrk.c

#include <string.h>

#undef strpbrk

#ifndef STRPBRK
#define STRPBRK strpbrk
#endif

/* Find the first occurrence in S of any character in ACCEPT.  */
char *
STRPBRK (const char *s, const char *accept)
{
  while (*s != '\0')
    {
      const char *a = accept;
      while (*a != '\0')
    if (*a++ == *s)
      return (char *) s;
      ++s;
    }

  return NULL;
}
libc_hidden_builtin_def (strpbrk)


sysdeps/x86_64/rawmemchr.S

#include <sysdep.h>

    .text
ENTRY (__rawmemchr)
    movd    %rsi, %xmm1
    mov %rdi, %rcx

    punpcklbw %xmm1, %xmm1
    punpcklbw %xmm1, %xmm1

    and $63, %rcx
    pshufd  $0, %xmm1, %xmm1

    cmp $48, %rcx
    ja  L(crosscache)

    movdqu  (%rdi), %xmm0
    pcmpeqb %xmm1, %xmm0
/* Check if there is a match.  */
    pmovmskb %xmm0, %eax
    test    %eax, %eax

    jnz L(matches)
    add $16, %rdi
    and $-16, %rdi
    jmp L(loop_prolog)

    .p2align 4
L(crosscache):
    and $15, %rcx
    and $-16, %rdi
    movdqa  (%rdi), %xmm0

    pcmpeqb %xmm1, %xmm0
/* Check if there is a match.  */
    pmovmskb %xmm0, %eax
/* Remove the leading bytes.  */
    sar %cl, %eax
    test    %eax, %eax
    je  L(unaligned_no_match)
/* Check which byte is a match.  */
    bsf %eax, %eax

    add %rdi, %rax
    add %rcx, %rax
    ret

    .p2align 4
L(unaligned_no_match):
    add $16, %rdi

    .p2align 4
L(loop_prolog):
    movdqa  (%rdi), %xmm0
    pcmpeqb %xmm1, %xmm0
    pmovmskb %xmm0, %eax
    test    %eax, %eax
    jnz L(matches)

    movdqa  16(%rdi), %xmm2
    pcmpeqb %xmm1, %xmm2
    pmovmskb %xmm2, %eax
    test    %eax, %eax
    jnz L(matches16)

    movdqa  32(%rdi), %xmm3
    pcmpeqb %xmm1, %xmm3
    pmovmskb %xmm3, %eax
    test    %eax, %eax
    jnz L(matches32)

    movdqa  48(%rdi), %xmm4
    pcmpeqb %xmm1, %xmm4
    add $64, %rdi
    pmovmskb %xmm4, %eax
    test    %eax, %eax
    jnz L(matches0)

    test    $0x3f, %rdi
    jz  L(align64_loop)

    movdqa  (%rdi), %xmm0
    pcmpeqb %xmm1, %xmm0
    pmovmskb %xmm0, %eax
    test    %eax, %eax
    jnz L(matches)

    movdqa  16(%rdi), %xmm2
    pcmpeqb %xmm1, %xmm2
    pmovmskb %xmm2, %eax
    test    %eax, %eax
    jnz L(matches16)

    movdqa  32(%rdi), %xmm3
    pcmpeqb %xmm1, %xmm3
    pmovmskb %xmm3, %eax
    test    %eax, %eax
    jnz L(matches32)

    movdqa  48(%rdi), %xmm3
    pcmpeqb %xmm1, %xmm3
    pmovmskb %xmm3, %eax

    add $64, %rdi
    test    %eax, %eax
    jnz L(matches0)

    and $-64, %rdi

    .p2align 4
L(align64_loop):
    movdqa  (%rdi), %xmm0
    movdqa  16(%rdi), %xmm2
    movdqa  32(%rdi), %xmm3
    movdqa  48(%rdi), %xmm4

    pcmpeqb %xmm1, %xmm0
    pcmpeqb %xmm1, %xmm2
    pcmpeqb %xmm1, %xmm3
    pcmpeqb %xmm1, %xmm4

    pmaxub  %xmm0, %xmm3
    pmaxub  %xmm2, %xmm4
    pmaxub  %xmm3, %xmm4
    pmovmskb %xmm4, %eax

    add $64, %rdi

    test    %eax, %eax
    jz  L(align64_loop)

    sub $64, %rdi

    pmovmskb %xmm0, %eax
    test    %eax, %eax
    jnz L(matches)

    pmovmskb %xmm2, %eax
    test    %eax, %eax
    jnz L(matches16)

    movdqa  32(%rdi), %xmm3
    pcmpeqb %xmm1, %xmm3

    pcmpeqb 48(%rdi), %xmm1
    pmovmskb %xmm3, %eax
    test    %eax, %eax
    jnz L(matches32)

    pmovmskb %xmm1, %eax
    bsf %eax, %eax
    lea 48(%rdi, %rax), %rax
    ret

    .p2align 4
L(matches0):
    bsf %eax, %eax
    lea -16(%rax, %rdi), %rax
    ret

    .p2align 4
L(matches):
    bsf %eax, %eax
    add %rdi, %rax
    ret

    .p2align 4
L(matches16):
    bsf %eax, %eax
    lea 16(%rax, %rdi), %rax
    ret

    .p2align 4
L(matches32):
    bsf %eax, %eax
    lea 32(%rax, %rdi), %rax
    ret

    .p2align 4
L(return_null):
    xor %rax, %rax
    ret

END (__rawmemchr)

weak_alias (__rawmemchr, rawmemchr)
libc_hidden_builtin_def (__rawmemchr)

Upvotes: 9

Related Questions