topcat
topcat

Reputation: 189

How to Tokenize string[array]?

I need to tokenize a string from an array. I need just three words, and all tabs '\t' and spaces ' ' should be ignored.

the array line[] is just a test case.

I debugged my code: the first array (which is supposed to hold only the first word) got filled with spaces and letters from all three words, instead of stopping after the first word when a tab or space is met. By the way, my program crashed; I suspect it is going out of array bounds.

What am I doing wrong?

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Attempts to copy the first three space/tab-separated words of `line`
 * into array1, array2 and array3, then prints the three arrays.
 *
 * NOTE(review): as written the scan is not bounded by the end of `line`;
 * see the comments on the inner loops and on the second `if` below.
 */
int main()
{
    char line[] = "         CLOOP       LDA             buffer,x";

    // Each buffer starts out as the empty string; `arrayN[0] == '\0'` is used
    // further down as the "this array has not been filled yet" test.
    char array1[20] ="";
    char array2[20] ="";
    char array3[20] ="";

    int i = 0;
    // The first clause `i` is an expression with no effect (i was set above).
    // strlen(line) is re-evaluated on every iteration.
    for( i ; i<strlen(line) ; i++)
    {
        // Entered when line[i] is neither a space nor a tab. The terminating
        // '\0' also satisfies this condition.
        while ( (line[i] != ' ') && (line[i] != '\t'))
        {

            if(array1[0] == '\0')
            {
                int j = 0;

                // Copies characters until a space or tab is seen. There is no
                // test for '\0' and no test of j against the size of array1.
                while(line[i] != ' ' && line[i] != '\t')
                {
                    array1[j] = line[i];
                    i++;
                    j++;
                }
            }

            // NOTE(review): the ';' right after the condition is the whole body
            // of this `if`. The braces that follow are therefore an ordinary
            // block that runs every time, whatever array2[0] holds, and k
            // restarts at 0 so each later word overwrites array2 from the start.
            if(array2[0] =='\0');
            {
                int k = 0;

                // Same stop condition as above: only space or tab. The last word
                // in `line` is followed directly by '\0', so for that word this
                // loop keeps going past the end of `line` and past the end of
                // array2.
                while(line[i] != ' ' && line[i] != '\t')
                {
                    array2[k] = line[i];
                    i++;
                    k++;  
                }   
            }

            if(array3[0] == '\0')
            {
                int g = 0;

                // Same unbounded copy pattern as for array1 and array2.
                while(line[i] != ' ' && line[i] != '\t')
                {
                    array3[g] = line[i];
                    i++;
                    g++;
                }

            }

        }
    }

    printf("array 1: %s\n array2: %s\n array3: %s\n", array1, array2, array3);

    return(0);
}

Upvotes: 1

Views: 359

Answers (1)

sg7
sg7

Reputation: 6298

You are over-complicating things. First of all it is difficult to feed all 3 arrays at the same time. The processing for one token should be completely finished before moving to the other token.

I would propose to "eat" all the white spaces before starting to process a token. That is done by:

// skip leading delimiters
if( skip_leading_delimiters )
{
     if( is_delimiter( delimiters, line[i]) ) continue;
     skip_leading_delimiters = 0;
}

After a token is processed you can move on to the next token and process it. I tried to preserve your concept and approach as much as I could. The number of while loops has been reduced to 0, since the `// skip leading delimiters` section takes care of it.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Tell whether `c` occurs in the NUL-terminated string `delimiters`.
 *
 * Returns 1 when `c` is one of the delimiter characters, 0 otherwise.
 * The lookup is done with strchr, which also matches the string's
 * terminator, so passing c == '\0' yields 1.
 */
int is_delimiter(const char * delimiters, char c)
{
    return (strchr(delimiters, c) != NULL) ? 1 : 0;
}

/*
 * Splits `line` into its first three tokens, where tokens are separated by
 * runs of spaces and/or tabs, and prints them.
 *
 * One token is completely finished before the next one is started: leading
 * delimiters are skipped, characters are appended to the current array until
 * a delimiter is met, then the scan moves on to the next array.
 *
 * A token longer than an array can hold is silently truncated to fit.
 * If the line contains fewer than three tokens the remaining arrays stay
 * empty strings.
 */
int main()
{
    char line[] = "         CLOOP       LDA             buffer,x";

    // Initialised to "" so every array is a valid string for printf even
    // when no token is ever written into it.
    char array1[20] = "";
    char array2[20] = "";
    char array3[20] = "";

    // Destination buffers in the order they are filled; `token` indexes this.
    char *tokens[] = { array1, array2, array3 };
    const size_t token_count = sizeof tokens / sizeof tokens[0];
    const size_t capacity = sizeof array1;    // all three arrays have this size

    // Space and tab. The same set is used both for skipping leading
    // delimiters and for detecting the end of a token.
    const char *delimiters = " \t";

    const size_t length = strlen(line);       // computed once, not per iteration

    size_t token = 0;                         // which array is being filled
    size_t j = 0;                             // write position inside that array
    int skip_leading_delimiters = 1;

    // Stop at the end of the line or as soon as the third token is complete.
    for (size_t i = 0; i < length && token < token_count; i++)
    {
        // skip leading delimiters
        if (skip_leading_delimiters)
        {
            if (is_delimiter(delimiters, line[i])) continue;
            skip_leading_delimiters = 0;
        }

        if (!is_delimiter(delimiters, line[i]))
        {
            // Keep one byte free for the terminator; extra characters of an
            // over-long token are dropped instead of overflowing the array.
            if (j + 1 < capacity)
            {
                tokens[token][j] = line[i];
                j++;
                tokens[token][j] = '\0';
            }
        }
        else
        {
            // A delimiter after at least one token character ends the token.
            token++;
            j = 0;
            skip_leading_delimiters = 1;
        }
    }

    printf(" array1: %s\n array2: %s\n array3: %s\n", array1, array2, array3);

    return(0);
}

Output:

 array1: CLOOP                                                                                                                               
 array2: LDA                                                                                                                                 
 array3: buffer,x  

Upvotes: 1

Related Questions