Reputation: 133

How to ask for user input without knowing length in advance? In C

I want user input of the form {{a,b},{c,d}}, one single line, to represent a matrix. The dimensions of the matrix are known. Using getchar(), I cannot treat cases where the integers are larger than 9, without an incredibly lengthy detour. Using scanf(), it will go to a newline after each use of this function. How do I request the input in this format correctly? It seems to me that there must be some easier way of doing this than getchar().

As mentioned above, I tried a getchar() loop, which worked for single-digit integers, but the loop count becomes incorrect once the integers have multiple digits. Then I tried the code below, but that does not appear to work either.

/* Fill `matrix` in row-major order from `loopSize` successive reads:
   `i` is the current row, `j` the current column. */
int i=0;
int j=0;
for (int k=0;k<loopSize;k++){
     /* Reads one integer into `element` and the single character that follows
        it into `c`. The return value is not checked; note that `%d` cannot
        match a leading `{`, so on input of the form {{a,b},{c,d}} the first
        call converts nothing. */
     scanf("%d%c",&element,&c);
         matrix[i][j]=element;
         j++;
         /* After `columns` entries, wrap to the start of the next row. */
         if (j%columns==0){
            i++;
            j=0;
         }
}

How do I request the input correctly? Thanks in advance.

Upvotes: 3

Answers (3)

Neil

Reputation: 1922

In my other answer, I misread the question as saying "The dimensions of the matrix are not known," instead of "The dimensions of the matrix are known." Since the tokens are single characters and numbers, and the tokens always follow the same pattern, this can be written more compactly with scanf from stdio.h.

#include <stdlib.h>
#include <stdio.h>
#include <errno.h>

/** Reads one 2x2 integer matrix written as `{{a,b},{c,d}}` from `stdin` and
 prints it as two rows. Returns `EXIT_SUCCESS` when the input matched, otherwise
 reports the failure with `perror` and returns `EXIT_FAILURE`. */
int main(void) {
    int m[4];
    char closing;
    const int converted = scanf(" { { %i , %i } , { %i , %i } %c",
        &m[0], &m[1], &m[2], &m[3], &closing);

    /* All five conversions must succeed, and the final character read has to
     be the outer closing brace. */
    if(converted == 5 && closing == '}') {
        printf("( %4d %4d )\n( %4d %4d )\n", m[0], m[1], m[2], m[3]);
        return EXIT_SUCCESS;
    }

    errno = EILSEQ;
    perror("Something went wrong");
    return EXIT_FAILURE;
}

The whitespace in the format string is there so that scanf skips any whitespace in the input at those positions; per the specification:

A directive composed of one or more white-space characters is executed by reading input until no more valid input can be read, or up to the first byte which is not a white-space character which remains unread.

The rbrace is used to tell that we've got the trailing }}. Without it, once scanf has matched the 4 ints, I don't think there's a way to detect whether the trailing literal characters have been matched, because:

The success of literal matches and suppressed assignments is only directly determinable via the %n conversion specification.

Caveat, though, the documentation says that %i expects the same sequence as with strtol with 0 as the base, but it's not entirely clear what happens if the number is out-of-range. On my computer,

{{326345645675372456345576,6435},{6,0}}
(   -1 6435 )
(    6    0 )

Upvotes: 0

Neil

Reputation: 1922

In general, parsing text is fairly hard. The grammar you've specified is fairly simple, but more complex grammars really benefit from lexing and parsing. Instead of writing it by hand, in the example I've used re2c. re2c takes as input a C file with /*!re2c comments and turns them into a fast lexer.

The x-macro allows one to print off the symbols and is super-useful for debugging, but is not needed otherwise.

C.c.re:

#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <limits.h>

/* X-Macro: the list of symbols is written once, in `SYMBOL`, and expanded
 twice -- once into the enumerators of `enum Symbol` and once into a parallel
 table of their names, so `symbols[s]` is the printable name of symbol `s`. */

/* Expands to the argument itself (used to build the enumerators). */
#define PARAM(A) A
/* Expands to the argument as a string literal (used to build the names). */
#define STRINGISE(A) #A

/* `END` is listed first, so it has the value zero. */
#define SYMBOL(X) X(END), X(LBRACK), X(RBRACK), X(NUMBER), X(COMMA)

enum Symbol { SYMBOL(PARAM) };
static const char *const symbols[] = { SYMBOL(STRINGISE) };

/* Once the input is read, re2c will lex it; this stores the cursors. `to` is
 the cursor that the generated lexer advances (it is configured as `YYCURSOR`
 below); `from` is set to the start of the current token at the top of `next`. */
static struct { const char *from, *to; } scanner;

/* The following block is input for re2c, not for the C compiler: it holds the
 re2c configuration and the named definitions used by the rules in `next`. */
/*!re2c
re2c:yyfill:enable   = 0;
re2c:define:YYCTYPE  = char;
re2c:define:YYCURSOR = scanner.to;
eof = "\x00"; // If you git re2c 2, there's a simpler new way.
whitespace = [ \t\v\f\n\r];
digit = [0-9];
number = "-"? digit+; */

/** Lexes the next token starting at `scanner.to`. On return, `scanner.from`
 and `scanner.to` delimit the text that was matched. Runs of whitespace are
 skipped by restarting from `reset`.
 @return The symbol that was matched; `END` at the terminating `'\0'`; or `END`
 with `errno` set to `EILSEQ` when the `*` rule fires (re2c's catch-all for
 input that no other rule matches). */
static enum Symbol next(void) {
reset:
    /* The token (re)starts wherever the previous match stopped. */
    scanner.from = scanner.to;
/*!re2c
    * { return errno = EILSEQ, END; }
    eof { return END; }
    whitespace+ { goto reset; }
    "{" { return LBRACK; }
    "}" { return RBRACK; }
    number { return NUMBER; }
    "," { return COMMA; }
*/
}

/* Define `CharArray`, a vector of characters -- dynamic string in which we
 will store the entire input. */
#define ARRAY_NAME Char
#define ARRAY_TYPE char
#include "BasicArray.h"

/* One lexed token: its kind and the slice of the input text it covers. */
struct Token {
    /* Kind of token. */
    enum Symbol symbol;
    /* Start of the token's text inside the input buffer; the text is not
     null-terminated at the end of the token, so use it together with `length`. */
    const char *from;
    /* Number of characters in the token's text. */
    int length;
};

#define ARRAY_NAME Token
#define ARRAY_TYPE struct Token
#include "BasicArray.h"

/** Fills the `token` with `symbol` and the most recent scanner values.
 @param token  Destination; must be non-null.
 @param symbol Kind of the token just lexed; must not be `END` (zero).
 @return 1 on success; 0, with `errno` set to `EILSEQ`, when the token is longer
 than `INT_MAX` characters and therefore cannot be stored in `token->length`. */
static int init_token(struct Token *const token, const enum Symbol symbol) {
    assert(token && symbol && scanner.from && scanner.from < scanner.to);
    /* Compare the distance between the cursors instead of forming
     `scanner.from + INT_MAX`: that pointer could lie far beyond the end of the
     buffer, and merely computing it is undefined behaviour. */
    if(scanner.to - scanner.from > INT_MAX) return errno = EILSEQ, 0;
    token->symbol = symbol;
    token->from = scanner.from;
    token->length = (int)(scanner.to - scanner.from);
    return 1;
}

#define ARRAY_NAME Int
#define ARRAY_TYPE int
#include "BasicArray.h"

/* A parsed matrix: all elements in one flat array, stored row after row, plus
 the number of columns every row has. */
struct Matrix {
    /* Elements in the order they appeared in the input. */
    struct IntArray ints;
    /* Number of elements per row; set by `parse` on success. */
    size_t cols;
};

/** Parses `tokens` as a matrix of the form `{{a,b,...},{c,d,...},...}` and
 appends the numbers, row after row, to `matrix`, which must be empty. Every
 row must have the same, non-zero, number of elements; empty matrices are not
 allowed. Written as a small state machine, one label per grammar position.
 @param tokens Token sequence produced by the lexer; must be non-null.
 @param matrix Destination; must be non-null and contain no elements.
 @return 1 on success, with `matrix->cols` set. 0 on failure, with `errno` set:
 `EILSEQ` for a syntax error (including trailing tokens after the matrix),
 `ERANGE` for a number that does not fit an `int`, or whatever the array
 allocation left in `errno`. On failure `matrix` may hold a partial result. */
static int parse(const struct TokenArray *const tokens,
    struct Matrix *const matrix) {
    size_t set_col = 0, col = 0;
    int is_set_col = 0;
    long input;
    int *output;
    const struct Token *t = 0;
    assert(tokens && matrix && !IntArraySize(&matrix->ints));
    errno = 0;

    /* Left outside. */
    t = TokenArrayNext(tokens, t);
    if(!t || t->symbol != LBRACK) return errno = EILSEQ, 0;
    goto left_middle;

left_middle:
    /* Opening brace of a row. */
    t = TokenArrayNext(tokens, t);
    if(!t || t->symbol != LBRACK) return errno = EILSEQ, 0;
    col = 0;
    goto inside_number;

inside_number:
    t = TokenArrayNext(tokens, t);
    if(!t || t->symbol != NUMBER) return errno = EILSEQ, 0;
    /* The lexer only accepts an optional '-' followed by decimal digits, so
     convert strictly in base 10; base 0 would read "010" as octal 8 and stop
     after the first digit of "08". The character following the token is never
     a digit, so `strtol` stops exactly at the end of the token. */
    errno = 0;
    input = strtol(t->from, 0, 10);
    if(errno) return 0;
    if(input < INT_MIN || input > INT_MAX) return errno = ERANGE, 0;
    if(!(output = IntArrayNew(&matrix->ints))) return 0;
    *output = (int)input;
    col++;
    goto end_number;

end_number:
    if(!(t = TokenArrayNext(tokens, t))) return errno = EILSEQ, 0;
    if(t->symbol == RBRACK) {
        /* End of a row: the first row fixes the width, later rows must agree. */
        if(is_set_col) {
            if(set_col != col) return errno = EILSEQ, 0;
        } else {
            set_col = col;
            is_set_col = 1;
        }
        goto finished_middle;
    } else if(t->symbol == COMMA) {
        goto inside_number;
    } else {
        return errno = EILSEQ, 0;
    }

finished_middle:
    if(!(t = TokenArrayNext(tokens, t))) return errno = EILSEQ, 0;
    if(t->symbol == RBRACK) {
        goto end_matrix;
    } else if(t->symbol == COMMA) {
        goto left_middle;
    } else {
        return errno = EILSEQ, 0;
    }

end_matrix:
    /* Anything after the outer closing brace is a syntax error; report it as
     such so that the caller's `perror` has a meaningful `errno`. */
    if(TokenArrayNext(tokens, t)) return errno = EILSEQ, 0;
    assert(is_set_col);
    matrix->cols = set_col;
    return 1;
}


/** Reads all of `stdin`, lexes it into tokens (echoing each token and its
 symbol name), parses the tokens as a matrix and prints the matrix.
 @return `EXIT_SUCCESS`, or `EXIT_FAILURE` after reporting the error with
 `perror` when reading, lexing, parsing or an allocation fails. */
int main(void) {
    struct TokenArray tokens = ARRAY_ZERO;
    struct CharArray buffer = ARRAY_ZERO;
    struct Matrix matrix = { ARRAY_ZERO, 0 };
    enum Symbol symbol;
    const size_t granularity = 1024;
    size_t nread, i, n;
    const int *ints;
    int success = EXIT_SUCCESS;

    /* Read all contents from `stdin` at once, `granularity` bytes at a time;
     a short read means end of input. */
    do {
        char *read_here;
        if(!(read_here = CharArrayBuffer(&buffer, granularity))
            || (nread = fread(read_here, 1, granularity, stdin), ferror(stdin))
            || !CharArrayExpand(&buffer, nread)) goto catch;
    } while(nread == granularity);

    /* Embed '\0' on the end for simple lexing. */
    {
        char *zero = CharArrayNew(&buffer);
        if(!zero) goto catch;
        *zero = '\0';
    }

    /* We use simplified sentinel method of detecting EOF, no embedded '\0'. */
    {
        const char *const b = CharArrayGet(&buffer);
        const size_t len_to_zero = (size_t)(strchr(b, '\0') - b);
        if(len_to_zero != CharArraySize(&buffer) - 1) {
            errno = EILSEQ;
            fprintf(stderr, "Embedded '\\0' at byte %lu/%lu.\n",
                (unsigned long)len_to_zero,
                (unsigned long)CharArraySize(&buffer) - 1);
            goto catch;
        }
    }

    /* Point the `scanner` to the `buffer`. */
    scanner.to = CharArrayGet(&buffer);

    /* Scan all input. `next` reports a syntax error only through `errno`, and
     library calls such as `printf` are allowed to leave `errno` non-zero even
     when they succeed, so clear it immediately before every call to `next`;
     after the loop it then reflects the final `next` call alone. */
    for( ; ; ) {
        struct Token *token;
        errno = 0;
        if(!(symbol = next())) break;
        token = TokenArrayNew(&tokens);
        if(!token || !init_token(token, symbol)) goto catch;
        printf("%.*s -> %s\n", token->length, token->from,
            symbols[token->symbol]);
    }
    /* `errno` will be set if a syntax error occurs. */
    if(errno) goto catch;

    /* Parse input if it is a valid matrix. */
    if(!parse(&tokens, &matrix)) goto catch;

    /* Print the matrix, one row per line; `parse` guarantees `cols` >= 1. */
    n = IntArraySize(&matrix.ints);
    ints = IntArrayGet(&matrix.ints);
    printf("matrix columns %lu.\n", (unsigned long)matrix.cols);
    for(i = 0; i < n; i++)
        printf("%4d%s", ints[i],
        (i % matrix.cols) == (matrix.cols - 1) ? "\n" : ", ");
    printf("\n");

    goto finally;

catch:
    perror("Something went wrong");
    success = EXIT_FAILURE;

finally:
    /* Release everything on both the success and the failure path. */
    IntArray_(&matrix.ints);
    TokenArray_(&tokens);
    CharArray_(&buffer);
    return success;
}

Not really pertinent to your question, but if one wants to try my code as written, one needs BasicArray.h. One could have put maximum fixed-size arrays, but the project I'm working on has variable-sized dynamic arrays so I used that.

#include <stdlib.h> /* realloc free */
#include <assert.h> /* assert */
#include <errno.h>  /* errno */

/* Check defines. */
#ifndef ARRAY_NAME /* <-- error */
#error Generic ARRAY_NAME undefined.
#endif /* error --> */
#ifndef ARRAY_TYPE /* <-- error */
#error Generic ARRAY_TYPE undefined.
#endif /* --> */

/* Generics using the preprocessor;
 \url{ http://stackoverflow.com/questions/16522341/pseudo-generics-in-c }. */
#ifdef CAT
#undef CAT
#endif
#ifdef CAT_
#undef CAT_
#endif
#ifdef PCAT
#undef PCAT
#endif
#ifdef PCAT_
#undef PCAT_
#endif
#ifdef T
#undef T
#endif
#ifdef T_
#undef T_
#endif
#ifdef PT_
#undef PT_
#endif
#define CAT_(x, y) x ## y
#define CAT(x, y) CAT_(x, y)
#define PCAT_(x, y) x ## _ ## y
#define PCAT(x, y) PCAT_(x, y)
#define T_(thing) CAT(ARRAY_NAME, thing)
#define PT_(thing) PCAT(array, PCAT(ARRAY_NAME, thing))

typedef ARRAY_TYPE PT_(Type);
#define T PT_(Type)

/* Growable contiguous array of `T`; the struct tag is `<ARRAY_NAME>Array`.
 An all-zero struct (see `ARRAY_ZERO`) is the empty array. */
struct T_(Array);
struct T_(Array) {
    /* Storage obtained from `realloc`, or null while nothing is allocated. */
    T *data;
    /* Fibonacci; data -> (c0 < c1 || c0 == c1 == max_size). */
    /* `capacity` is the number of elements `data` has room for and
     `next_capacity` is the capacity the next growth step will use. */
    size_t capacity, next_capacity;
    /* !data -> !size, data -> size <= capacity */
    /* Number of elements currently in use. */
    size_t size;
};

/* `{0}` is `C99`. */
#ifndef ARRAY_ZERO /* <-- !zero */
#define ARRAY_ZERO { 0, 0, 0, 0 }
#endif /* !zero --> */

/** Ensures that `a` has room for at least `min_capacity` elements, growing the
 capacity along a Fibonacci-like sequence that starts at 8, 13. Does nothing
 when the capacity already suffices (or when `a` is empty and `min_capacity`
 is zero).
 @param a            Array to grow; must be non-null.
 @param min_capacity Required capacity, in elements.
 @param update_ptr   Optional. When non-null, and `*update_ptr` points into the
 array's current storage, it is adjusted to the same element in the new storage.
 @return 1 on success. 0 on failure, leaving `a` unchanged: `errno` is `ERANGE`
 when `min_capacity` elements cannot be represented in a `size_t` byte count;
 otherwise `realloc` failed and `errno` is whatever it left. */
static int PT_(reserve)(struct T_(Array) *const a,
    const size_t min_capacity, T **const update_ptr) {
    size_t c0, c1, offset = 0;
    int rebase = 0;
    T *data;
    /* Largest element count whose size in bytes still fits in a `size_t`; this
     must divide by the element size, not by the size of a pointer to it. */
    const size_t max_size = (size_t)-1 / sizeof(T);
    assert(a);
    if(!a->data) {
        if(!min_capacity) return 1;
        c0 = 8;
        c1 = 13;
    } else {
        if(min_capacity <= a->capacity) return 1;
        c0 = a->capacity;
        c1 = a->next_capacity;
    }
    if(min_capacity > max_size) return errno = ERANGE, 0;
    assert(c0 < c1);
    /* Fibonacci: c0 ^= c1, c1 ^= c0, c0 ^= c1, c1 += c0; */
    while(c0 < min_capacity) {
        size_t temp = c0 + c1; c0 = c1; c1 = temp;
        /* Clamp on overflow or when exceeding the representable maximum. */
        if(c1 > max_size || c1 < c0) c1 = max_size;
    }
    /* Remember where `*update_ptr` sits before reallocating: the old storage
     pointer must not be used once `realloc` has succeeded. */
    if(update_ptr && *update_ptr && a->data) {
        offset = (size_t)(*update_ptr - a->data);
        rebase = 1;
    }
    if(!(data = realloc(a->data, c0 * sizeof *a->data))) return 0;
    if(rebase) *update_ptr = data + offset;
    a->data = data;
    a->capacity = c0;
    a->next_capacity = c1;
    return 1;
}

/** Puts `a`, which must be non-null, into the empty state. Nothing is freed;
 any previous contents of the fields are simply overwritten. */
static void PT_(array)(struct T_(Array) *const a) {
    assert(a);
    a->size = a->next_capacity = a->capacity = 0;
    a->data = 0;
}

/** Destructor: frees the storage of `a` and resets it to the empty state.
 A null `a` is ignored. */
static void T_(Array_)(struct T_(Array) *const a) {
    if(a) {
        free(a->data);
        PT_(array)(a);
    }
}

/** Initialiser: sets `a` to the empty state without freeing anything.
 A null `a` is ignored. */
static void T_(Array)(struct T_(Array) *const a) {
    if(a) PT_(array)(a);
}

/** @return The number of elements in `a`, or zero when `a` is null. */
static size_t T_(ArraySize)(const struct T_(Array) *const a) {
    return a ? a->size : 0;
}

/** @return The storage of `a` (its first element), or null when `a` is null. */
static T *T_(ArrayGet)(const struct T_(Array) *const a) {
    if(!a) return 0;
    return a->data;
}

/** Iterates over `a`.
 @param a    Array to iterate; null yields null.
 @param here Element returned by the previous call, or null to start.
 @return The first element when `here` is null, otherwise the element after
 `here`; null once there is no such element. */
static T *T_(ArrayNext)(const struct T_(Array) *const a, const T *const here) {
    const T *following;
    if(!a) return 0;
    /* Starting out: the first element exists only if the array is non-empty. */
    if(!here) return a->size ? a->data : 0;
    following = here + 1;
    return (size_t)(following - a->data) < a->size ? (T *)following : 0;
}

/** Appends one uninitialised element to `a`, which must be non-null, growing
 the storage if needed; `update_ptr` is passed on to the reserve step.
 @return The new element, or null if the storage could not be grown. */
static T *PT_(new)(struct T_(Array) *const a, T **const update_ptr) {
    T *slot;
    assert(a);
    if(!PT_(reserve)(a, a->size + 1, update_ptr)) return 0;
    slot = a->data + a->size;
    a->size++;
    return slot;
}

/** Appends one uninitialised element to `a`.
 @return The new element, or null when `a` is null or growing failed. */
static T *T_(ArrayNew)(struct T_(Array) *const a) {
    return a ? PT_(new)(a, 0) : 0;
}

/** Makes room for `buffer` more elements past the current end of `a` without
 changing its size; follow up with the matching expand call to commit the
 elements that were actually written.
 @param a      Array; null yields null.
 @param buffer Number of elements of room requested; zero yields null.
 @return Pointer to the first free element, or null on failure. `errno` is
 `ERANGE` when `size + buffer` would overflow `size_t`. */
static T *T_(ArrayBuffer)(struct T_(Array) *const a, const size_t buffer) {
    if(!a || !buffer) return 0;
    /* Guard the addition below against wrapping around. */
    if(buffer > (size_t)-1 - a->size) return errno = ERANGE, 0;
    if(!PT_(reserve)(a, a->size + buffer, 0)) return 0;
    return a->data + a->size;
}

/** Grows the size of `a` by `add` elements, which must already fit within the
 current capacity.
 @return 1 on success; 0 when `a` is null, or, with `errno` set to `ERANGE`,
 when `add` elements do not fit in the remaining capacity. */
static int T_(ArrayExpand)(struct T_(Array) *const a, const size_t add) {
    if(!a) return 0;
    if(add <= a->capacity && a->size <= a->capacity - add) {
        a->size += add;
        return 1;
    }
    errno = ERANGE;
    return 0;
}

/* Prototype, needed because `unused_set` and `unused_coda` refer to each
 other. */
static void PT_(unused_coda)(void);
/** This silences the compiler's unused-static-function warnings for the
 functions of this header that a given includer does not use, but allows
 optimisation, (hopefully.) It references every public function once, passing
 null arguments, which each of them accepts. Nothing in this header calls it
 except `unused_coda`.
 \url{ http://stackoverflow.com/questions/43841780/silencing-unused-static-function-warnings-for-a-section-of-code } */
static void PT_(unused_set)(void) {
    T_(Array_)(0);
    T_(Array)(0);
    T_(ArraySize)(0);
    T_(ArrayGet)(0);
    T_(ArrayNext)(0, 0);
    T_(ArrayNew)(0);
    T_(ArrayBuffer)(0, 0);
    T_(ArrayExpand)(0, 0);
    PT_(unused_coda)();
}
/** {clang} is not fooled if you have one function, so this second function
 references `unused_set`, and `unused_set` references it in turn. */
static void PT_(unused_coda)(void) { PT_(unused_set)(); }

/* Un-define all macros. */
#undef ARRAY_NAME
#undef ARRAY_TYPE
#undef CAT
#undef CAT_
#undef PCAT
#undef PCAT_
#undef T
#undef T_
#undef PT_

One compiles this:

re2c -W -o C.c C.c.re
gcc -Wall -Wextra -O3 -pedantic -ansi C.c

Input this:

{{77,88,}}
^D

Gets:

{ -> LBRACK
{ -> LBRACK
77 -> NUMBER
, -> COMMA
88 -> NUMBER
, -> COMMA
} -> RBRACK
} -> RBRACK
Something went wrong: Illegal byte sequence

Input this:

     {{9, -8},  {-7,  4},  {2, 0}  }
^D

Gets:

{ -> LBRACK
{ -> LBRACK
9 -> NUMBER
, -> COMMA
-8 -> NUMBER
} -> RBRACK
, -> COMMA
{ -> LBRACK
-7 -> NUMBER
, -> COMMA
4 -> NUMBER
} -> RBRACK
, -> COMMA
{ -> LBRACK
2 -> NUMBER
, -> COMMA
0 -> NUMBER
} -> RBRACK
} -> RBRACK
matrix columns 2.
   9,   -8
  -7,    4
   2,    0

In hindsight, this is probably overkill for this problem, but it scales well. Also, when testing comprehensively, it's generally easier to test the lexer separately from the parser, which, in general, deals with far fewer symbols than the lexer deals with characters.

Upvotes: 0

kiran Biradar

Reputation: 12732

You could use getchar() as below

/* Reads one line of the form {{a,b},{c,d}} with getchar() and stores the
   integers row by row. `matrix`, `columns`, `i` and `j` are the variables from
   the question; `i` and `j` must both be 0 on entry and `matrix` must be large
   enough for the input (no bounds are checked here). */
int val = 0;
int number = 0;     /* magnitude of the element being read */
int sign = 1;       /* +1, or -1 after a leading '-' */
int haveNumber = 0; /* non-zero once a digit of the current element was read */
int dimLevel = 2;   /* brace depth at which the elements live */
int state = 0;      /* current brace depth */

/* Stop at the end of the line, and also at end of input so that a missing
   newline cannot cause an endless loop. */
while ((val = getchar()) != '\n' && val != EOF){

    if (isdigit(val) && state == dimLevel){
       number = number*10 + (val-'0');
       haveNumber = 1;
       continue;
    }
    if (val == '-' && state == dimLevel && !haveNumber){
       /* Remember the sign; negating `number` here would negate zero. */
       sign = -1;
       continue;
    }

    /* Any other character (',', '}', a space, ...) ends the current element.
       Storing it here also covers the last element of each row, which is
       followed by '}' rather than ','. */
    if (haveNumber){
       matrix[i][j] = sign*number;
       number = 0;
       haveNumber = 0;
       j++;
       if (j%columns==0){
          i++;
          j=0;
       }
    }
    sign = 1;

    if (val == '{' && state < dimLevel){
       state++;
    }
    else if (val == '}' && state > 0){
       state--;
    }
}

Upvotes: 1

How to ask for user input without knowing length in advance? In C

Answers (3)

Related Questions