troutwine
troutwine

Reputation: 3831

Correctly setting lifetimes and mutability expectations in Rust

I'm rather new to Rust and have put together a little experiment that blows my understanding of lifetime annotations entirely out of the water. This is compiled with rust-0.13.0-nightly, and there's a playpen version of the code here.

The meat of the program is the function 'recognize', which is co-responsible for allocating String instances along with the function 'lex'. I'm sure the code is a bit goofy, so in addition to getting the lifetimes right enough to get this compiling, I would also happily accept some guidance on making this idiomatic.

/// A lexed token. `'a` is the lifetime of the string data a token borrows.
#[deriving(Show)]
enum Token<'a> {
    /// One field, held as a shared reference to a `String` owned elsewhere.
    Field(&'a std::string::String),
}

/// State that `recognize` updates one character at a time.
#[deriving(Show)]
struct LexerState<'a> {
    /// Incremented once for every character handed to `recognize`.
    character: int,
    /// Incremented each time a ',' is recognized.
    field: int,
    /// Tokens pushed so far; one is pushed per ',' recognized.
    tokens: Vec<Token<'a>>,
    /// Buffer for the field being gathered. Note that this is a shared (`&`)
    /// reference, not an owned `String` and not a `&mut`.
    str_buf: &'a std::string::String,
}

// The goal with recognize is to:
//
//  * gather all A .. z into a temporary string buffer str_buf
//  * on ',', move buffer into a Field token
//  * store the completely extracted field in LexerState's tokens attribute
//
// I think I'm not understanding how to specify the lifetimes and mutability
// correctly.
//
// Every character, matched or not, increments `ctx.character`; the same `ctx`
// reference is handed back to the caller.
fn recognize<'a, 'r>(c: char, ctx: &'r mut LexerState<'a>) -> &'r mut LexerState<'a> {
    match c {
        'A' ... 'z'  => {
            // NOTE(review): `str_buf` is declared as a shared `&'a String`,
            // while `push` needs mutable access to the string.
            ctx.str_buf.push(c);
        },
        ',' => {
            // The token receives a copy of the reference stored in `str_buf`,
            // not the string itself.
            ctx.tokens.push(Field(ctx.str_buf));
            ctx.field += 1;
            // NOTE(review): this borrows a temporary `String`, which does not
            // live as long as `'a` requires.
            ctx.str_buf = &std::string::String::new();
        },
        // Any other character is only counted below.
        _ => ()
    };
    ctx.character += 1;
    ctx
}

// Runs `recognize` over every character yielded by `it` and returns the
// accumulated state. An `Err` item panics with the message "wtf".
fn lex<'a, I, E>(it: &mut I)
                     -> LexerState<'a> where I: Iterator<Result<char, E>> {
    // NOTE(review): `str_buf` starts out as a reference to a temporary
    // `String` created right here, yet the state is returned to the caller.
    let mut ctx = LexerState { character: 0, field: 0,
                               tokens: Vec::new(), str_buf: &std::string::String::new() };
    for val in *it {
        let c:char = val.ok().expect("wtf");
        // The returned reference is not used; `ctx` is updated in place.
        recognize(c, &mut ctx);
    }
    ctx
}

// Lexes the characters read from standard input and prints the final state.
fn main() {
    // Despite the name, `tokens` holds the whole `LexerState` returned by `lex`.
    let tokens = lex(&mut std::io::stdio::stdin().chars());
    println!("{}", tokens)
}

Upvotes: 1

Views: 83

Answers (1)

Ms2ger
Ms2ger

Reputation: 15983

In this case, you're constructing new strings rather than borrowing existing strings, so you'd use an owned string directly:

use std::mem;

/// A lexed token that owns its text, so no lifetime parameter is needed.
#[deriving(Show)]
enum Token {
    /// One field, stored as an owned `String`.
    Field(String),
}

/// State that `recognize` updates one character at a time.
#[deriving(Show)]
struct LexerState {
    /// Incremented once for every character handed to `recognize`.
    character: int,
    /// Incremented each time a ',' is recognized.
    field: int,
    /// Tokens produced so far; one is pushed per ',' recognized.
    tokens: Vec<Token>,
    /// Owned buffer holding the characters of the field being gathered.
    str_buf: String,
}

// The goal with recognize is to:
//
//  * gather all A .. z into a temporary string buffer str_buf
//  * on ',', move buffer into a Field token
//  * store the completely extracted field in LexerState's tokens attribute
//
// Every character, matched or not, increments `ctx.character`. The same `ctx`
// reference is returned to the caller. With a single reference parameter,
// lifetime elision ties the returned reference to `ctx`, so no explicit
// lifetime parameters are needed.
fn recognize(c: char, ctx: &mut LexerState) -> &mut LexerState {
    match c {
        // NOTE(review): in ASCII this range also covers `[`, `\`, `]`, `^`,
        // `_` and the backtick, which sit between 'Z' and 'a'.
        'A' ...'z' => { ctx.str_buf.push(c); }
        ',' => {
            // `mem::replace` moves the finished buffer out of `ctx` and leaves
            // a fresh, empty `String` behind, so the token can own its text.
            let finished = mem::replace(&mut ctx.str_buf, String::new());
            ctx.tokens.push(Field(finished));
            ctx.field += 1;
        }
        // Any other character is only counted below.
        _ => (),
    };
    ctx.character += 1;
    ctx
}

// Feeds every character yielded by `it` through `recognize`, starting from an
// empty state, and returns that state once the iterator is exhausted.
//
// Panics with the message "wtf" if the iterator yields an `Err`.
// Characters gathered after the last ',' are not turned into a token; they
// stay in the returned state's `str_buf`.
fn lex<I, E>(it: &mut I) -> LexerState
    where I: Iterator<Result<char, E>>
{
    let mut state = LexerState {
        character: 0,
        field: 0,
        tokens: Vec::new(),
        str_buf: String::new(),
    };
    for item in *it {
        let ch = item.ok().expect("wtf");
        // `recognize` updates `state` in place; its return value is not needed.
        recognize(ch, &mut state);
    }
    state
}

// Lexes the characters read from standard input and prints the final state.
fn main() {
    // Bind the reader to a local so the character iterator can borrow from it.
    let mut input = std::io::stdio::stdin();
    let state = lex(&mut input.chars());
    println!("{}", state);
}

Upvotes: 4

Related Questions