Onei

Reputation: 177

error[E0507]: Cannot move out of borrowed content

I'm trying to write a lexer in Rust. I'm relatively new to the language, but I have a background in C/C++. I'm having problems with how Rust manages memory in the following code, which generates the error "cannot move out of borrowed content". I've read cargo --explain E0507, which details possible solutions, but I'm struggling to grasp the underlying differences between how Rust and C/C++ manage memory. In essence, I want to understand how to manage dynamic memory in Rust (or find a better way to achieve what I'm doing).

The error is:

error[E0507]: cannot move out of borrowed content
  --> <anon>:65:16
   |
65 |         return self.read_tok.unwrap();
   |                ^^^^ cannot move out of borrowed content

error[E0507]: cannot move out of borrowed content
  --> <anon>:73:16
   |
73 |         return self.peek_tok.unwrap();
   |                ^^^^ cannot move out of borrowed content

error: aborting due to 2 previous errors

The code is:

use std::fmt;

#[derive(Debug, PartialEq)]
pub enum TokenType {
    EndOfFile,
    Illegal
}

pub struct Token {
    token_type: TokenType,
    value: String
}

impl Token {
    pub fn new(token_type: TokenType, value: String) -> Token {
        return Token {
            token_type: token_type,
            value: value
        };
    }

    pub fn is_token_type(&self, token_type: TokenType) -> bool {
        return self.token_type == token_type;
    }
}

impl fmt::Debug for Token {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{:?}[{}]", self.token_type, self.value)
    }
}

pub struct Lexer {
    input: String,
    read_pos: usize,
    peek_pos: usize,
    ch: char,
    read_tok: Option<Token>,
    peek_tok: Option<Token>
}

const EOF: char = 0 as char;

impl Lexer {
    pub fn new(input: &str) -> Lexer {
        return Lexer {
            input: input.to_string(),
            read_pos: 0,
            peek_pos: 1,
            ch: EOF,
            read_tok: None,
            peek_tok: None
        };
    }

    pub fn next_token(&mut self) -> Token {
        if self.peek_tok.is_none() {
            self.read_tok = Some(self.get_next_token());
        } else {
            self.read_tok = self.peek_tok.take();
        }

        return self.read_tok.unwrap(); // error[E0507]: cannot move out of borrowed content
    }

    pub fn peek_token(&mut self) -> Token {
        if self.peek_tok.is_none() {
            self.peek_tok = Some(self.get_next_token());
        }

        return self.peek_tok.unwrap(); // error[E0507]: cannot move out of borrowed content
    }

    fn get_next_token(&mut self) -> Token {
        let ch = self.next_char();
        let tok: Token;

        match ch {
            EOF => { tok = Token::new(TokenType::EndOfFile, "".to_string()); }
            _   => { tok = Token::new(TokenType::Illegal, ch.to_string()); }
        }

        return tok;
    }

    fn next_char(&mut self) -> char {
        if self.peek_pos >= self.input.len() {
            self.ch = EOF;
        } else {
            self.ch = self.input.chars().nth(self.peek_pos).unwrap();
        }

        self.read_pos = self.peek_pos;
        self.peek_pos += 1;

        return self.ch;
    }
}


fn main() {
    let input = "let x = 5;";
    let mut l = Lexer::new(input);

    loop {
        let t = l.next_token();
        println!("{:?}", t);

        if t.is_token_type(TokenType::EndOfFile) {
            break;
        }
    }
}

Rust playground links: https://play.rust-lang.org/?gist=bc85fafa35a5cbbd5ac4066aef9e333c&version=stable&backtrace=0 and https://play.rust-lang.org/?gist=21cba64f53488ee0a9389c0191c47134&version=stable&backtrace=0

I've translated the code into a working C++ implementation, which might give some more insight into what I'm trying to achieve:

#include <string>
#include <iostream>

enum TokenType {
    ENDOFFILE,
    ILLEGAL
};

class Token {
private:
    enum TokenType token_type;
    std::string value;

public:
    Token(enum TokenType token_type, std::string value)
    {
        this->token_type = token_type;
        this->value = value;
    }

    bool is_token_type(enum TokenType token_type)
    {
        return this->token_type == token_type;
    }

    std::string to_string()
    {
        std::string tok;

        switch (this->token_type) {
        case ENDOFFILE:
            tok = "EndOfFile";
            break;
        case ILLEGAL:
            tok = "Illegal[" + this->value + "]";
            break;
        }

        return tok;
    }
};

class Lexer {
private:
    std::string input;
    int read_pos;
    int peek_pos;
    char ch;
    Token *read_tok;
    Token *peek_tok;

    Token *get_next_token() {
        char c = this->next_char();
        std::string c_str;
        Token *t;

        c_str.push_back(c);

        switch (c) {
        case 0:
            t = new Token(ENDOFFILE, "");
            break;
        default:
            t = new Token(ILLEGAL, c_str);
        }

        return t;
    }

    char next_char()
    {
        if (this->peek_pos >= this->input.length()) {
            this->ch = 0;
        } else {
            this->ch = input.at(this->peek_pos);
        }

        this->read_pos = this->peek_pos;
        this->peek_pos += 1;

        return this->ch;
    }

public:
    Lexer (std::string input)
    {
        this->input = input;
        this->read_pos = -1;
        this->peek_pos = 0;
        this->ch = 0;
        this->read_tok = NULL;
        this->peek_tok = NULL;
    }

    Token *next_token()
    {
        if (this->read_tok != NULL) {
            delete this->read_tok;
        }

        if (this->peek_tok == NULL) {
            this->read_tok = this->get_next_token();
        } else {
            this->read_tok = this->peek_tok;
            this->peek_tok = NULL;
        }

        return this->read_tok;
    }

    Token *peek_token()
    {
        if (this->peek_tok == NULL) {
            this->peek_tok = this->get_next_token();
        }

        return this->peek_tok;
    }
};

int main(int argc, char **argv)
{
    std::string input = "let x = 5;";
    Lexer l = Lexer(input);

    while (1) {
        Token *t = l.next_token();
        std::cout << t->to_string() << std::endl;

        if (t->is_token_type(ENDOFFILE)) {
            break;
        }
    }

    return 0;
}

Upvotes: 0

Views: 3295

Answers (1)

user4815162342

Reputation: 155286

You came very close to getting it right, but there are two problems with your code.

First, as the compiler tells you, an assignment like the following is prohibited:

self.read_tok = self.peek_tok;
self.peek_tok = None;

The first line attempts to move the Option<Token> object out of self.peek_tok. In Rust, objects can be moved out of local variables, but not out of the fields of a borrowed struct (such as self here) or out of slice elements. For a local variable, the compiler can check that it is not used after the move and arrange for its destructor not to run. It cannot do the same for an object reached through a reference or stored inside a slice, at least not without adding run-time overhead to every structure or container.
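To make the distinction concrete, here is a minimal, self-contained sketch (the Holder struct is purely illustrative and not part of the question's code):

struct Holder {
    s: String,
}

fn main() {
    // Moving out of a local variable is fine: the compiler can verify
    // that `v` is never used again and can skip its destructor.
    let v = String::from("hello");
    let _moved = v;

    // Moving a field out through a reference is rejected, because the
    // borrowed Holder must still be fully valid once the borrow ends.
    let h = Holder { s: String::from("hello") };
    let r = &h;
    // let _s = r.s;         // error[E0507]: cannot move out of borrowed content
    let _len = r.s.len();    // reading through the reference is fine
}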

Moving an object out of a borrowed struct is still possible when the field is wrapped in a container that can give up its contents while leaving something valid behind. Fortunately, Option is exactly such a container, and its take() method is designed for this purpose:

self.read_tok = self.peek_tok.take();

Option::take() moves the object from the option, replaces it with None, and returns the object.
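Here is a standalone sketch of what take() does, outside the context of the lexer:

fn main() {
    let mut slot: Option<String> = Some(String::from("token"));

    // take() moves the value out and leaves None in its place, so whatever
    // owns the Option (a struct field, for example) stays in a valid state.
    let grabbed = slot.take();

    assert_eq!(grabbed, Some(String::from("token")));
    assert_eq!(slot, None);
}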

Second, once the above is fixed, the compiler complains of "moving out of borrowed content" on the return statements of next_token and peek_token: Option::unwrap() takes the option by value, so it again tries to move the Option<Token> out of the borrowed self. Here you have the choice of cloning the Token, or moving it out of the option with Option::take() as above. The cloning approach requires adding Clone to the derives of TokenType and Token, as well as changing the returns to:

// as_ref() converts the Option<Token> into an Option<&Token>,
// which is then unwrapped to a &Token and cloned
self.read_tok.as_ref().unwrap().clone()

With these changes, the example compiles, although it still flags every character of the input as Illegal, since the lexer only recognizes the end-of-file case so far.
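For reference, here is a self-contained, cut-down stand-in for the lexer (the Buffer struct and its trivial get_next_token are illustrative only, and Debug is derived instead of hand-written for brevity) showing both pieces together: take() to hand the buffered token from peek_tok to read_tok, and clone() to return it without moving it out of self:

#[derive(Debug, PartialEq, Clone)]
pub enum TokenType {
    EndOfFile,
    Illegal,
}

#[derive(Debug, Clone)]
pub struct Token {
    token_type: TokenType,
    value: String,
}

struct Buffer {
    read_tok: Option<Token>,
    peek_tok: Option<Token>,
}

impl Buffer {
    // Stand-in for the real Lexer::get_next_token().
    fn get_next_token(&mut self) -> Token {
        Token { token_type: TokenType::Illegal, value: "x".to_string() }
    }

    fn next_token(&mut self) -> Token {
        if self.peek_tok.is_none() {
            self.read_tok = Some(self.get_next_token());
        } else {
            // take() moves the Token from one field to the other,
            // leaving None behind in peek_tok.
            self.read_tok = self.peek_tok.take();
        }
        // clone() returns a copy and leaves the stored Token in place.
        self.read_tok.as_ref().unwrap().clone()
    }

    fn peek_token(&mut self) -> Token {
        if self.peek_tok.is_none() {
            self.peek_tok = Some(self.get_next_token());
        }
        self.peek_tok.as_ref().unwrap().clone()
    }
}

fn main() {
    let mut b = Buffer { read_tok: None, peek_tok: None };
    let peeked = b.peek_token(); // buffers a token in peek_tok
    let next = b.next_token();   // hands the buffered token over via take()
    println!("{:?} / {:?}", peeked, next);
    println!("eof? {}", next.token_type == TokenType::EndOfFile);
}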

Upvotes: 5
