diff --git a/doc/calculator/src/calculator8.lalrpop b/doc/calculator/src/calculator8.lalrpop index 87616f9..79f84a6 100644 --- a/doc/calculator/src/calculator8.lalrpop +++ b/doc/calculator/src/calculator8.lalrpop @@ -1,7 +1,7 @@ use ast::{ExprSymbol, Opcode}; use tok8::Tok; -grammar<'input>(text: &'input str); +grammar<'input>(input: &'input str); pub Expr: Box> = { // (1) Expr "ExprOp" Factor => Box::new(ExprSymbol::Op(<>)), // (2) @@ -30,7 +30,6 @@ extern { "(" => Tok::ParenOpen, ")" => Tok::ParenClose, } - } diff --git a/doc/calculator/src/tok8.rs b/doc/calculator/src/tok8.rs index 3719cd0..578c843 100644 --- a/doc/calculator/src/tok8.rs +++ b/doc/calculator/src/tok8.rs @@ -4,7 +4,6 @@ pub type Spanned = Result<(Loc, Tok, Loc), Error>; #[derive(Copy, Clone, Debug)] pub enum Tok<'input> { - Num(i32), NumSymbol(&'input str), FactorOp(Opcode), ExprOp(Opcode), diff --git a/doc/src/lexer_tutorial/003_token_references.md b/doc/src/lexer_tutorial/003_token_references.md index 55ef9c0..7422917 100644 --- a/doc/src/lexer_tutorial/003_token_references.md +++ b/doc/src/lexer_tutorial/003_token_references.md @@ -1 +1,128 @@ # Using tokens with references + +When using a custom lexer, you might want tokens to hold references to the original input. +This allows to use references to the input when the grammar can have arbitrary symbols such as variable names. +Using references instead of copying the symbols can improve performance and memory usage of the parser. + +## The Lexer + +We can now create a new calculator parser that can deal with symbols the same way an interpreter would deal with variables. +First we need the corresponding AST : + +``` rust +pub enum ExprSymbol<'input>{ + NumSymbol(&'input str), + Op(Box>, Opcode, Box>), + Error, +} +``` + +Then, we need to build the tokens: + + +``` rust +#[derive(Copy, Clone, Debug)] +pub enum Tok<'input> { + NumSymbol(&'input str), + FactorOp(Opcode), + ExprOp(Opcode), + ParenOpen, + ParenClose, +} +``` + +Then, we can build the lexer itself. +It's quite simple, it returns any operator, and if it detects any other character, stores the beginning then continues to the next operator and sends the symbol it just parsed. + +``` rust +use std::str::CharIndices; + +pub struct Lexer<'input> { + chars: std::iter::Peekable>, + input: &'input str, +} + +impl<'input> Lexer<'input> { + pub fn new(input: &'input str) -> Self { + Lexer { + chars: input.char_indices().peekable(), + input, + } + } +} + +impl<'input> Iterator for Lexer<'input> { + type Item = Spanned, usize, ()>; + + fn next(&mut self) -> Option { + loop { + match self.chars.next() { + Some((_, ' ')) | Some((_, '\n')) | Some((_, '\t')) => continue, + Some((i, ')')) => return Some(Ok((i, Tok::ParenClose, i + 1))), + Some((i, '(')) => return Some(Ok((i, Tok::ParenOpen, i + 1))), + Some((i, '+')) => return Some(Ok((i, Tok::ExprOp(Opcode::Add), i + 1))), + Some((i, '-')) => return Some(Ok((i, Tok::ExprOp(Opcode::Sub), i + 1))), + Some((i, '*')) => return Some(Ok((i, Tok::FactorOp(Opcode::Mul), i + 1))), + Some((i, '/')) => return Some(Ok((i, Tok::FactorOp(Opcode::Div), i + 1))), + + None => return None, // End of file + Some((i,_)) => { + loop { + match self.chars.peek() { + Some((j, ')'))|Some((j, '('))|Some((j, '+'))|Some((j, '-'))|Some((j, '*'))|Some((j, '/'))|Some((j,' ')) + => return Some(Ok((i, Tok::NumSymbol(&self.input[i..*j]), *j))), + None => return Some(Ok((i, Tok::NumSymbol(&self.input[i..]),self.input.len()))), + _ => {self.chars.next();}, + } + } + } + } + } + } +} +``` + +## The parser + +We can then take a look at the corresponding parser with a new grammar: + +``` rust +Term: Box> = { + "num" => Box::new(ExprSymbol::NumSymbol(<>)), + "(" ")" +}; +``` + +We need to pass the input to the parser so that the input's lifetime is known to the borrow checker when compiling the generated parser. +``` rust +grammar<'input>(input: &'input str); +``` + +Then we just need to define the tokens the same as before : + +``` rust +extern { + type Location = usize; + type Error = (); + + enum Tok<'input> { + "num" => Tok::NumSymbol(<&'input str>), + "FactorOp" => Tok::FactorOp(), + "ExprOp" => Tok::ExprOp(), + "(" => Tok::ParenOpen, + ")" => Tok::ParenClose, + } +} +``` + +# Calling the parser +We can finally run the parser we built: + +``` rust +let input = "22 * pi + 66"; +let lexer = Lexer::new(input); +let expr = calculator8::ExprParser::new() + .parse(input,lexer) + .unwrap(); +assert_eq!(&format!("{:?}", expr), "((\"22\" * \"pi\") + \"66\")"); +```