From a661d9ee944bd26634aba04d371c3a5cd853eb40 Mon Sep 17 00:00:00 2001 From: Niko Matsakis Date: Sat, 20 Jun 2015 05:41:54 -0400 Subject: [PATCH] add an LR(1) interpreter that executes the action tables and constructs a parse-tree. Use it to test the results from the LR(1) table generation algorithm. --- src/intern/mod.rs | 2 +- src/lr1/first/mod.rs | 2 +- src/lr1/interpret.rs | 126 +++++++++++++++++++++++++++++++++++++++++++ src/lr1/mod.rs | 7 +-- src/lr1/test.rs | 29 ++++++++++ 5 files changed, 161 insertions(+), 5 deletions(-) create mode 100644 src/lr1/interpret.rs diff --git a/src/intern/mod.rs b/src/intern/mod.rs index 0e6b684..8b98984 100644 --- a/src/intern/mod.rs +++ b/src/intern/mod.rs @@ -2,7 +2,7 @@ use std::collections::HashMap; use std::cell::RefCell; use std::fmt::{Debug, Display, Error, Formatter}; use std::cmp::{PartialOrd, Ord, Ordering}; -use util::{map, Map}; +use util::{map}; #[cfg(test)] mod test; diff --git a/src/lr1/first/mod.rs b/src/lr1/first/mod.rs index 7669922..7bb5f8d 100644 --- a/src/lr1/first/mod.rs +++ b/src/lr1/first/mod.rs @@ -2,7 +2,7 @@ use grammar::repr::*; use std::collections::{HashMap, HashSet}; -use util::{map, Map}; +use util::{map}; use super::Lookahead; diff --git a/src/lr1/interpret.rs b/src/lr1/interpret.rs new file mode 100644 index 0000000..20e8a86 --- /dev/null +++ b/src/lr1/interpret.rs @@ -0,0 +1,126 @@ +//! LR(1) interpreter. Just builds up parse trees. Intended for testing. 
+ +use lr1::{Action, State, Lookahead}; +use grammar::repr::*; +use std::fmt::{Debug, Display, Formatter, Error}; +use util::Sep; + +#[derive(PartialEq, Eq)] +pub enum ParseTree { + Nonterminal(NonterminalString, Vec), + Terminal(TerminalString), +} + +pub fn interpret<'g,TOKENS>(states: &'g [State<'g>], tokens: TOKENS) + -> Result + where TOKENS: Iterator +{ + let mut m = Machine::new(states); + m.execute(tokens) +} + +struct Machine<'grammar> { + states: &'grammar [State<'grammar>], + state_stack: Vec<&'grammar State<'grammar>>, + data_stack: Vec, +} + +impl<'grammar> Machine<'grammar> { + fn new(states: &'grammar [State<'grammar>]) -> Machine<'grammar> { + Machine { states: states, + state_stack: vec![], + data_stack: vec![] } + } + + fn execute(&mut self, mut tokens: TOKENS) -> Result + where TOKENS: Iterator + { + assert!(self.state_stack.is_empty()); + assert!(self.data_stack.is_empty()); + + self.state_stack.push(&self.states[0]); + + let mut token = tokens.next(); + while let Some(terminal) = token { + let state = *self.state_stack.last().unwrap(); + + // check whether we can shift this token + match state.tokens.get(&Lookahead::Terminal(terminal)) { + None => { return Err(()); } + + Some(&Action::Shift(next_index)) => { + self.data_stack.push(ParseTree::Terminal(terminal)); + self.state_stack.push(&self.states[next_index.0]); + token = tokens.next(); + } + + Some(&Action::Reduce(production)) => { + let more = self.reduce(production); + assert!(more); + } + } + } + + // drain now for EOF + loop { + let state = *self.state_stack.last().unwrap(); + match state.tokens.get(&Lookahead::EOF) { + None => { return Err(()); } + Some(&Action::Shift(_)) => { unreachable!("cannot shift EOF") } + Some(&Action::Reduce(production)) => { + if !self.reduce(production) { + assert_eq!(self.data_stack.len(), 1); + return Ok(self.data_stack.pop().unwrap()); + } + } + } + } + } + + fn reduce(&mut self, production: &Production) -> bool { + let args = production.symbols.len(); + + 
// remove the top N items from the data stack + let mut popped = vec![]; + for _ in 0 .. args { + popped.push(self.data_stack.pop().unwrap()); + } + popped.reverse(); + + // remove the top N states + for _ in 0 .. args { + self.state_stack.pop().unwrap(); + } + + // construct the new, reduced tree and push it on the stack + let tree = ParseTree::Nonterminal(production.nonterminal, popped); + self.data_stack.push(tree); + + // recover the state and extract the "Goto" action + let receiving_state = *self.state_stack.last().unwrap(); + match receiving_state.gotos.get(&production.nonterminal) { + Some(goto_state) => { + self.state_stack.push(&self.states[goto_state.0]); + true // keep going + } + None => { + false // all done + } + } + } +} + +impl Debug for ParseTree { + fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> { + Display::fmt(self, fmt) + } +} + +impl Display for ParseTree { + fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> { + match *self { + ParseTree::Nonterminal(id, ref trees) => write!(fmt, "[{}: {}]", id, Sep(", ", trees)), + ParseTree::Terminal(id) => write!(fmt, "{}", id), + } + } +} diff --git a/src/lr1/mod.rs b/src/lr1/mod.rs index 6b7cbfd..e939286 100644 --- a/src/lr1/mod.rs +++ b/src/lr1/mod.rs @@ -1,12 +1,12 @@ //! Naive LR(1) generation algorithm. 
use grammar::repr::*; -use std::collections::{HashMap, HashSet}; use std::fmt::{Debug, Formatter, Error}; use std::rc::Rc; use util::{map, Map, Multimap, Set, Prefix}; mod first; +mod interpret; #[cfg(test)] mod test; @@ -16,7 +16,8 @@ struct LR1<'grammar> { } #[derive(Debug)] -struct State<'grammar> { +pub struct State<'grammar> { + index: StateIndex, items: Items<'grammar>, tokens: Map>, gotos: Map, @@ -234,7 +235,7 @@ impl<'grammar> StateSet<'grammar> { let states = &mut self.states; *self.state_map.entry(items.clone()).or_insert_with(|| { let index = StateIndex(states.len()); - states.push(State { items: items, tokens: map(), gotos: map() }); + states.push(State { index: index, items: items, tokens: map(), gotos: map() }); index }) } diff --git a/src/lr1/test.rs b/src/lr1/test.rs index 6f66b57..c9f9acc 100644 --- a/src/lr1/test.rs +++ b/src/lr1/test.rs @@ -3,11 +3,18 @@ use grammar::repr::*; use test_util::{expect_debug, normalized_grammar}; use super::{Items, Lookahead, LR1}; use super::Lookahead::EOF; +use super::interpret::interpret; fn nt(t: &str) -> NonterminalString { NonterminalString(intern(t)) } +macro_rules! tokens { + ($($x:expr),*) => { + vec![$(TerminalString(intern($x))),*].into_iter() + } +} + fn items<'g>(grammar: &'g Grammar, nonterminal: &str, index: usize, la: Lookahead) -> Items<'g> { @@ -97,4 +104,26 @@ grammar Foo { // and yields expected number of states. let states = lr1.build_states(nt("S")).unwrap(); assert_eq!(states.len(), 16); + + // execute it on some sample inputs. 
+ let tree = interpret(&states, tokens!["N", "-", "(", "N", "-", "N", ")"]).unwrap(); + assert_eq!( + &format!("{:?}", tree)[..], + r#"[S: [E: [E: [T: "N"]], "-", [T: "(", [E: [E: [T: "N"]], "-", [T: "N"]], ")"]]]"#); + + // incomplete: + assert!(interpret(&states, tokens!["N", "-", "(", "N", "-", "N"]).is_err()); + + // incomplete: + assert!(interpret(&states, tokens!["N", "-"]).is_err()); + + // unexpected character: + assert!(interpret(&states, tokens!["N", "-", ")", "N", "-", "N", "("]).is_err()); + + // parens first: + let tree = interpret(&states, tokens!["(", "N", "-", "N", ")", "-", "N"]).unwrap(); + println!("{}", tree); + assert_eq!( + &format!("{}", tree)[..], + r#"[S: [E: [E: [T: "(", [E: [E: [T: "N"]], "-", [T: "N"]], ")"]], "-", [T: "N"]]]"#); }