From 990f2a931aaa998f26d8c13f36ecfe5a94037013 Mon Sep 17 00:00:00 2001 From: Niko Matsakis Date: Fri, 19 Jun 2015 13:27:24 -0400 Subject: [PATCH] sketch out the state enumeration algorithm --- src/lr1/mod.rs | 104 ++++++++++++++++++++++++++++++++++++++---------- src/lr1/test.rs | 4 +- 2 files changed, 84 insertions(+), 24 deletions(-) diff --git a/src/lr1/mod.rs b/src/lr1/mod.rs index 158423c..5f19cd2 100644 --- a/src/lr1/mod.rs +++ b/src/lr1/mod.rs @@ -3,6 +3,7 @@ use grammar::repr::*; use std::collections::{HashMap, HashSet}; use std::fmt::{Debug, Formatter, Error}; +use std::rc::Rc; use util::Prefix; mod first; @@ -11,16 +12,17 @@ mod first; struct LR1<'grammar> { grammar: &'grammar Grammar, - states: Vec>, first_sets: first::FirstSets, } struct State<'grammar> { - configurations: Vec>, + configurations: Configurations<'grammar>, shifts: HashMap, gotos: HashMap, } +type Configurations<'grammar> = Rc>>; + #[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] struct StateIndex(usize); @@ -37,29 +39,50 @@ struct Configuration<'grammar> { lookahead: Lookahead, } +struct StateSet<'grammar> { + states: Vec>, + state_map: HashMap, StateIndex>, +} + impl<'grammar> LR1<'grammar> { fn new(grammar: &'grammar Grammar) -> LR1 { LR1 { grammar: grammar, - states: vec![], first_sets: first::FirstSets::new(grammar), } } - fn build_states(&mut self, start_nt: NonterminalString) { - debug_assert!(self.states.is_empty()); + fn build_states(&mut self, start_nt: NonterminalString) -> Vec> { + let mut state_set = StateSet::new(); - let state0 = self.start_state(start_nt, Lookahead::EOF); - self.states.push(state0); - } - - fn start_state(&self, id: NonterminalString, lookahead: Lookahead) -> State<'grammar> { - let configurations = + // create the starting state + state_set.add_state( self.transitive_closure( - self.configurations(id, 0, lookahead)); - State { configurations: configurations, - shifts: HashMap::new(), - gotos: HashMap::new() } + self.configurations(start_nt, 0, Lookahead::EOF))); + + let mut counter = 0; + while counter < state_set.states.len() { + let configurations = state_set.states[counter].configurations.clone(); + counter += 1; + + // for each configuration where we can shift, do so, and + // create the transitive closure of the resulting state + let shifted_configurations = + configurations + .iter() + .filter_map(|configuration| configuration.shifted_configuration()) + .map(|configuration| self.transitive_closure(vec![configuration])); + + // add a state for each of those cases where we did a shift + for configuration in shifted_configurations { + state_set.add_state(configuration); + } + + // extract a new state + counter += 1; + } + + state_set.states } fn configurations(&self, @@ -81,7 +104,7 @@ impl<'grammar> LR1<'grammar> { // expands `state` with epsilon moves fn transitive_closure(&self, mut configurations: Vec>) - -> Vec> + -> Configurations<'grammar> { let mut counter = 0; @@ -114,19 +137,56 @@ impl<'grammar> LR1<'grammar> { configurations.extend(new_configurations); } - configurations + Rc::new(configurations) } } impl<'grammar> Configuration<'grammar> { - fn shift_symbol(&self) -> Option<(Symbol, &[Symbol])> { - if self.index == self.production.symbols.len() { - None + fn can_shift(&self) -> bool { + self.index < self.production.symbols.len() + } + + fn can_reduce(&self) -> bool { + self.index == self.production.symbols.len() + } + + fn shifted_configuration(&self) -> Option> { + if self.can_shift() { + Some(Configuration { production: self.production, + index: self.index + 1, + lookahead: self.lookahead }) } else { - Some((self.production.symbols[self.index], - &self.production.symbols[self.index+1..])) + None } } + + fn shift_symbol(&self) -> Option<(Symbol, &[Symbol])> { + if self.can_shift() { + Some((self.production.symbols[self.index], &self.production.symbols[self.index+1..])) + } else { + None + } + } +} + +impl<'grammar> StateSet<'grammar> { + fn new() -> StateSet<'grammar> { + StateSet { + states: vec![], + state_map: HashMap::new(), + } + } + + fn add_state(&mut self, configurations: Configurations<'grammar>) -> StateIndex { + let states = &mut self.states; + *self.state_map.entry(configurations.clone()).or_insert_with(|| { + let index = StateIndex(states.len()); + states.push(State { configurations: configurations, + shifts: HashMap::new(), + gotos: HashMap::new() }); + index + }) + } } impl<'grammar> Debug for Configuration<'grammar> { diff --git a/src/lr1/test.rs b/src/lr1/test.rs index 940eb75..234a9ae 100644 --- a/src/lr1/test.rs +++ b/src/lr1/test.rs @@ -1,7 +1,7 @@ use intern::intern; use grammar::repr::*; use test_util::{expect_debug, normalized_grammar}; -use super::{Configuration, Lookahead, LR1}; +use super::{Configuration, Configurations, Lookahead, LR1}; use super::Lookahead::EOF; fn nt(t: &str) -> NonterminalString { @@ -9,7 +9,7 @@ fn nt(t: &str) -> NonterminalString { } fn configurations<'g>(grammar: &'g Grammar, nonterminal: &str, index: usize, la: Lookahead) - -> Vec> + -> Configurations<'g> { let lr1 = LR1::new(&grammar); let configurations =