From c66f7e2d40fd06606a84a6bce695397935bdeaa8 Mon Sep 17 00:00:00 2001 From: Niko Matsakis Date: Fri, 19 Jun 2015 17:28:03 -0400 Subject: [PATCH] lr1 construction seems to be working, tests are not yet really stable --- src/grammar/repr.rs | 13 ++- src/intern/mod.rs | 3 +- src/lr1/first/mod.rs | 3 +- src/lr1/mod.rs | 120 ++++++++++++--------- src/lr1/test.rs | 252 +++++++++++++++++++++++++++++++++++++++++-- src/util.rs | 49 +++++++++ 6 files changed, 369 insertions(+), 71 deletions(-) diff --git a/src/grammar/repr.rs b/src/grammar/repr.rs index 8672944..e755453 100644 --- a/src/grammar/repr.rs +++ b/src/grammar/repr.rs @@ -5,9 +5,8 @@ */ use intern::InternedString; -use std::collections::HashMap; use std::fmt::{Debug, Display, Formatter, Error}; -use util::Sep; +use util::{map, Map, Sep}; // These concepts we re-use wholesale pub use grammar::parse_tree::{NonterminalString, Span, TerminalString}; @@ -15,8 +14,8 @@ pub use grammar::parse_tree::{NonterminalString, Span, TerminalString}; #[derive(Clone, Debug)] pub struct Grammar { pub action_fn_defns: Vec, - pub productions: HashMap>, - pub conversions: HashMap, + pub productions: Map>, + pub conversions: Map, pub types: Types, } @@ -54,13 +53,13 @@ pub enum TypeRepr { #[derive(Clone, Debug)] pub struct Types { terminal_type: TypeRepr, - nonterminal_types: HashMap + nonterminal_types: Map } impl Types { pub fn new(terminal_type: TypeRepr) -> Types { Types { terminal_type: terminal_type, - nonterminal_types: HashMap::new() } + nonterminal_types: map() } } pub fn add_type(&mut self, nt_id: NonterminalString, ty: TypeRepr) { @@ -173,7 +172,7 @@ impl Grammar { types: Types) -> Grammar { - let mut productions = HashMap::new(); + let mut productions = map(); for production in flat_productions { let mut vec = productions.entry(production.nonterminal).or_insert(vec![]); diff --git a/src/intern/mod.rs b/src/intern/mod.rs index 945e835..0e6b684 100644 --- a/src/intern/mod.rs +++ b/src/intern/mod.rs @@ -2,6 +2,7 @@ use std::collections::HashMap; use std::cell::RefCell; use std::fmt::{Debug, Display, Error, Formatter}; use std::cmp::{PartialOrd, Ord, Ordering}; +use util::{map, Map}; #[cfg(test)] mod test; @@ -50,7 +51,7 @@ fn write(f: F) -> R impl Interner { fn new() -> Interner { - Interner { map: HashMap::new(), strings: vec![] } + Interner { map: map(), strings: vec![] } } pub fn data(&self, i: InternedString) -> &str { diff --git a/src/lr1/first/mod.rs b/src/lr1/first/mod.rs index e9cdcc2..7669922 100644 --- a/src/lr1/first/mod.rs +++ b/src/lr1/first/mod.rs @@ -2,6 +2,7 @@ use grammar::repr::*; use std::collections::{HashMap, HashSet}; +use util::{map, Map}; use super::Lookahead; @@ -16,7 +17,7 @@ pub type FirstSet = HashSet>; impl FirstSets { pub fn new(grammar: &Grammar) -> FirstSets { - let mut this = FirstSets { map: HashMap::new() }; + let mut this = FirstSets { map: map() }; let mut changed = true; while changed { changed = false; diff --git a/src/lr1/mod.rs b/src/lr1/mod.rs index 5f19cd2..47858a3 100644 --- a/src/lr1/mod.rs +++ b/src/lr1/mod.rs @@ -4,7 +4,7 @@ use grammar::repr::*; use std::collections::{HashMap, HashSet}; use std::fmt::{Debug, Formatter, Error}; use std::rc::Rc; -use util::Prefix; +use util::{map, Map, Multimap, Set, Prefix}; mod first; @@ -15,15 +15,16 @@ struct LR1<'grammar> { first_sets: first::FirstSets, } +#[derive(Debug)] struct State<'grammar> { - configurations: Configurations<'grammar>, - shifts: HashMap, - gotos: HashMap, + items: Items<'grammar>, + shifts: Vec<(TerminalString, StateIndex)>, + gotos: Vec<(NonterminalString, StateIndex)>, } -type Configurations<'grammar> = Rc>>; +type Items<'grammar> = Rc>>; -#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] struct StateIndex(usize); #[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] @@ -33,7 +34,7 @@ enum Lookahead { } #[derive(Copy, Clone, Hash, PartialEq, Eq)] -struct Configuration<'grammar> { +struct Item<'grammar> { production: &'grammar Production, index: usize, // the dot comes before `index`, so `index` would be 1 for X = A (*) B C lookahead: Lookahead, @@ -41,7 +42,7 @@ struct Configuration<'grammar> { struct StateSet<'grammar> { states: Vec>, - state_map: HashMap, StateIndex>, + state_map: Map, StateIndex>, } impl<'grammar> LR1<'grammar> { @@ -52,30 +53,38 @@ impl<'grammar> LR1<'grammar> { } } - fn build_states(&mut self, start_nt: NonterminalString) -> Vec> { + fn build_states(&self, start_nt: NonterminalString) -> Vec> { let mut state_set = StateSet::new(); // create the starting state state_set.add_state( self.transitive_closure( - self.configurations(start_nt, 0, Lookahead::EOF))); + self.items(start_nt, 0, Lookahead::EOF))); let mut counter = 0; while counter < state_set.states.len() { - let configurations = state_set.states[counter].configurations.clone(); - counter += 1; + let items = state_set.states[counter].items.clone(); - // for each configuration where we can shift, do so, and - // create the transitive closure of the resulting state - let shifted_configurations = - configurations - .iter() - .filter_map(|configuration| configuration.shifted_configuration()) - .map(|configuration| self.transitive_closure(vec![configuration])); + // group the items that we can transition into by shifting + // over a term or nonterm + let transitions: Multimap> = + items.iter() + .filter_map(|item| item.shifted_item()) + .collect(); - // add a state for each of those cases where we did a shift - for configuration in shifted_configurations { - state_set.add_state(configuration); + for (symbol, items) in transitions.into_iter() { + let items = self.transitive_closure(items); + let next_state = state_set.add_state(items); + + // FIXME check for conflicts + match symbol { + Symbol::Terminal(t) => { + state_set.states[counter].shifts.push((t, next_state)); + } + Symbol::Nonterminal(t) => { + state_set.states[counter].gotos.push((t, next_state)); + } + } } // extract a new state @@ -85,17 +94,17 @@ impl<'grammar> LR1<'grammar> { state_set.states } - fn configurations(&self, + fn items(&self, id: NonterminalString, index: usize, lookahead: Lookahead) - -> Vec> + -> Vec> { self.grammar.productions_for(id) .iter() .map(|production| { debug_assert!(index <= production.symbols.len()); - Configuration { production: production, + Item { production: production, index: index, lookahead: lookahead } }) @@ -103,45 +112,45 @@ impl<'grammar> LR1<'grammar> { } // expands `state` with epsilon moves - fn transitive_closure(&self, mut configurations: Vec>) - -> Configurations<'grammar> + fn transitive_closure(&self, mut items: Vec>) + -> Items<'grammar> { let mut counter = 0; - let mut set: HashSet> = - configurations.iter().cloned().collect(); + let mut set: Set> = + items.iter().cloned().collect(); - while counter < configurations.len() { - let new_configurations: Vec<_> = - configurations[counter..] + while counter < items.len() { + let new_items: Vec<_> = + items[counter..] .iter() - .filter_map(|configuration| { - let shift_symbol = configuration.shift_symbol(); + .filter_map(|item| { + let shift_symbol = item.shift_symbol(); match shift_symbol { None => None, // requires a reduce Some((Symbol::Terminal(_), _)) => None, // requires a shift Some((Symbol::Nonterminal(nt), remainder)) => { - Some((nt, remainder, configuration.lookahead)) + Some((nt, remainder, item.lookahead)) } } }) .flat_map(|(nt, remainder, lookahead)| { let first_set = self.first_sets.first(remainder, lookahead); first_set.into_iter() - .flat_map(move |l| self.configurations(nt, 0, l)) + .flat_map(move |l| self.items(nt, 0, l)) }) - .filter(|&configuration| set.insert(configuration)) + .filter(|&item| set.insert(item)) .collect(); - counter = configurations.len(); - configurations.extend(new_configurations); + counter = items.len(); + items.extend(new_items); } - Rc::new(configurations) + Rc::new(items) } } -impl<'grammar> Configuration<'grammar> { +impl<'grammar> Item<'grammar> { fn can_shift(&self) -> bool { self.index < self.production.symbols.len() } @@ -150,11 +159,12 @@ impl<'grammar> Configuration<'grammar> { self.index == self.production.symbols.len() } - fn shifted_configuration(&self) -> Option> { + fn shifted_item(&self) -> Option<(Symbol, Item<'grammar>)> { if self.can_shift() { - Some(Configuration { production: self.production, - index: self.index + 1, - lookahead: self.lookahead }) + Some((self.production.symbols[self.index], + Item { production: self.production, + index: self.index + 1, + lookahead: self.lookahead })) } else { None } @@ -173,23 +183,23 @@ impl<'grammar> StateSet<'grammar> { fn new() -> StateSet<'grammar> { StateSet { states: vec![], - state_map: HashMap::new(), + state_map: map(), } } - fn add_state(&mut self, configurations: Configurations<'grammar>) -> StateIndex { + fn add_state(&mut self, items: Items<'grammar>) -> StateIndex { let states = &mut self.states; - *self.state_map.entry(configurations.clone()).or_insert_with(|| { + *self.state_map.entry(items.clone()).or_insert_with(|| { let index = StateIndex(states.len()); - states.push(State { configurations: configurations, - shifts: HashMap::new(), - gotos: HashMap::new() }); + states.push(State { items: items, + shifts: Vec::new(), + gotos: Vec::new() }); index }) } } -impl<'grammar> Debug for Configuration<'grammar> { +impl<'grammar> Debug for Item<'grammar> { fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> { write!(fmt, "{} ={} (*){} [{:?}]", self.production.nonterminal, @@ -207,3 +217,9 @@ impl Debug for Lookahead { } } } + +impl Debug for StateIndex { + fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> { + write!(fmt, "S{}", self.0) + } +} diff --git a/src/lr1/test.rs b/src/lr1/test.rs index 234a9ae..d11b40b 100644 --- a/src/lr1/test.rs +++ b/src/lr1/test.rs @@ -1,21 +1,21 @@ use intern::intern; use grammar::repr::*; use test_util::{expect_debug, normalized_grammar}; -use super::{Configuration, Configurations, Lookahead, LR1}; +use super::{Items, Lookahead, LR1}; use super::Lookahead::EOF; fn nt(t: &str) -> NonterminalString { NonterminalString(intern(t)) } -fn configurations<'g>(grammar: &'g Grammar, nonterminal: &str, index: usize, la: Lookahead) - -> Configurations<'g> +fn items<'g>(grammar: &'g Grammar, nonterminal: &str, index: usize, la: Lookahead) + -> Items<'g> { let lr1 = LR1::new(&grammar); - let configurations = + let items = lr1.transitive_closure( - lr1.configurations(nt(nonterminal), index, la)); - configurations + lr1.items(nt(nonterminal), index, la)); + items } #[test] @@ -30,8 +30,8 @@ grammar Foo { }; } "#); - let configurations = configurations(&grammar, "A", 0, EOF); - expect_debug(configurations, r#"[ + let items = items(&grammar, "A", 0, EOF); + expect_debug(items, r#"[ A = (*) B "C" [EOF], B = (*) "D" ["C"], B = (*) ["C"] @@ -55,7 +55,7 @@ grammar Foo { } "#); - expect_debug(configurations(&grammar, "A", 0, EOF), r#"[ + expect_debug(items(&grammar, "A", 0, EOF), r#"[ A = (*) B C [EOF], B = (*) "B1" ["C1"], B = (*) ["C1"], @@ -63,9 +63,241 @@ grammar Foo { B = (*) [EOF] ]"#); - expect_debug(configurations(&grammar, "A", 1, EOF), r#"[ + expect_debug(items(&grammar, "A", 1, EOF), r#"[ A = B (*) C [EOF], C = (*) "C1" [EOF], C = (*) [EOF] ]"#); } + +#[test] +fn expr_grammar1() { + let grammar = normalized_grammar(r#" +grammar Foo { + token Tok where { }; + + S: () = + E => (); + + E: () = { + E "-" T => (); + T => (); + }; + + T: () = { + "N" => (); + "(" E ")" => (); + }; +} +"#); + + let lr1 = LR1::new(&grammar); + let mut states = lr1.build_states(nt("S")); + for state in &mut states { + state.shifts.sort(); + state.gotos.sort(); + } + expect_debug(&states, r#"[ + State { + items: [ + S = (*) E [EOF], + E = (*) E "-" T [EOF], + E = (*) T [EOF], + E = (*) E "-" T ["-"], + E = (*) T ["-"], + T = (*) "N" [EOF], + T = (*) "(" E ")" [EOF], + T = (*) "N" ["-"], + T = (*) "(" E ")" ["-"] + ], + shifts: [ + ("(", S4), + ("N", S3) + ], + gotos: [ + (E, S2), + (T, S1) + ] + }, + State { + items: [ + E = T (*) [EOF], + E = T (*) ["-"] + ], + shifts: [], + gotos: [] + }, + State { + items: [ + S = E (*) [EOF], + E = E (*) "-" T [EOF], + E = E (*) "-" T ["-"] + ], + shifts: [ + ("-", S5) + ], + gotos: [] + }, + State { + items: [ + T = "N" (*) [EOF], + T = "N" (*) ["-"] + ], + shifts: [], + gotos: [] + }, + State { + items: [ + T = "(" (*) E ")" [EOF], + T = "(" (*) E ")" ["-"], + E = (*) E "-" T [")"], + E = (*) T [")"], + E = (*) E "-" T ["-"], + E = (*) T ["-"], + T = (*) "N" [")"], + T = (*) "(" E ")" [")"], + T = (*) "N" ["-"], + T = (*) "(" E ")" ["-"] + ], + shifts: [ + ("(", S8), + ("N", S9) + ], + gotos: [ + (E, S7), + (T, S6) + ] + }, + State { + items: [ + E = E "-" (*) T [EOF], + E = E "-" (*) T ["-"], + T = (*) "N" [EOF], + T = (*) "(" E ")" [EOF], + T = (*) "N" ["-"], + T = (*) "(" E ")" ["-"] + ], + shifts: [ + ("(", S4), + ("N", S3) + ], + gotos: [ + (T, S10) + ] + }, + State { + items: [ + E = T (*) [")"], + E = T (*) ["-"] + ], + shifts: [], + gotos: [] + }, + State { + items: [ + T = "(" E (*) ")" [EOF], + T = "(" E (*) ")" ["-"], + E = E (*) "-" T [")"], + E = E (*) "-" T ["-"] + ], + shifts: [ + (")", S12), + ("-", S11) + ], + gotos: [] + }, + State { + items: [ + T = "(" (*) E ")" [")"], + T = "(" (*) E ")" ["-"], + E = (*) E "-" T [")"], + E = (*) T [")"], + E = (*) E "-" T ["-"], + E = (*) T ["-"], + T = (*) "N" [")"], + T = (*) "(" E ")" [")"], + T = (*) "N" ["-"], + T = (*) "(" E ")" ["-"] + ], + shifts: [ + ("(", S8), + ("N", S9) + ], + gotos: [ + (E, S13), + (T, S6) + ] + }, + State { + items: [ + T = "N" (*) [")"], + T = "N" (*) ["-"] + ], + shifts: [], + gotos: [] + }, + State { + items: [ + E = E "-" T (*) [EOF], + E = E "-" T (*) ["-"] + ], + shifts: [], + gotos: [] + }, + State { + items: [ + E = E "-" (*) T [")"], + E = E "-" (*) T ["-"], + T = (*) "N" [")"], + T = (*) "(" E ")" [")"], + T = (*) "N" ["-"], + T = (*) "(" E ")" ["-"] + ], + shifts: [ + ("(", S8), + ("N", S9) + ], + gotos: [ + (T, S14) + ] + }, + State { + items: [ + T = "(" E ")" (*) [EOF], + T = "(" E ")" (*) ["-"] + ], + shifts: [], + gotos: [] + }, + State { + items: [ + T = "(" E (*) ")" [")"], + T = "(" E (*) ")" ["-"], + E = E (*) "-" T [")"], + E = E (*) "-" T ["-"] + ], + shifts: [ + (")", S15), + ("-", S11) + ], + gotos: [] + }, + State { + items: [ + E = E "-" T (*) [")"], + E = E "-" T (*) ["-"] + ], + shifts: [], + gotos: [] + }, + State { + items: [ + T = "(" E ")" (*) [")"], + T = "(" E ")" (*) ["-"] + ], + shifts: [], + gotos: [] + } +]"#); + +} diff --git a/src/util.rs b/src/util.rs index 042ca64..eeab167 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,4 +1,7 @@ +use std::collections::{hash_map, HashMap, HashSet}; use std::fmt::{Display, Formatter, Error}; +use std::hash::Hash; +use std::iter::FromIterator; pub struct Sep(pub &'static str, pub S); @@ -29,3 +32,49 @@ impl<'a,S:Display> Display for Prefix<&'a [S]> { } } +pub struct Multimap { + map: HashMap> +} + +impl Multimap { + pub fn new() -> Multimap { + Multimap { map: map() } + } + + pub fn push(&mut self, key: K, value: V) { + self.map.entry(key).or_insert(vec![]).push(value); + } + + pub fn get(&self, key: &K) -> &[V] { + match self.map.get(key) { + Some(v) => v, + None => &[] + } + } + + pub fn into_iter(self) -> hash_map::IntoIter> { + self.map.into_iter() + } +} + +impl FromIterator<(K,V)> for Multimap { + fn from_iter(iterator: T) -> Self where T: IntoIterator { + let mut map = Multimap::new(); + for (key, value) in iterator { + map.push(key, value); + } + map + } +} + +pub type Map = HashMap; + +pub fn map() -> HashMap { + HashMap::new() +} + +pub type Set = HashSet; + +pub fn set() -> HashSet { + HashSet::new() +}