lr1 construction seems to be working; tests are not yet really stable

This commit is contained in:
Niko Matsakis 2015-06-19 17:28:03 -04:00
parent 990f2a931a
commit c66f7e2d40
6 changed files with 369 additions and 71 deletions

View File

@ -5,9 +5,8 @@
*/ */
use intern::InternedString; use intern::InternedString;
use std::collections::HashMap;
use std::fmt::{Debug, Display, Formatter, Error}; use std::fmt::{Debug, Display, Formatter, Error};
use util::Sep; use util::{map, Map, Sep};
// These concepts we re-use wholesale // These concepts we re-use wholesale
pub use grammar::parse_tree::{NonterminalString, Span, TerminalString}; pub use grammar::parse_tree::{NonterminalString, Span, TerminalString};
@ -15,8 +14,8 @@ pub use grammar::parse_tree::{NonterminalString, Span, TerminalString};
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct Grammar { pub struct Grammar {
pub action_fn_defns: Vec<ActionFnDefn>, pub action_fn_defns: Vec<ActionFnDefn>,
pub productions: HashMap<NonterminalString, Vec<Production>>, pub productions: Map<NonterminalString, Vec<Production>>,
pub conversions: HashMap<TerminalString, TerminalString>, pub conversions: Map<TerminalString, TerminalString>,
pub types: Types, pub types: Types,
} }
@ -54,13 +53,13 @@ pub enum TypeRepr {
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct Types { pub struct Types {
terminal_type: TypeRepr, terminal_type: TypeRepr,
nonterminal_types: HashMap<NonterminalString, TypeRepr> nonterminal_types: Map<NonterminalString, TypeRepr>
} }
impl Types { impl Types {
pub fn new(terminal_type: TypeRepr) -> Types { pub fn new(terminal_type: TypeRepr) -> Types {
Types { terminal_type: terminal_type, Types { terminal_type: terminal_type,
nonterminal_types: HashMap::new() } nonterminal_types: map() }
} }
pub fn add_type(&mut self, nt_id: NonterminalString, ty: TypeRepr) { pub fn add_type(&mut self, nt_id: NonterminalString, ty: TypeRepr) {
@ -173,7 +172,7 @@ impl Grammar {
types: Types) types: Types)
-> Grammar -> Grammar
{ {
let mut productions = HashMap::new(); let mut productions = map();
for production in flat_productions { for production in flat_productions {
let mut vec = productions.entry(production.nonterminal).or_insert(vec![]); let mut vec = productions.entry(production.nonterminal).or_insert(vec![]);

View File

@ -2,6 +2,7 @@ use std::collections::HashMap;
use std::cell::RefCell; use std::cell::RefCell;
use std::fmt::{Debug, Display, Error, Formatter}; use std::fmt::{Debug, Display, Error, Formatter};
use std::cmp::{PartialOrd, Ord, Ordering}; use std::cmp::{PartialOrd, Ord, Ordering};
use util::{map, Map};
#[cfg(test)] #[cfg(test)]
mod test; mod test;
@ -50,7 +51,7 @@ fn write<F,R>(f: F) -> R
impl Interner { impl Interner {
fn new() -> Interner { fn new() -> Interner {
Interner { map: HashMap::new(), strings: vec![] } Interner { map: map(), strings: vec![] }
} }
pub fn data(&self, i: InternedString) -> &str { pub fn data(&self, i: InternedString) -> &str {

View File

@ -2,6 +2,7 @@
use grammar::repr::*; use grammar::repr::*;
use std::collections::{HashMap, HashSet}; use std::collections::{HashMap, HashSet};
use util::{map, Map};
use super::Lookahead; use super::Lookahead;
@ -16,7 +17,7 @@ pub type FirstSet = HashSet<Option<TerminalString>>;
impl FirstSets { impl FirstSets {
pub fn new(grammar: &Grammar) -> FirstSets { pub fn new(grammar: &Grammar) -> FirstSets {
let mut this = FirstSets { map: HashMap::new() }; let mut this = FirstSets { map: map() };
let mut changed = true; let mut changed = true;
while changed { while changed {
changed = false; changed = false;

View File

@ -4,7 +4,7 @@ use grammar::repr::*;
use std::collections::{HashMap, HashSet}; use std::collections::{HashMap, HashSet};
use std::fmt::{Debug, Formatter, Error}; use std::fmt::{Debug, Formatter, Error};
use std::rc::Rc; use std::rc::Rc;
use util::Prefix; use util::{map, Map, Multimap, Set, Prefix};
mod first; mod first;
@ -15,15 +15,16 @@ struct LR1<'grammar> {
first_sets: first::FirstSets, first_sets: first::FirstSets,
} }
#[derive(Debug)]
struct State<'grammar> { struct State<'grammar> {
configurations: Configurations<'grammar>, items: Items<'grammar>,
shifts: HashMap<TerminalString, StateIndex>, shifts: Vec<(TerminalString, StateIndex)>,
gotos: HashMap<NonterminalString, StateIndex>, gotos: Vec<(NonterminalString, StateIndex)>,
} }
type Configurations<'grammar> = Rc<Vec<Configuration<'grammar>>>; type Items<'grammar> = Rc<Vec<Item<'grammar>>>;
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] #[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
struct StateIndex(usize); struct StateIndex(usize);
#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] #[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
@ -33,7 +34,7 @@ enum Lookahead {
} }
#[derive(Copy, Clone, Hash, PartialEq, Eq)] #[derive(Copy, Clone, Hash, PartialEq, Eq)]
struct Configuration<'grammar> { struct Item<'grammar> {
production: &'grammar Production, production: &'grammar Production,
index: usize, // the dot comes before `index`, so `index` would be 1 for X = A (*) B C index: usize, // the dot comes before `index`, so `index` would be 1 for X = A (*) B C
lookahead: Lookahead, lookahead: Lookahead,
@ -41,7 +42,7 @@ struct Configuration<'grammar> {
struct StateSet<'grammar> { struct StateSet<'grammar> {
states: Vec<State<'grammar>>, states: Vec<State<'grammar>>,
state_map: HashMap<Configurations<'grammar>, StateIndex>, state_map: Map<Items<'grammar>, StateIndex>,
} }
impl<'grammar> LR1<'grammar> { impl<'grammar> LR1<'grammar> {
@ -52,30 +53,38 @@ impl<'grammar> LR1<'grammar> {
} }
} }
fn build_states(&mut self, start_nt: NonterminalString) -> Vec<State<'grammar>> { fn build_states(&self, start_nt: NonterminalString) -> Vec<State<'grammar>> {
let mut state_set = StateSet::new(); let mut state_set = StateSet::new();
// create the starting state // create the starting state
state_set.add_state( state_set.add_state(
self.transitive_closure( self.transitive_closure(
self.configurations(start_nt, 0, Lookahead::EOF))); self.items(start_nt, 0, Lookahead::EOF)));
let mut counter = 0; let mut counter = 0;
while counter < state_set.states.len() { while counter < state_set.states.len() {
let configurations = state_set.states[counter].configurations.clone(); let items = state_set.states[counter].items.clone();
counter += 1;
// for each configuration where we can shift, do so, and // group the items that we can transition into by shifting
// create the transitive closure of the resulting state // over a term or nonterm
let shifted_configurations = let transitions: Multimap<Symbol, Item<'grammar>> =
configurations items.iter()
.iter() .filter_map(|item| item.shifted_item())
.filter_map(|configuration| configuration.shifted_configuration()) .collect();
.map(|configuration| self.transitive_closure(vec![configuration]));
// add a state for each of those cases where we did a shift for (symbol, items) in transitions.into_iter() {
for configuration in shifted_configurations { let items = self.transitive_closure(items);
state_set.add_state(configuration); let next_state = state_set.add_state(items);
// FIXME check for conflicts
match symbol {
Symbol::Terminal(t) => {
state_set.states[counter].shifts.push((t, next_state));
}
Symbol::Nonterminal(t) => {
state_set.states[counter].gotos.push((t, next_state));
}
}
} }
// extract a new state // extract a new state
@ -85,17 +94,17 @@ impl<'grammar> LR1<'grammar> {
state_set.states state_set.states
} }
fn configurations(&self, fn items(&self,
id: NonterminalString, id: NonterminalString,
index: usize, index: usize,
lookahead: Lookahead) lookahead: Lookahead)
-> Vec<Configuration<'grammar>> -> Vec<Item<'grammar>>
{ {
self.grammar.productions_for(id) self.grammar.productions_for(id)
.iter() .iter()
.map(|production| { .map(|production| {
debug_assert!(index <= production.symbols.len()); debug_assert!(index <= production.symbols.len());
Configuration { production: production, Item { production: production,
index: index, index: index,
lookahead: lookahead } lookahead: lookahead }
}) })
@ -103,45 +112,45 @@ impl<'grammar> LR1<'grammar> {
} }
// expands `state` with epsilon moves // expands `state` with epsilon moves
fn transitive_closure(&self, mut configurations: Vec<Configuration<'grammar>>) fn transitive_closure(&self, mut items: Vec<Item<'grammar>>)
-> Configurations<'grammar> -> Items<'grammar>
{ {
let mut counter = 0; let mut counter = 0;
let mut set: HashSet<Configuration<'grammar>> = let mut set: Set<Item<'grammar>> =
configurations.iter().cloned().collect(); items.iter().cloned().collect();
while counter < configurations.len() { while counter < items.len() {
let new_configurations: Vec<_> = let new_items: Vec<_> =
configurations[counter..] items[counter..]
.iter() .iter()
.filter_map(|configuration| { .filter_map(|item| {
let shift_symbol = configuration.shift_symbol(); let shift_symbol = item.shift_symbol();
match shift_symbol { match shift_symbol {
None => None, // requires a reduce None => None, // requires a reduce
Some((Symbol::Terminal(_), _)) => None, // requires a shift Some((Symbol::Terminal(_), _)) => None, // requires a shift
Some((Symbol::Nonterminal(nt), remainder)) => { Some((Symbol::Nonterminal(nt), remainder)) => {
Some((nt, remainder, configuration.lookahead)) Some((nt, remainder, item.lookahead))
} }
} }
}) })
.flat_map(|(nt, remainder, lookahead)| { .flat_map(|(nt, remainder, lookahead)| {
let first_set = self.first_sets.first(remainder, lookahead); let first_set = self.first_sets.first(remainder, lookahead);
first_set.into_iter() first_set.into_iter()
.flat_map(move |l| self.configurations(nt, 0, l)) .flat_map(move |l| self.items(nt, 0, l))
}) })
.filter(|&configuration| set.insert(configuration)) .filter(|&item| set.insert(item))
.collect(); .collect();
counter = configurations.len(); counter = items.len();
configurations.extend(new_configurations); items.extend(new_items);
} }
Rc::new(configurations) Rc::new(items)
} }
} }
impl<'grammar> Configuration<'grammar> { impl<'grammar> Item<'grammar> {
fn can_shift(&self) -> bool { fn can_shift(&self) -> bool {
self.index < self.production.symbols.len() self.index < self.production.symbols.len()
} }
@ -150,11 +159,12 @@ impl<'grammar> Configuration<'grammar> {
self.index == self.production.symbols.len() self.index == self.production.symbols.len()
} }
fn shifted_configuration(&self) -> Option<Configuration<'grammar>> { fn shifted_item(&self) -> Option<(Symbol, Item<'grammar>)> {
if self.can_shift() { if self.can_shift() {
Some(Configuration { production: self.production, Some((self.production.symbols[self.index],
Item { production: self.production,
index: self.index + 1, index: self.index + 1,
lookahead: self.lookahead }) lookahead: self.lookahead }))
} else { } else {
None None
} }
@ -173,23 +183,23 @@ impl<'grammar> StateSet<'grammar> {
fn new() -> StateSet<'grammar> { fn new() -> StateSet<'grammar> {
StateSet { StateSet {
states: vec![], states: vec![],
state_map: HashMap::new(), state_map: map(),
} }
} }
fn add_state(&mut self, configurations: Configurations<'grammar>) -> StateIndex { fn add_state(&mut self, items: Items<'grammar>) -> StateIndex {
let states = &mut self.states; let states = &mut self.states;
*self.state_map.entry(configurations.clone()).or_insert_with(|| { *self.state_map.entry(items.clone()).or_insert_with(|| {
let index = StateIndex(states.len()); let index = StateIndex(states.len());
states.push(State { configurations: configurations, states.push(State { items: items,
shifts: HashMap::new(), shifts: Vec::new(),
gotos: HashMap::new() }); gotos: Vec::new() });
index index
}) })
} }
} }
impl<'grammar> Debug for Configuration<'grammar> { impl<'grammar> Debug for Item<'grammar> {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> { fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
write!(fmt, "{} ={} (*){} [{:?}]", write!(fmt, "{} ={} (*){} [{:?}]",
self.production.nonterminal, self.production.nonterminal,
@ -207,3 +217,9 @@ impl Debug for Lookahead {
} }
} }
} }
impl Debug for StateIndex {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
write!(fmt, "S{}", self.0)
}
}

View File

@ -1,21 +1,21 @@
use intern::intern; use intern::intern;
use grammar::repr::*; use grammar::repr::*;
use test_util::{expect_debug, normalized_grammar}; use test_util::{expect_debug, normalized_grammar};
use super::{Configuration, Configurations, Lookahead, LR1}; use super::{Items, Lookahead, LR1};
use super::Lookahead::EOF; use super::Lookahead::EOF;
fn nt(t: &str) -> NonterminalString { fn nt(t: &str) -> NonterminalString {
NonterminalString(intern(t)) NonterminalString(intern(t))
} }
fn configurations<'g>(grammar: &'g Grammar, nonterminal: &str, index: usize, la: Lookahead) fn items<'g>(grammar: &'g Grammar, nonterminal: &str, index: usize, la: Lookahead)
-> Configurations<'g> -> Items<'g>
{ {
let lr1 = LR1::new(&grammar); let lr1 = LR1::new(&grammar);
let configurations = let items =
lr1.transitive_closure( lr1.transitive_closure(
lr1.configurations(nt(nonterminal), index, la)); lr1.items(nt(nonterminal), index, la));
configurations items
} }
#[test] #[test]
@ -30,8 +30,8 @@ grammar Foo {
}; };
} }
"#); "#);
let configurations = configurations(&grammar, "A", 0, EOF); let items = items(&grammar, "A", 0, EOF);
expect_debug(configurations, r#"[ expect_debug(items, r#"[
A = (*) B "C" [EOF], A = (*) B "C" [EOF],
B = (*) "D" ["C"], B = (*) "D" ["C"],
B = (*) ["C"] B = (*) ["C"]
@ -55,7 +55,7 @@ grammar Foo {
} }
"#); "#);
expect_debug(configurations(&grammar, "A", 0, EOF), r#"[ expect_debug(items(&grammar, "A", 0, EOF), r#"[
A = (*) B C [EOF], A = (*) B C [EOF],
B = (*) "B1" ["C1"], B = (*) "B1" ["C1"],
B = (*) ["C1"], B = (*) ["C1"],
@ -63,9 +63,241 @@ grammar Foo {
B = (*) [EOF] B = (*) [EOF]
]"#); ]"#);
expect_debug(configurations(&grammar, "A", 1, EOF), r#"[ expect_debug(items(&grammar, "A", 1, EOF), r#"[
A = B (*) C [EOF], A = B (*) C [EOF],
C = (*) "C1" [EOF], C = (*) "C1" [EOF],
C = (*) [EOF] C = (*) [EOF]
]"#); ]"#);
} }
// Builds the LR(1) states for a small left-recursive expression grammar,
// starting from the nonterminal `S`, and checks the Debug rendering of every
// state: its items, its terminal shifts and its nonterminal gotos.
#[test]
fn expr_grammar1() {
let grammar = normalized_grammar(r#"
grammar Foo {
token Tok where { };
S: () =
E => ();
E: () = {
E "-" T => ();
T => ();
};
T: () = {
"N" => ();
"(" E ")" => ();
};
}
"#);
let lr1 = LR1::new(&grammar);
let mut states = lr1.build_states(nt("S"));
// Sort each state's transition lists before rendering; presumably this is so
// the comparison below does not depend on the order in which `build_states`
// pushed the entries.
for state in &mut states {
state.shifts.sort();
state.gotos.sort();
}
// NOTE(review): the `S<n>` numbers in the expected listing still depend on the
// order in which states were first created. If that order follows hash-map
// iteration, this expectation can vary between runs -- confirm.
expect_debug(&states, r#"[
State {
items: [
S = (*) E [EOF],
E = (*) E "-" T [EOF],
E = (*) T [EOF],
E = (*) E "-" T ["-"],
E = (*) T ["-"],
T = (*) "N" [EOF],
T = (*) "(" E ")" [EOF],
T = (*) "N" ["-"],
T = (*) "(" E ")" ["-"]
],
shifts: [
("(", S4),
("N", S3)
],
gotos: [
(E, S2),
(T, S1)
]
},
State {
items: [
E = T (*) [EOF],
E = T (*) ["-"]
],
shifts: [],
gotos: []
},
State {
items: [
S = E (*) [EOF],
E = E (*) "-" T [EOF],
E = E (*) "-" T ["-"]
],
shifts: [
("-", S5)
],
gotos: []
},
State {
items: [
T = "N" (*) [EOF],
T = "N" (*) ["-"]
],
shifts: [],
gotos: []
},
State {
items: [
T = "(" (*) E ")" [EOF],
T = "(" (*) E ")" ["-"],
E = (*) E "-" T [")"],
E = (*) T [")"],
E = (*) E "-" T ["-"],
E = (*) T ["-"],
T = (*) "N" [")"],
T = (*) "(" E ")" [")"],
T = (*) "N" ["-"],
T = (*) "(" E ")" ["-"]
],
shifts: [
("(", S8),
("N", S9)
],
gotos: [
(E, S7),
(T, S6)
]
},
State {
items: [
E = E "-" (*) T [EOF],
E = E "-" (*) T ["-"],
T = (*) "N" [EOF],
T = (*) "(" E ")" [EOF],
T = (*) "N" ["-"],
T = (*) "(" E ")" ["-"]
],
shifts: [
("(", S4),
("N", S3)
],
gotos: [
(T, S10)
]
},
State {
items: [
E = T (*) [")"],
E = T (*) ["-"]
],
shifts: [],
gotos: []
},
State {
items: [
T = "(" E (*) ")" [EOF],
T = "(" E (*) ")" ["-"],
E = E (*) "-" T [")"],
E = E (*) "-" T ["-"]
],
shifts: [
(")", S12),
("-", S11)
],
gotos: []
},
State {
items: [
T = "(" (*) E ")" [")"],
T = "(" (*) E ")" ["-"],
E = (*) E "-" T [")"],
E = (*) T [")"],
E = (*) E "-" T ["-"],
E = (*) T ["-"],
T = (*) "N" [")"],
T = (*) "(" E ")" [")"],
T = (*) "N" ["-"],
T = (*) "(" E ")" ["-"]
],
shifts: [
("(", S8),
("N", S9)
],
gotos: [
(E, S13),
(T, S6)
]
},
State {
items: [
T = "N" (*) [")"],
T = "N" (*) ["-"]
],
shifts: [],
gotos: []
},
State {
items: [
E = E "-" T (*) [EOF],
E = E "-" T (*) ["-"]
],
shifts: [],
gotos: []
},
State {
items: [
E = E "-" (*) T [")"],
E = E "-" (*) T ["-"],
T = (*) "N" [")"],
T = (*) "(" E ")" [")"],
T = (*) "N" ["-"],
T = (*) "(" E ")" ["-"]
],
shifts: [
("(", S8),
("N", S9)
],
gotos: [
(T, S14)
]
},
State {
items: [
T = "(" E ")" (*) [EOF],
T = "(" E ")" (*) ["-"]
],
shifts: [],
gotos: []
},
State {
items: [
T = "(" E (*) ")" [")"],
T = "(" E (*) ")" ["-"],
E = E (*) "-" T [")"],
E = E (*) "-" T ["-"]
],
shifts: [
(")", S15),
("-", S11)
],
gotos: []
},
State {
items: [
E = E "-" T (*) [")"],
E = E "-" T (*) ["-"]
],
shifts: [],
gotos: []
},
State {
items: [
T = "(" E ")" (*) [")"],
T = "(" E ")" (*) ["-"]
],
shifts: [],
gotos: []
}
]"#);
}

View File

@ -1,4 +1,7 @@
use std::collections::{hash_map, HashMap, HashSet};
use std::fmt::{Display, Formatter, Error}; use std::fmt::{Display, Formatter, Error};
use std::hash::Hash;
use std::iter::FromIterator;
pub struct Sep<S>(pub &'static str, pub S); pub struct Sep<S>(pub &'static str, pub S);
@ -29,3 +32,49 @@ impl<'a,S:Display> Display for Prefix<&'a [S]> {
} }
} }
/// A map from each key to the list of values pushed under that key.
///
/// Values under one key keep the order in which they were pushed. The order
/// of the keys themselves is whatever the backing `HashMap` yields.
pub struct Multimap<K, V> {
    map: HashMap<K, Vec<V>>,
}

impl<K: Hash + Eq, V> Multimap<K, V> {
    /// Creates an empty multimap.
    pub fn new() -> Multimap<K, V> {
        Multimap { map: HashMap::new() }
    }

    /// Appends `value` to the values stored under `key`, creating the entry
    /// the first time the key is seen.
    pub fn push(&mut self, key: K, value: V) {
        self.map.entry(key).or_insert_with(Vec::new).push(value);
    }

    /// Returns the values pushed under `key`, or an empty slice when nothing
    /// was ever pushed under it.
    pub fn get(&self, key: &K) -> &[V] {
        self.map.get(key).map(|values| &values[..]).unwrap_or(&[])
    }

    /// Consumes the multimap and yields one `(key, values)` pair per key.
    ///
    /// Kept as an inherent method so existing `multimap.into_iter()` callers
    /// resolve exactly as before; the `IntoIterator` impl yields the same
    /// iterator.
    pub fn into_iter(self) -> hash_map::IntoIter<K, Vec<V>> {
        self.map.into_iter()
    }
}

/// An empty multimap, identical to `Multimap::new()`.
impl<K: Hash + Eq, V> Default for Multimap<K, V> {
    fn default() -> Self {
        Multimap::new()
    }
}

/// Lets a multimap be used directly in a `for` loop and with any API that
/// accepts `IntoIterator`.
impl<K: Hash + Eq, V> IntoIterator for Multimap<K, V> {
    type Item = (K, Vec<V>);
    type IntoIter = hash_map::IntoIter<K, Vec<V>>;

    fn into_iter(self) -> Self::IntoIter {
        self.map.into_iter()
    }
}

/// Groups a stream of `(key, value)` pairs by key, so `.collect()` can build
/// a multimap.
impl<K: Hash + Eq, V> FromIterator<(K, V)> for Multimap<K, V> {
    fn from_iter<T>(iterator: T) -> Self
        where T: IntoIterator<Item = (K, V)>
    {
        let mut multimap = Multimap::new();
        for (key, value) in iterator {
            multimap.push(key, value);
        }
        multimap
    }
}
/// Alias for the hash map type; together with `map()` it lets call sites
/// avoid naming `HashMap` directly.
pub type Map<K, V> = HashMap<K, V>;

/// Returns a new, empty `Map`.
pub fn map<K, V>() -> Map<K, V>
    where K: Hash + Eq
{
    Map::new()
}
/// Alias for the hash set type; together with `set()` it lets call sites
/// avoid naming `HashSet` directly.
pub type Set<K> = HashSet<K>;

/// Returns a new, empty `Set`.
pub fn set<K>() -> Set<K>
    where K: Hash + Eq
{
    Set::new()
}