lr1 construction seems to be working, tests are not yet really stable

This commit is contained in:
Niko Matsakis 2015-06-19 17:28:03 -04:00
parent 990f2a931a
commit c66f7e2d40
6 changed files with 369 additions and 71 deletions

View File

@ -5,9 +5,8 @@
*/
use intern::InternedString;
use std::collections::HashMap;
use std::fmt::{Debug, Display, Formatter, Error};
use util::Sep;
use util::{map, Map, Sep};
// These concepts we re-use wholesale
pub use grammar::parse_tree::{NonterminalString, Span, TerminalString};
@ -15,8 +14,8 @@ pub use grammar::parse_tree::{NonterminalString, Span, TerminalString};
#[derive(Clone, Debug)]
pub struct Grammar {
pub action_fn_defns: Vec<ActionFnDefn>,
pub productions: HashMap<NonterminalString, Vec<Production>>,
pub conversions: HashMap<TerminalString, TerminalString>,
pub productions: Map<NonterminalString, Vec<Production>>,
pub conversions: Map<TerminalString, TerminalString>,
pub types: Types,
}
@ -54,13 +53,13 @@ pub enum TypeRepr {
#[derive(Clone, Debug)]
pub struct Types {
terminal_type: TypeRepr,
nonterminal_types: HashMap<NonterminalString, TypeRepr>
nonterminal_types: Map<NonterminalString, TypeRepr>
}
impl Types {
pub fn new(terminal_type: TypeRepr) -> Types {
Types { terminal_type: terminal_type,
nonterminal_types: HashMap::new() }
nonterminal_types: map() }
}
pub fn add_type(&mut self, nt_id: NonterminalString, ty: TypeRepr) {
@ -173,7 +172,7 @@ impl Grammar {
types: Types)
-> Grammar
{
let mut productions = HashMap::new();
let mut productions = map();
for production in flat_productions {
let mut vec = productions.entry(production.nonterminal).or_insert(vec![]);

View File

@ -2,6 +2,7 @@ use std::collections::HashMap;
use std::cell::RefCell;
use std::fmt::{Debug, Display, Error, Formatter};
use std::cmp::{PartialOrd, Ord, Ordering};
use util::{map, Map};
#[cfg(test)]
mod test;
@ -50,7 +51,7 @@ fn write<F,R>(f: F) -> R
impl Interner {
fn new() -> Interner {
Interner { map: HashMap::new(), strings: vec![] }
Interner { map: map(), strings: vec![] }
}
pub fn data(&self, i: InternedString) -> &str {

View File

@ -2,6 +2,7 @@
use grammar::repr::*;
use std::collections::{HashMap, HashSet};
use util::{map, Map};
use super::Lookahead;
@ -16,7 +17,7 @@ pub type FirstSet = HashSet<Option<TerminalString>>;
impl FirstSets {
pub fn new(grammar: &Grammar) -> FirstSets {
let mut this = FirstSets { map: HashMap::new() };
let mut this = FirstSets { map: map() };
let mut changed = true;
while changed {
changed = false;

View File

@ -4,7 +4,7 @@ use grammar::repr::*;
use std::collections::{HashMap, HashSet};
use std::fmt::{Debug, Formatter, Error};
use std::rc::Rc;
use util::Prefix;
use util::{map, Map, Multimap, Set, Prefix};
mod first;
@ -15,15 +15,16 @@ struct LR1<'grammar> {
first_sets: first::FirstSets,
}
#[derive(Debug)]
struct State<'grammar> {
configurations: Configurations<'grammar>,
shifts: HashMap<TerminalString, StateIndex>,
gotos: HashMap<NonterminalString, StateIndex>,
items: Items<'grammar>,
shifts: Vec<(TerminalString, StateIndex)>,
gotos: Vec<(NonterminalString, StateIndex)>,
}
type Configurations<'grammar> = Rc<Vec<Configuration<'grammar>>>;
type Items<'grammar> = Rc<Vec<Item<'grammar>>>;
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
struct StateIndex(usize);
#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
@ -33,7 +34,7 @@ enum Lookahead {
}
#[derive(Copy, Clone, Hash, PartialEq, Eq)]
struct Configuration<'grammar> {
struct Item<'grammar> {
production: &'grammar Production,
index: usize, // the dot comes before `index`, so `index` would be 1 for X = A (*) B C
lookahead: Lookahead,
@ -41,7 +42,7 @@ struct Configuration<'grammar> {
struct StateSet<'grammar> {
states: Vec<State<'grammar>>,
state_map: HashMap<Configurations<'grammar>, StateIndex>,
state_map: Map<Items<'grammar>, StateIndex>,
}
impl<'grammar> LR1<'grammar> {
@ -52,30 +53,38 @@ impl<'grammar> LR1<'grammar> {
}
}
fn build_states(&mut self, start_nt: NonterminalString) -> Vec<State<'grammar>> {
fn build_states(&self, start_nt: NonterminalString) -> Vec<State<'grammar>> {
let mut state_set = StateSet::new();
// create the starting state
state_set.add_state(
self.transitive_closure(
self.configurations(start_nt, 0, Lookahead::EOF)));
self.items(start_nt, 0, Lookahead::EOF)));
let mut counter = 0;
while counter < state_set.states.len() {
let configurations = state_set.states[counter].configurations.clone();
counter += 1;
let items = state_set.states[counter].items.clone();
// for each configuration where we can shift, do so, and
// create the transitive closure of the resulting state
let shifted_configurations =
configurations
.iter()
.filter_map(|configuration| configuration.shifted_configuration())
.map(|configuration| self.transitive_closure(vec![configuration]));
// group the items that we can transition into by shifting
// over a term or nonterm
let transitions: Multimap<Symbol, Item<'grammar>> =
items.iter()
.filter_map(|item| item.shifted_item())
.collect();
// add a state for each of those cases where we did a shift
for configuration in shifted_configurations {
state_set.add_state(configuration);
for (symbol, items) in transitions.into_iter() {
let items = self.transitive_closure(items);
let next_state = state_set.add_state(items);
// FIXME check for conflicts
match symbol {
Symbol::Terminal(t) => {
state_set.states[counter].shifts.push((t, next_state));
}
Symbol::Nonterminal(t) => {
state_set.states[counter].gotos.push((t, next_state));
}
}
}
// extract a new state
@ -85,17 +94,17 @@ impl<'grammar> LR1<'grammar> {
state_set.states
}
fn configurations(&self,
fn items(&self,
id: NonterminalString,
index: usize,
lookahead: Lookahead)
-> Vec<Configuration<'grammar>>
-> Vec<Item<'grammar>>
{
self.grammar.productions_for(id)
.iter()
.map(|production| {
debug_assert!(index <= production.symbols.len());
Configuration { production: production,
Item { production: production,
index: index,
lookahead: lookahead }
})
@ -103,45 +112,45 @@ impl<'grammar> LR1<'grammar> {
}
// expands `state` with epsilon moves
fn transitive_closure(&self, mut configurations: Vec<Configuration<'grammar>>)
-> Configurations<'grammar>
fn transitive_closure(&self, mut items: Vec<Item<'grammar>>)
-> Items<'grammar>
{
let mut counter = 0;
let mut set: HashSet<Configuration<'grammar>> =
configurations.iter().cloned().collect();
let mut set: Set<Item<'grammar>> =
items.iter().cloned().collect();
while counter < configurations.len() {
let new_configurations: Vec<_> =
configurations[counter..]
while counter < items.len() {
let new_items: Vec<_> =
items[counter..]
.iter()
.filter_map(|configuration| {
let shift_symbol = configuration.shift_symbol();
.filter_map(|item| {
let shift_symbol = item.shift_symbol();
match shift_symbol {
None => None, // requires a reduce
Some((Symbol::Terminal(_), _)) => None, // requires a shift
Some((Symbol::Nonterminal(nt), remainder)) => {
Some((nt, remainder, configuration.lookahead))
Some((nt, remainder, item.lookahead))
}
}
})
.flat_map(|(nt, remainder, lookahead)| {
let first_set = self.first_sets.first(remainder, lookahead);
first_set.into_iter()
.flat_map(move |l| self.configurations(nt, 0, l))
.flat_map(move |l| self.items(nt, 0, l))
})
.filter(|&configuration| set.insert(configuration))
.filter(|&item| set.insert(item))
.collect();
counter = configurations.len();
configurations.extend(new_configurations);
counter = items.len();
items.extend(new_items);
}
Rc::new(configurations)
Rc::new(items)
}
}
impl<'grammar> Configuration<'grammar> {
impl<'grammar> Item<'grammar> {
fn can_shift(&self) -> bool {
self.index < self.production.symbols.len()
}
@ -150,11 +159,12 @@ impl<'grammar> Configuration<'grammar> {
self.index == self.production.symbols.len()
}
fn shifted_configuration(&self) -> Option<Configuration<'grammar>> {
fn shifted_item(&self) -> Option<(Symbol, Item<'grammar>)> {
if self.can_shift() {
Some(Configuration { production: self.production,
index: self.index + 1,
lookahead: self.lookahead })
Some((self.production.symbols[self.index],
Item { production: self.production,
index: self.index + 1,
lookahead: self.lookahead }))
} else {
None
}
@ -173,23 +183,23 @@ impl<'grammar> StateSet<'grammar> {
fn new() -> StateSet<'grammar> {
StateSet {
states: vec![],
state_map: HashMap::new(),
state_map: map(),
}
}
fn add_state(&mut self, configurations: Configurations<'grammar>) -> StateIndex {
fn add_state(&mut self, items: Items<'grammar>) -> StateIndex {
let states = &mut self.states;
*self.state_map.entry(configurations.clone()).or_insert_with(|| {
*self.state_map.entry(items.clone()).or_insert_with(|| {
let index = StateIndex(states.len());
states.push(State { configurations: configurations,
shifts: HashMap::new(),
gotos: HashMap::new() });
states.push(State { items: items,
shifts: Vec::new(),
gotos: Vec::new() });
index
})
}
}
impl<'grammar> Debug for Configuration<'grammar> {
impl<'grammar> Debug for Item<'grammar> {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
write!(fmt, "{} ={} (*){} [{:?}]",
self.production.nonterminal,
@ -207,3 +217,9 @@ impl Debug for Lookahead {
}
}
}
impl Debug for StateIndex {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
write!(fmt, "S{}", self.0)
}
}

View File

@ -1,21 +1,21 @@
use intern::intern;
use grammar::repr::*;
use test_util::{expect_debug, normalized_grammar};
use super::{Configuration, Configurations, Lookahead, LR1};
use super::{Items, Lookahead, LR1};
use super::Lookahead::EOF;
fn nt(t: &str) -> NonterminalString {
NonterminalString(intern(t))
}
fn configurations<'g>(grammar: &'g Grammar, nonterminal: &str, index: usize, la: Lookahead)
-> Configurations<'g>
fn items<'g>(grammar: &'g Grammar, nonterminal: &str, index: usize, la: Lookahead)
-> Items<'g>
{
let lr1 = LR1::new(&grammar);
let configurations =
let items =
lr1.transitive_closure(
lr1.configurations(nt(nonterminal), index, la));
configurations
lr1.items(nt(nonterminal), index, la));
items
}
#[test]
@ -30,8 +30,8 @@ grammar Foo {
};
}
"#);
let configurations = configurations(&grammar, "A", 0, EOF);
expect_debug(configurations, r#"[
let items = items(&grammar, "A", 0, EOF);
expect_debug(items, r#"[
A = (*) B "C" [EOF],
B = (*) "D" ["C"],
B = (*) ["C"]
@ -55,7 +55,7 @@ grammar Foo {
}
"#);
expect_debug(configurations(&grammar, "A", 0, EOF), r#"[
expect_debug(items(&grammar, "A", 0, EOF), r#"[
A = (*) B C [EOF],
B = (*) "B1" ["C1"],
B = (*) ["C1"],
@ -63,9 +63,241 @@ grammar Foo {
B = (*) [EOF]
]"#);
expect_debug(configurations(&grammar, "A", 1, EOF), r#"[
expect_debug(items(&grammar, "A", 1, EOF), r#"[
A = B (*) C [EOF],
C = (*) "C1" [EOF],
C = (*) [EOF]
]"#);
}
// Builds the full LR(1) state set for a small left-recursive expression
// grammar (`S = E`, `E = E "-" T | T`, `T = "N" | "(" E ")"`) starting from
// `S`, and compares the `Debug` rendering of every state (its items, shifts
// and gotos) against the expected listing below.
//
// NOTE(review): state indices are handed out by `add_state` in the order the
// transitions of each state are visited, and those transitions are iterated
// out of a hash-map-backed `Multimap`. The `S<n>` numbers in the expected
// output therefore look sensitive to hash iteration order -- confirm before
// relying on this test being deterministic.
#[test]
fn expr_grammar1() {
let grammar = normalized_grammar(r#"
grammar Foo {
token Tok where { };
S: () =
E => ();
E: () = {
E "-" T => ();
T => ();
};
T: () = {
"N" => ();
"(" E ")" => ();
};
}
"#);
let lr1 = LR1::new(&grammar);
let mut states = lr1.build_states(nt("S"));
// `shifts` and `gotos` are plain vectors filled in transition-iteration
// order; sort them so the order *within* each state's lists is stable.
// This does not renumber the states themselves.
for state in &mut states {
state.shifts.sort();
state.gotos.sort();
}
expect_debug(&states, r#"[
State {
items: [
S = (*) E [EOF],
E = (*) E "-" T [EOF],
E = (*) T [EOF],
E = (*) E "-" T ["-"],
E = (*) T ["-"],
T = (*) "N" [EOF],
T = (*) "(" E ")" [EOF],
T = (*) "N" ["-"],
T = (*) "(" E ")" ["-"]
],
shifts: [
("(", S4),
("N", S3)
],
gotos: [
(E, S2),
(T, S1)
]
},
State {
items: [
E = T (*) [EOF],
E = T (*) ["-"]
],
shifts: [],
gotos: []
},
State {
items: [
S = E (*) [EOF],
E = E (*) "-" T [EOF],
E = E (*) "-" T ["-"]
],
shifts: [
("-", S5)
],
gotos: []
},
State {
items: [
T = "N" (*) [EOF],
T = "N" (*) ["-"]
],
shifts: [],
gotos: []
},
State {
items: [
T = "(" (*) E ")" [EOF],
T = "(" (*) E ")" ["-"],
E = (*) E "-" T [")"],
E = (*) T [")"],
E = (*) E "-" T ["-"],
E = (*) T ["-"],
T = (*) "N" [")"],
T = (*) "(" E ")" [")"],
T = (*) "N" ["-"],
T = (*) "(" E ")" ["-"]
],
shifts: [
("(", S8),
("N", S9)
],
gotos: [
(E, S7),
(T, S6)
]
},
State {
items: [
E = E "-" (*) T [EOF],
E = E "-" (*) T ["-"],
T = (*) "N" [EOF],
T = (*) "(" E ")" [EOF],
T = (*) "N" ["-"],
T = (*) "(" E ")" ["-"]
],
shifts: [
("(", S4),
("N", S3)
],
gotos: [
(T, S10)
]
},
State {
items: [
E = T (*) [")"],
E = T (*) ["-"]
],
shifts: [],
gotos: []
},
State {
items: [
T = "(" E (*) ")" [EOF],
T = "(" E (*) ")" ["-"],
E = E (*) "-" T [")"],
E = E (*) "-" T ["-"]
],
shifts: [
(")", S12),
("-", S11)
],
gotos: []
},
State {
items: [
T = "(" (*) E ")" [")"],
T = "(" (*) E ")" ["-"],
E = (*) E "-" T [")"],
E = (*) T [")"],
E = (*) E "-" T ["-"],
E = (*) T ["-"],
T = (*) "N" [")"],
T = (*) "(" E ")" [")"],
T = (*) "N" ["-"],
T = (*) "(" E ")" ["-"]
],
shifts: [
("(", S8),
("N", S9)
],
gotos: [
(E, S13),
(T, S6)
]
},
State {
items: [
T = "N" (*) [")"],
T = "N" (*) ["-"]
],
shifts: [],
gotos: []
},
State {
items: [
E = E "-" T (*) [EOF],
E = E "-" T (*) ["-"]
],
shifts: [],
gotos: []
},
State {
items: [
E = E "-" (*) T [")"],
E = E "-" (*) T ["-"],
T = (*) "N" [")"],
T = (*) "(" E ")" [")"],
T = (*) "N" ["-"],
T = (*) "(" E ")" ["-"]
],
shifts: [
("(", S8),
("N", S9)
],
gotos: [
(T, S14)
]
},
State {
items: [
T = "(" E ")" (*) [EOF],
T = "(" E ")" (*) ["-"]
],
shifts: [],
gotos: []
},
State {
items: [
T = "(" E (*) ")" [")"],
T = "(" E (*) ")" ["-"],
E = E (*) "-" T [")"],
E = E (*) "-" T ["-"]
],
shifts: [
(")", S15),
("-", S11)
],
gotos: []
},
State {
items: [
E = E "-" T (*) [")"],
E = E "-" T (*) ["-"]
],
shifts: [],
gotos: []
},
State {
items: [
T = "(" E ")" (*) [")"],
T = "(" E ")" (*) ["-"]
],
shifts: [],
gotos: []
}
]"#);
}

View File

@ -1,4 +1,7 @@
use std::collections::{hash_map, HashMap, HashSet};
use std::fmt::{Display, Formatter, Error};
use std::hash::Hash;
use std::iter::FromIterator;
pub struct Sep<S>(pub &'static str, pub S);
@ -29,3 +32,49 @@ impl<'a,S:Display> Display for Prefix<&'a [S]> {
}
}
/// A map from each key to the list of values pushed under that key.
///
/// Values for a key are kept in the order they were pushed. The order in
/// which *keys* come back from iteration is that of the underlying
/// `HashMap` and must not be relied upon.
#[derive(Clone, Debug)]
pub struct Multimap<K,V> {
    map: HashMap<K,Vec<V>>
}

impl<K:Hash+Eq,V> Multimap<K,V> {
    /// Creates an empty multimap.
    pub fn new() -> Multimap<K,V> {
        Multimap { map: map() }
    }

    /// Appends `value` to the list stored under `key`, creating the list
    /// on first use.
    pub fn push(&mut self, key: K, value: V) {
        self.map.entry(key).or_insert_with(Vec::new).push(value);
    }

    /// Returns the values pushed under `key`, in push order, or an empty
    /// slice when the key has never been pushed.
    pub fn get(&self, key: &K) -> &[V] {
        match self.map.get(key) {
            Some(values) => values,
            None => &[]
        }
    }

    /// Consumes the multimap, yielding one `(key, values)` pair per key.
    ///
    /// Kept as an inherent method for existing callers; it yields exactly
    /// what the `IntoIterator` implementation does.
    pub fn into_iter(self) -> hash_map::IntoIter<K, Vec<V>> {
        self.map.into_iter()
    }
}

/// An empty multimap, identical to `Multimap::new()`.
impl<K:Hash+Eq,V> Default for Multimap<K,V> {
    fn default() -> Self {
        Multimap::new()
    }
}

/// Lets a multimap be consumed directly by a `for` loop or any API that
/// takes `IntoIterator`, yielding `(key, values)` pairs.
impl<K:Hash+Eq,V> IntoIterator for Multimap<K,V> {
    type Item = (K, Vec<V>);
    type IntoIter = hash_map::IntoIter<K, Vec<V>>;

    fn into_iter(self) -> Self::IntoIter {
        self.map.into_iter()
    }
}

/// Builds a multimap from `(key, value)` pairs, grouping the values of
/// equal keys in the order the pairs arrive.
impl<K:Hash+Eq,V> FromIterator<(K,V)> for Multimap<K,V> {
    fn from_iter<T>(iterator: T) -> Self where T: IntoIterator<Item=(K,V)> {
        let mut map = Multimap::new();
        for (key, value) in iterator {
            map.push(key, value);
        }
        map
    }
}
/// Project-wide alias for the map type, so the backing implementation can
/// be changed in one place.
pub type Map<K,V> = HashMap<K,V>;

/// Returns a new, empty `Map`.
pub fn map<K,V>() -> Map<K,V>
    where K: Hash+Eq
{
    Map::new()
}
/// Project-wide alias for the set type, so the backing implementation can
/// be changed in one place.
pub type Set<K> = HashSet<K>;

/// Returns a new, empty `Set`.
pub fn set<K>() -> Set<K>
    where K: Hash+Eq
{
    Set::new()
}