Start the naive LR(1) algorithm with support for transitive closures

over epsilon moves
This commit is contained in:
Niko Matsakis 2015-06-19 10:17:35 -04:00
parent 4e5204078a
commit 8c577422bf
12 changed files with 240 additions and 72 deletions

View File

@ -71,7 +71,7 @@ pub struct Grammar {
pub items: Vec<GrammarItem>,
}
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
/// A pair of `usize` values; presumably the start and end offsets of a
/// region of the input text -- TODO confirm against the parser.
pub struct Span(pub usize, pub usize);
#[derive(Clone, Debug, PartialEq, Eq)]

View File

@ -9,7 +9,7 @@ use std::collections::HashMap;
use std::fmt::{Debug, Display, Formatter, Error};
use util::Sep;
// These concepts we re-use wholesale
// These concepts we re-use wholesale
pub use grammar::parse_tree::{NonterminalString, Span, TerminalString};
#[derive(Clone, Debug)]
@ -20,7 +20,7 @@ pub struct Grammar {
pub types: Types,
}
#[derive(Clone, PartialEq, Eq)]
#[derive(Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct Production {
// this overlaps with the key in the hashmap, obviously, but it's
// handy to have it
@ -30,7 +30,7 @@ pub struct Production {
pub span: Span,
}
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord)]
#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum Symbol {
Nonterminal(NonterminalString),
Terminal(TerminalString),
@ -101,7 +101,7 @@ impl Debug for TypeRepr {
}
}
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
#[derive(Copy, Clone, Debug, Hash, PartialOrd, Ord, PartialEq, Eq)]
/// Newtype wrapper around a private `u32`; presumably an index identifying
/// an action function -- TODO confirm against the code that creates these.
pub struct ActionFn(u32);
impl ActionFn {

View File

@ -5,6 +5,9 @@ use std::collections::{HashMap, HashSet};
use super::Lookahead;
#[cfg(test)]
mod test;
/// Table associating each nonterminal with its `FirstSet`.
pub struct FirstSets {
// keyed by nonterminal
map: HashMap<NonterminalString, FirstSet>
}
@ -34,10 +37,6 @@ impl FirstSets {
this
}
/// Returns a reference to the `FirstSet` stored for `nt`.
///
/// Uses `HashMap` indexing, so this panics if `nt` has no entry in the map.
pub fn first_set(&self, nt: NonterminalString) -> &FirstSet {
&self.map[&nt]
}
pub fn first(&self, symbols: &[Symbol], lookahead: Lookahead) -> Vec<Lookahead> {
let mut result = vec![];
@ -82,58 +81,3 @@ impl FirstSets {
}
}
// Inline tests for `FirstSets::first`.
mod test {
use intern::intern;
use normalize::normalize;
use parser;
use grammar::repr::*;
use lr1::Lookahead;
use lr1::Lookahead::EOF;
use super::FirstSets;
// Interns `t` and wraps it as a nonterminal symbol.
fn nt(t: &str) -> Symbol {
Symbol::Nonterminal(NonterminalString(intern(t)))
}
// Interns `t` and wraps it as a terminal symbol.
fn t(t: &str) -> Symbol {
Symbol::Terminal(TerminalString(intern(t)))
}
// Interns `t` and wraps it as a terminal lookahead.
fn la(t: &str) -> Lookahead {
Lookahead::Terminal(TerminalString(intern(t)))
}
// Calls `FirstSets::first` and sorts the result so that the assertions
// below can compare against a fixed ordering.
fn first(first: &FirstSets, symbols: &[Symbol], lookahead: Lookahead) -> Vec<Lookahead> {
let mut v = first.first(symbols, lookahead);
v.sort();
v
}
// Parses and normalizes a tiny grammar in which `B` has an empty
// alternative, then checks the computed first sets.
#[test]
fn basic() {
let grammar = parser::parse_grammar(r#"
grammar Foo {
token Tok where { };
A = B "C";
B: Option<u32> = {
"D" => Some(1);
=> None;
};
}
"#).unwrap();
let grammar = normalize(grammar).unwrap();
let first_sets = FirstSets::new(&grammar);
// `A = B "C"`: `B` may be empty, so both "C" and "D" can start `A`.
assert_eq!(
first(&first_sets, &[nt("A")], EOF),
vec![la("C"), la("D")]);
// `B` alone may be empty, so the supplied lookahead (EOF) shows through.
assert_eq!(
first(&first_sets, &[nt("B")], EOF),
vec![EOF, la("D")]);
// `B` followed by terminal "E": an empty `B` exposes "E" rather than EOF.
assert_eq!(
first(&first_sets, &[nt("B"), t("E")], EOF),
vec![la("D"), la("E")]);
}
}

51
src/lr1/first/test.rs Normal file
View File

@ -0,0 +1,51 @@
use intern::intern;
use grammar::repr::*;
use lr1::Lookahead;
use lr1::Lookahead::EOF;
use test_util::{normalized_grammar};
use super::FirstSets;
/// Test helper: interns `t` and wraps it as a nonterminal `Symbol`.
pub fn nt(t: &str) -> Symbol {
    let name = NonterminalString(intern(t));
    Symbol::Nonterminal(name)
}
/// Test helper: interns `t` and wraps it as a terminal `Symbol`.
pub fn term(t: &str) -> Symbol {
    let name = TerminalString(intern(t));
    Symbol::Terminal(name)
}
/// Test helper: interns `t` and wraps it as a terminal `Lookahead`.
fn la(t: &str) -> Lookahead {
    let terminal = TerminalString(intern(t));
    Lookahead::Terminal(terminal)
}
/// Test helper: evaluates `FirstSets::first` for `symbols` followed by
/// `lookahead` and returns the resulting lookaheads in sorted order, so
/// that tests can compare against a fixed ordering.
fn first(first: &FirstSets, symbols: &[Symbol], lookahead: Lookahead) -> Vec<Lookahead> {
    let mut lookaheads = first.first(symbols, lookahead);
    lookaheads.sort();
    lookaheads
}
/// Checks `FirstSets::first` on a tiny grammar in which `B` has an empty
/// alternative.
#[test]
fn basic() {
    let source = r#"
grammar Foo {
token Tok where { };
A = B "C";
B: Option<u32> = {
"D" => Some(1);
=> None;
};
}
"#;
    let grammar = normalized_grammar(source);
    let first_sets = FirstSets::new(&grammar);

    // `A = B "C"`: `B` may be empty, so both "C" and "D" can start `A`.
    let first_of_a = first(&first_sets, &[nt("A")], EOF);
    assert_eq!(first_of_a, vec![la("C"), la("D")]);

    // `B` alone may be empty, so the supplied lookahead (EOF) shows through.
    let first_of_b = first(&first_sets, &[nt("B")], EOF);
    assert_eq!(first_of_b, vec![EOF, la("D")]);

    // `B` followed by terminal "E": an empty `B` exposes "E" rather than EOF.
    let first_of_b_e = first(&first_sets, &[nt("B"), term("E")], EOF);
    assert_eq!(first_of_b_e, vec![la("D"), la("E")]);
}

View File

@ -1,10 +1,14 @@
//! Naive LR(1) generation algorithm.
use grammar::repr::*;
use std::collections::{HashMap};
use std::collections::{HashMap, HashSet};
use std::fmt::{Debug, Formatter, Error};
use util::Prefix;
mod first;
#[cfg(test)] mod test;
struct LR1<'grammar> {
grammar: &'grammar Grammar,
states: Vec<State<'grammar>>,
@ -12,7 +16,7 @@ struct LR1<'grammar> {
}
/// One state of the LR(1) automaton: a list of configurations plus two
/// tables keyed by terminal (`shifts`) and by nonterminal (`gotos`).
struct State<'grammar> {
// the configurations that make up this state
items: Vec<Configuration<'grammar>>,
configurations: Vec<Configuration<'grammar>>,
// terminal -> state index; presumably the state entered after shifting
// that terminal -- TODO confirm once the tables are populated
shifts: HashMap<TerminalString, StateIndex>,
// nonterminal -> state index; presumably the state entered after
// reducing to that nonterminal -- TODO confirm
gotos: HashMap<NonterminalString, StateIndex>,
}
@ -20,12 +24,13 @@ struct State<'grammar> {
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
/// Newtype around a `usize` used as the value type of `State::shifts` and
/// `State::gotos`; presumably an index into `LR1::states` -- TODO confirm.
struct StateIndex(usize);
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
/// The lookahead attached to a configuration: either end of input or a
/// specific terminal.
enum Lookahead {
// end of input
EOF,
// a specific terminal
Terminal(TerminalString),
}
#[derive(Copy, Clone, Hash, PartialEq, Eq)]
struct Configuration<'grammar> {
production: &'grammar Production,
index: usize, // the dot comes before `index`, so `index` would be 1 for X = A (*) B C
@ -41,5 +46,110 @@ impl<'grammar> LR1<'grammar> {
}
}
/// Seeds the state table with the start state for `start_nt`, using `EOF`
/// as the lookahead. Must be called while the table is still empty
/// (checked in debug builds).
fn build_states(&mut self, start_nt: NonterminalString) {
    debug_assert!(self.states.is_empty());

    // Computed before the push so that the shared borrow of `self` has
    // ended by the time `self.states` is mutated.
    let initial = self.start_state(start_nt, Lookahead::EOF);
    self.states.push(initial);
}
/// Builds the state whose configurations are the transitive closure of
/// the start configurations for `id` with the given `lookahead`. The
/// `shifts` and `gotos` tables of the returned state are empty.
fn start_state(&self, id: NonterminalString, lookahead: Lookahead) -> State<'grammar> {
    let seeds = self.start_configurations(id, lookahead);
    let closed = self.transitive_closure(seeds);
    State {
        configurations: closed,
        shifts: HashMap::new(),
        gotos: HashMap::new(),
    }
}
/// Returns one configuration per production of `id`, each with the dot at
/// position 0 and carrying `lookahead`, in the order the grammar lists
/// the productions.
fn start_configurations(&self,
                        id: NonterminalString,
                        lookahead: Lookahead)
                        -> Vec<Configuration<'grammar>>
{
    let mut seeds = Vec::new();
    for production in self.grammar.productions_for(id).iter() {
        seeds.push(Configuration {
            production: production,
            index: 0,
            lookahead: lookahead,
        });
    }
    seeds
}
/// Expands `configurations` with epsilon moves until no new configuration
/// can be added, and returns the expanded list.
///
/// For every configuration whose dot sits in front of a nonterminal `nt`,
/// the lookaheads that can follow `nt` are computed as the first set of
/// the symbols after `nt` combined with the configuration's own
/// lookahead. For each such lookahead, the start configurations of `nt`
/// are appended. Configurations whose dot is at the end (reduce) or in
/// front of a terminal (shift) contribute nothing.
///
/// The input configurations keep their positions; each newly discovered
/// configuration is appended exactly once, in discovery order.
fn transitive_closure(&self, mut configurations: Vec<Configuration<'grammar>>)
                      -> Vec<Configuration<'grammar>>
{
    // Everything already present in `configurations`, for O(1) duplicate checks.
    let mut seen: HashSet<Configuration<'grammar>> =
        configurations.iter().cloned().collect();

    // `configurations` doubles as the worklist: entries before `counter`
    // have been expanded, entries from `counter` onwards are pending.
    let mut counter = 0;
    while counter < configurations.len() {
        // Copy the entry out so the vector can grow while we inspect it.
        let configuration = configurations[counter];
        counter += 1;

        if let Some((Symbol::Nonterminal(nt), remainder)) = configuration.shift_symbol() {
            let first_set = self.first_sets.first(remainder, configuration.lookahead);
            for lookahead in first_set {
                for candidate in self.start_configurations(nt, lookahead) {
                    // `insert` returns true only for configurations not seen before.
                    if seen.insert(candidate) {
                        configurations.push(candidate);
                    }
                }
            }
        }
    }

    configurations
}
}
impl<'grammar> Configuration<'grammar> {
    /// Returns the symbol immediately after the dot together with the
    /// symbols that follow it, or `None` when the dot is at the end of the
    /// production.
    fn shift_symbol(&self) -> Option<(Symbol, &[Symbol])> {
        let symbols = &self.production.symbols;
        let dot = self.index;
        if dot == symbols.len() {
            return None;
        }
        Some((symbols[dot], &symbols[dot + 1..]))
    }
}
/// Renders a configuration as `NT = <symbols before dot> (*) <symbols after
/// dot> [lookahead]`, with every symbol preceded by a single space.
impl<'grammar> Debug for Configuration<'grammar> {
    fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
        let symbols = &self.production.symbols;
        let before_dot = Prefix(" ", &symbols[..self.index]);
        let after_dot = Prefix(" ", &symbols[self.index..]);
        write!(fmt, "{} ={} (*){} [{:?}]",
               self.production.nonterminal,
               before_dot,
               after_dot,
               self.lookahead)
    }
}
/// Renders `EOF` as the literal text `EOF` and a terminal through its
/// `Display` output.
impl Debug for Lookahead {
    fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
        match self {
            &Lookahead::EOF => write!(fmt, "EOF"),
            &Lookahead::Terminal(terminal) => write!(fmt, "{}", terminal),
        }
    }
}

40
src/lr1/test.rs Normal file
View File

@ -0,0 +1,40 @@
use intern::intern;
use grammar::repr::*;
use test_util::{expect_debug, normalized_grammar};
use super::{Configuration, Lookahead, LR1};
use super::Lookahead::EOF;
/// Test helper: interns `t` and wraps it as a `NonterminalString`.
fn nt(t: &str) -> NonterminalString {
    let interned = intern(t);
    NonterminalString(interned)
}
/// Test helper: builds an `LR1` for `grammar` and returns the transitive
/// closure of the start configurations of `nonterminal` with lookahead `la`.
fn configurations<'g>(grammar: &'g Grammar, nonterminal: &str, la: Lookahead)
                      -> Vec<Configuration<'g>>
{
    let lr1 = LR1::new(&grammar);
    let seeds = lr1.start_configurations(nt(nonterminal), la);
    lr1.transitive_closure(seeds)
}
/// The closure of `A`'s start configurations must pull in both productions
/// of `B`, each with lookahead "C" (the terminal that follows `B` in `A`).
#[test]
fn start_state() {
    let grammar = normalized_grammar(r#"
grammar Foo {
token Tok where { };
A = B "C";
B: Option<u32> = {
"D" => Some(1);
=> None;
};
}
"#);
    let closure = configurations(&grammar, "A", EOF);
    let expected = r#"[
A = (*) B "C" [EOF],
B = (*) "D" ["C"],
B = (*) ["C"]
]"#;
    expect_debug(closure, expected);
}

View File

@ -1,5 +1,9 @@
// Need this for rusty_peg
#![recursion_limit="256"]
// I hate this lint.
#![allow(unused_parens)]
#[macro_use]
extern crate rusty_peg;
extern crate diff;
@ -13,6 +17,9 @@ mod normalize;
mod parser;
mod util;
#[cfg(test)]
mod test_util;
#[cfg(not(test))]
fn main() {
println!("Hello, world!");

View File

@ -1,7 +1,7 @@
use grammar::repr::{Grammar, Production};
use normalize::normalize;
use normalize::test_util::expect_debug;
use parser;
use test_util::expect_debug;
fn flat_productions(grammar: &Grammar) -> Vec<Production> {
let mut productions: Vec<_> =

View File

@ -1,5 +1,5 @@
use parser;
use normalize::test_util::compare;
use test_util::compare;
use super::expand_macros;

View File

@ -60,8 +60,5 @@ mod lower;
///////////////////////////////////////////////////////////////////////////
// Shared routines
#[cfg(test)]
mod test_util;
mod norm_util;

View File

@ -1,4 +1,6 @@
use diff;
use grammar::repr as r;
use intern::intern;
use regex::Regex;
use std::fmt::{Debug, Formatter, Error};
@ -44,3 +46,6 @@ pub fn compare<D:Debug,E:Debug>(actual: D, expected: E) {
});
}
/// Test helper: parses the grammar text `s` and normalizes the result.
///
/// Panics if either parsing or normalization fails.
pub fn normalized_grammar(s: &str) -> r::Grammar {
    let parsed = ::parser::parse_grammar(s).unwrap();
    ::normalize::normalize(parsed).unwrap()
}

View File

@ -15,3 +15,17 @@ impl<'a,S:Display> Display for Sep<&'a Vec<S>> {
Ok(())
}
}
/// Display adaptor pairing a fixed prefix string (field `.0`) with a
/// sequence (field `.1`); every element is written preceded by the prefix.
pub struct Prefix<S>(pub &'static str, pub S);

/// Formats a slice as `<prefix><elem><prefix><elem>...`. An empty slice
/// produces no output at all.
impl<'a,S:Display> Display for Prefix<&'a [S]> {
    fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
        let &Prefix(prefix, elems) = self;
        for elem in elems {
            // `try!` rather than `?`: this file predates the `?` operator.
            try!(write!(fmt, "{}{}", prefix, elem));
        }
        Ok(())
    }
}