mirror of
https://github.com/fluencelabs/lalrpop
synced 2025-03-16 17:00:53 +00:00
Start the naive LR(1) algorithm with support for transitive closures
over epsilon moves
This commit is contained in:
parent
4e5204078a
commit
8c577422bf
@ -71,7 +71,7 @@ pub struct Grammar {
|
||||
pub items: Vec<GrammarItem>,
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
||||
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct Span(pub usize, pub usize);
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
|
@ -9,7 +9,7 @@ use std::collections::HashMap;
|
||||
use std::fmt::{Debug, Display, Formatter, Error};
|
||||
use util::Sep;
|
||||
|
||||
// These concepts we re-use wholesale
|
||||
// These concepts we re-use wholesale
|
||||
pub use grammar::parse_tree::{NonterminalString, Span, TerminalString};
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
@ -20,7 +20,7 @@ pub struct Grammar {
|
||||
pub types: Types,
|
||||
}
|
||||
|
||||
#[derive(Clone, PartialEq, Eq)]
|
||||
#[derive(Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct Production {
|
||||
// this overlaps with the key in the hashmap, obviously, but it's
|
||||
// handy to have it
|
||||
@ -30,7 +30,7 @@ pub struct Production {
|
||||
pub span: Span,
|
||||
}
|
||||
|
||||
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord)]
|
||||
#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum Symbol {
|
||||
Nonterminal(NonterminalString),
|
||||
Terminal(TerminalString),
|
||||
@ -101,7 +101,7 @@ impl Debug for TypeRepr {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
||||
#[derive(Copy, Clone, Debug, Hash, PartialOrd, Ord, PartialEq, Eq)]
|
||||
pub struct ActionFn(u32);
|
||||
|
||||
impl ActionFn {
|
||||
|
@ -5,6 +5,9 @@ use std::collections::{HashMap, HashSet};
|
||||
|
||||
use super::Lookahead;
|
||||
|
||||
#[cfg(test)]
|
||||
mod test;
|
||||
|
||||
pub struct FirstSets {
|
||||
map: HashMap<NonterminalString, FirstSet>
|
||||
}
|
||||
@ -34,10 +37,6 @@ impl FirstSets {
|
||||
this
|
||||
}
|
||||
|
||||
pub fn first_set(&self, nt: NonterminalString) -> &FirstSet {
|
||||
&self.map[&nt]
|
||||
}
|
||||
|
||||
pub fn first(&self, symbols: &[Symbol], lookahead: Lookahead) -> Vec<Lookahead> {
|
||||
let mut result = vec![];
|
||||
|
||||
@ -82,58 +81,3 @@ impl FirstSets {
|
||||
}
|
||||
}
|
||||
|
||||
mod test {
|
||||
use intern::intern;
|
||||
use normalize::normalize;
|
||||
use parser;
|
||||
use grammar::repr::*;
|
||||
use lr1::Lookahead;
|
||||
use lr1::Lookahead::EOF;
|
||||
use super::FirstSets;
|
||||
|
||||
fn nt(t: &str) -> Symbol {
|
||||
Symbol::Nonterminal(NonterminalString(intern(t)))
|
||||
}
|
||||
|
||||
fn t(t: &str) -> Symbol {
|
||||
Symbol::Terminal(TerminalString(intern(t)))
|
||||
}
|
||||
|
||||
fn la(t: &str) -> Lookahead {
|
||||
Lookahead::Terminal(TerminalString(intern(t)))
|
||||
}
|
||||
|
||||
fn first(first: &FirstSets, symbols: &[Symbol], lookahead: Lookahead) -> Vec<Lookahead> {
|
||||
let mut v = first.first(symbols, lookahead);
|
||||
v.sort();
|
||||
v
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn basic() {
|
||||
let grammar = parser::parse_grammar(r#"
|
||||
grammar Foo {
|
||||
token Tok where { };
|
||||
A = B "C";
|
||||
B: Option<u32> = {
|
||||
"D" => Some(1);
|
||||
=> None;
|
||||
};
|
||||
}
|
||||
"#).unwrap();
|
||||
let grammar = normalize(grammar).unwrap();
|
||||
let first_sets = FirstSets::new(&grammar);
|
||||
|
||||
assert_eq!(
|
||||
first(&first_sets, &[nt("A")], EOF),
|
||||
vec![la("C"), la("D")]);
|
||||
|
||||
assert_eq!(
|
||||
first(&first_sets, &[nt("B")], EOF),
|
||||
vec![EOF, la("D")]);
|
||||
|
||||
assert_eq!(
|
||||
first(&first_sets, &[nt("B"), t("E")], EOF),
|
||||
vec![la("D"), la("E")]);
|
||||
}
|
||||
}
|
51
src/lr1/first/test.rs
Normal file
51
src/lr1/first/test.rs
Normal file
@ -0,0 +1,51 @@
|
||||
use intern::intern;
|
||||
use grammar::repr::*;
|
||||
use lr1::Lookahead;
|
||||
use lr1::Lookahead::EOF;
|
||||
use test_util::{normalized_grammar};
|
||||
use super::FirstSets;
|
||||
|
||||
pub fn nt(t: &str) -> Symbol {
|
||||
Symbol::Nonterminal(NonterminalString(intern(t)))
|
||||
}
|
||||
|
||||
pub fn term(t: &str) -> Symbol {
|
||||
Symbol::Terminal(TerminalString(intern(t)))
|
||||
}
|
||||
|
||||
fn la(t: &str) -> Lookahead {
|
||||
Lookahead::Terminal(TerminalString(intern(t)))
|
||||
}
|
||||
|
||||
/// Test helper: evaluates `first.first(symbols, lookahead)` and returns the
/// result sorted, so assertions do not depend on the order in which the
/// lookaheads were produced.
fn first(first: &FirstSets, symbols: &[Symbol], lookahead: Lookahead) -> Vec<Lookahead> {
    let mut lookaheads = first.first(symbols, lookahead);
    lookaheads.sort();
    lookaheads
}
|
||||
|
||||
#[test]
fn basic() {
    // `B` has an empty alternative, so anything that starts with `B` can also
    // start with whatever follows it.
    let grammar = normalized_grammar(r#"
grammar Foo {
    token Tok where { };
    A = B "C";
    B: Option<u32> = {
        "D" => Some(1);
        => None;
    };
}
"#);
    let first_sets = FirstSets::new(&grammar);

    // A = B "C": either B's "D", or "C" when B is empty.
    let first_of_a = first(&first_sets, &[nt("A")], EOF);
    assert_eq!(first_of_a, vec![la("C"), la("D")]);

    // B alone: "D", or the supplied lookahead when B is empty.
    let first_of_b = first(&first_sets, &[nt("B")], EOF);
    assert_eq!(first_of_b, vec![EOF, la("D")]);

    // B followed by a terminal: the terminal shields the supplied lookahead.
    let first_of_b_e = first(&first_sets, &[nt("B"), term("E")], EOF);
    assert_eq!(first_of_b_e, vec![la("D"), la("E")]);
}
|
116
src/lr1/mod.rs
116
src/lr1/mod.rs
@ -1,10 +1,14 @@
|
||||
//! Naive LR(1) generation algorithm.
|
||||
|
||||
use grammar::repr::*;
|
||||
use std::collections::{HashMap};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::fmt::{Debug, Formatter, Error};
|
||||
use util::Prefix;
|
||||
|
||||
mod first;
|
||||
|
||||
#[cfg(test)] mod test;
|
||||
|
||||
struct LR1<'grammar> {
|
||||
grammar: &'grammar Grammar,
|
||||
states: Vec<State<'grammar>>,
|
||||
@ -12,7 +16,7 @@ struct LR1<'grammar> {
|
||||
}
|
||||
|
||||
struct State<'grammar> {
|
||||
items: Vec<Configuration<'grammar>>,
|
||||
configurations: Vec<Configuration<'grammar>>,
|
||||
shifts: HashMap<TerminalString, StateIndex>,
|
||||
gotos: HashMap<NonterminalString, StateIndex>,
|
||||
}
|
||||
@ -20,12 +24,13 @@ struct State<'grammar> {
|
||||
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
|
||||
struct StateIndex(usize);
|
||||
|
||||
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
|
||||
#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
|
||||
enum Lookahead {
|
||||
EOF,
|
||||
Terminal(TerminalString),
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Hash, PartialEq, Eq)]
|
||||
struct Configuration<'grammar> {
|
||||
production: &'grammar Production,
|
||||
index: usize, // the dot comes before `index`, so `index` would be 1 for X = A (*) B C
|
||||
@ -41,5 +46,110 @@ impl<'grammar> LR1<'grammar> {
|
||||
}
|
||||
}
|
||||
|
||||
fn build_states(&mut self, start_nt: NonterminalString) {
|
||||
debug_assert!(self.states.is_empty());
|
||||
|
||||
let state0 = self.start_state(start_nt, Lookahead::EOF);
|
||||
self.states.push(state0);
|
||||
}
|
||||
|
||||
fn start_state(&self, id: NonterminalString, lookahead: Lookahead) -> State<'grammar> {
|
||||
let configurations =
|
||||
self.transitive_closure(
|
||||
self.start_configurations(id, lookahead));
|
||||
State { configurations: configurations,
|
||||
shifts: HashMap::new(),
|
||||
gotos: HashMap::new() }
|
||||
}
|
||||
|
||||
fn start_configurations(&self,
|
||||
id: NonterminalString,
|
||||
lookahead: Lookahead)
|
||||
-> Vec<Configuration<'grammar>>
|
||||
{
|
||||
self.grammar.productions_for(id)
|
||||
.iter()
|
||||
.map(|production| {
|
||||
Configuration { production: production,
|
||||
index: 0,
|
||||
lookahead: lookahead }
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
// expands `state` with epsilon moves
|
||||
fn transitive_closure(&self, mut configurations: Vec<Configuration<'grammar>>)
|
||||
-> Vec<Configuration<'grammar>>
|
||||
{
|
||||
println!("expand_configurations({:?})", configurations);
|
||||
|
||||
let mut counter = 0;
|
||||
|
||||
let mut set: HashSet<Configuration<'grammar>> =
|
||||
configurations.iter().cloned().collect();
|
||||
|
||||
while counter < configurations.len() {
|
||||
println!("expand_configurations: counter={:?}", counter);
|
||||
|
||||
let new_configurations: Vec<_> =
|
||||
configurations[counter..]
|
||||
.iter()
|
||||
.filter_map(|configuration| {
|
||||
let shift_symbol = configuration.shift_symbol();
|
||||
println!("expand_configurations: configuration: {:?} shift_symbol: {:?}",
|
||||
configuration, shift_symbol);
|
||||
match shift_symbol {
|
||||
None => None, // requires a reduce
|
||||
Some((Symbol::Terminal(_), _)) => None, // requires a shift
|
||||
Some((Symbol::Nonterminal(nt), remainder)) => {
|
||||
Some((nt, remainder, configuration.lookahead))
|
||||
}
|
||||
}
|
||||
})
|
||||
.flat_map(|(nt, remainder, lookahead)| {
|
||||
let first_set = self.first_sets.first(remainder, lookahead);
|
||||
println!("expand_configurations: ({:?}, {:?}, {:?}) first_set={:?}",
|
||||
nt, remainder, lookahead, first_set);
|
||||
first_set.into_iter()
|
||||
.flat_map(move |l| self.start_configurations(nt, l))
|
||||
})
|
||||
.filter(|&configuration| set.insert(configuration))
|
||||
.collect();
|
||||
|
||||
counter = configurations.len();
|
||||
configurations.extend(new_configurations);
|
||||
}
|
||||
|
||||
configurations
|
||||
}
|
||||
}
|
||||
|
||||
impl<'grammar> Configuration<'grammar> {
|
||||
fn shift_symbol(&self) -> Option<(Symbol, &[Symbol])> {
|
||||
if self.index == self.production.symbols.len() {
|
||||
None
|
||||
} else {
|
||||
Some((self.production.symbols[self.index],
|
||||
&self.production.symbols[self.index+1..]))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'grammar> Debug for Configuration<'grammar> {
|
||||
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
|
||||
write!(fmt, "{} ={} (*){} [{:?}]",
|
||||
self.production.nonterminal,
|
||||
Prefix(" ", &self.production.symbols[..self.index]),
|
||||
Prefix(" ", &self.production.symbols[self.index..]),
|
||||
self.lookahead)
|
||||
}
|
||||
}
|
||||
|
||||
impl Debug for Lookahead {
|
||||
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
|
||||
match *self {
|
||||
Lookahead::EOF => write!(fmt, "EOF"),
|
||||
Lookahead::Terminal(s) => write!(fmt, "{}", s),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
40
src/lr1/test.rs
Normal file
40
src/lr1/test.rs
Normal file
@ -0,0 +1,40 @@
|
||||
use intern::intern;
|
||||
use grammar::repr::*;
|
||||
use test_util::{expect_debug, normalized_grammar};
|
||||
use super::{Configuration, Lookahead, LR1};
|
||||
use super::Lookahead::EOF;
|
||||
|
||||
fn nt(t: &str) -> NonterminalString {
|
||||
NonterminalString(intern(t))
|
||||
}
|
||||
|
||||
fn configurations<'g>(grammar: &'g Grammar, nonterminal: &str, la: Lookahead)
|
||||
-> Vec<Configuration<'g>>
|
||||
{
|
||||
let lr1 = LR1::new(&grammar);
|
||||
let configurations =
|
||||
lr1.transitive_closure(
|
||||
lr1.start_configurations(nt(nonterminal), la));
|
||||
configurations
|
||||
}
|
||||
|
||||
#[test]
fn start_state() {
    let grammar = normalized_grammar(r#"
grammar Foo {
    token Tok where { };
    A = B "C";
    B: Option<u32> = {
        "D" => Some(1);
        => None;
    };
}
"#);

    // Closing over `A = (*) B "C"` must pull in both productions of `B`,
    // each with "C" (what follows `B`) as its lookahead.
    let closure = configurations(&grammar, "A", EOF);
    expect_debug(closure, r#"[
    A = (*) B "C" [EOF],
    B = (*) "D" ["C"],
    B = (*) ["C"]
]"#);
}
|
||||
|
@ -1,5 +1,9 @@
|
||||
// Need this for rusty_peg
|
||||
#![recursion_limit="256"]
|
||||
|
||||
// I hate this lint.
|
||||
#![allow(unused_parens)]
|
||||
|
||||
#[macro_use]
|
||||
extern crate rusty_peg;
|
||||
extern crate diff;
|
||||
@ -13,6 +17,9 @@ mod normalize;
|
||||
mod parser;
|
||||
mod util;
|
||||
|
||||
#[cfg(test)]
|
||||
mod test_util;
|
||||
|
||||
#[cfg(not(test))]
|
||||
fn main() {
|
||||
println!("Hello, world!");
|
||||
|
@ -1,7 +1,7 @@
|
||||
use grammar::repr::{Grammar, Production};
|
||||
use normalize::normalize;
|
||||
use normalize::test_util::expect_debug;
|
||||
use parser;
|
||||
use test_util::expect_debug;
|
||||
|
||||
fn flat_productions(grammar: &Grammar) -> Vec<Production> {
|
||||
let mut productions: Vec<_> =
|
||||
|
@ -1,5 +1,5 @@
|
||||
use parser;
|
||||
use normalize::test_util::compare;
|
||||
use test_util::compare;
|
||||
|
||||
use super::expand_macros;
|
||||
|
||||
|
@ -60,8 +60,5 @@ mod lower;
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Shared routines
|
||||
|
||||
#[cfg(test)]
|
||||
mod test_util;
|
||||
|
||||
mod norm_util;
|
||||
|
||||
|
@ -1,4 +1,6 @@
|
||||
use diff;
|
||||
use grammar::repr as r;
|
||||
use intern::intern;
|
||||
use regex::Regex;
|
||||
use std::fmt::{Debug, Formatter, Error};
|
||||
|
||||
@ -44,3 +46,6 @@ pub fn compare<D:Debug,E:Debug>(actual: D, expected: E) {
|
||||
});
|
||||
}
|
||||
|
||||
pub fn normalized_grammar(s: &str) -> r::Grammar {
|
||||
::normalize::normalize(::parser::parse_grammar(s).unwrap()).unwrap()
|
||||
}
|
14
src/util.rs
14
src/util.rs
@ -15,3 +15,17 @@ impl<'a,S:Display> Display for Sep<&'a Vec<S>> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Display adapter that writes a fixed prefix in front of every element.
///
/// `Prefix(" ", &items[..])` renders as `" a b c"` for items `a`, `b`, `c`,
/// and as the empty string for an empty slice.
pub struct Prefix<S>(pub &'static str, pub S);

impl<'a,S:Display> Display for Prefix<&'a [S]> {
    /// Writes `prefix` followed by the element, once per element, stopping
    /// at the first formatting error.
    fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
        let &Prefix(prefix, elems) = self;
        for elem in elems {
            write!(fmt, "{}{}", prefix, elem)?;
        }
        Ok(())
    }
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user