mirror of
https://github.com/fluencelabs/lalrpop
synced 2025-04-01 07:51:03 +00:00
Start the naive LR(1) algorithm with support for transitive closures
over epsilon moves
This commit is contained in:
parent
4e5204078a
commit
8c577422bf
@ -71,7 +71,7 @@ pub struct Grammar {
|
|||||||
pub items: Vec<GrammarItem>,
|
pub items: Vec<GrammarItem>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
|
||||||
pub struct Span(pub usize, pub usize);
|
pub struct Span(pub usize, pub usize);
|
||||||
|
|
||||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||||
|
@ -9,7 +9,7 @@ use std::collections::HashMap;
|
|||||||
use std::fmt::{Debug, Display, Formatter, Error};
|
use std::fmt::{Debug, Display, Formatter, Error};
|
||||||
use util::Sep;
|
use util::Sep;
|
||||||
|
|
||||||
// These concepts we re-use wholesale
|
// These concepts we re-use wholesale
|
||||||
pub use grammar::parse_tree::{NonterminalString, Span, TerminalString};
|
pub use grammar::parse_tree::{NonterminalString, Span, TerminalString};
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
@ -20,7 +20,7 @@ pub struct Grammar {
|
|||||||
pub types: Types,
|
pub types: Types,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, PartialEq, Eq)]
|
#[derive(Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
|
||||||
pub struct Production {
|
pub struct Production {
|
||||||
// this overlaps with the key in the hashmap, obviously, but it's
|
// this overlaps with the key in the hashmap, obviously, but it's
|
||||||
// handy to have it
|
// handy to have it
|
||||||
@ -30,7 +30,7 @@ pub struct Production {
|
|||||||
pub span: Span,
|
pub span: Span,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord)]
|
#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
|
||||||
pub enum Symbol {
|
pub enum Symbol {
|
||||||
Nonterminal(NonterminalString),
|
Nonterminal(NonterminalString),
|
||||||
Terminal(TerminalString),
|
Terminal(TerminalString),
|
||||||
@ -101,7 +101,7 @@ impl Debug for TypeRepr {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
#[derive(Copy, Clone, Debug, Hash, PartialOrd, Ord, PartialEq, Eq)]
|
||||||
pub struct ActionFn(u32);
|
pub struct ActionFn(u32);
|
||||||
|
|
||||||
impl ActionFn {
|
impl ActionFn {
|
||||||
|
@ -5,6 +5,9 @@ use std::collections::{HashMap, HashSet};
|
|||||||
|
|
||||||
use super::Lookahead;
|
use super::Lookahead;
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod test;
|
||||||
|
|
||||||
pub struct FirstSets {
|
pub struct FirstSets {
|
||||||
map: HashMap<NonterminalString, FirstSet>
|
map: HashMap<NonterminalString, FirstSet>
|
||||||
}
|
}
|
||||||
@ -34,10 +37,6 @@ impl FirstSets {
|
|||||||
this
|
this
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn first_set(&self, nt: NonterminalString) -> &FirstSet {
|
|
||||||
&self.map[&nt]
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn first(&self, symbols: &[Symbol], lookahead: Lookahead) -> Vec<Lookahead> {
|
pub fn first(&self, symbols: &[Symbol], lookahead: Lookahead) -> Vec<Lookahead> {
|
||||||
let mut result = vec![];
|
let mut result = vec![];
|
||||||
|
|
||||||
@ -82,58 +81,3 @@ impl FirstSets {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
mod test {
|
|
||||||
use intern::intern;
|
|
||||||
use normalize::normalize;
|
|
||||||
use parser;
|
|
||||||
use grammar::repr::*;
|
|
||||||
use lr1::Lookahead;
|
|
||||||
use lr1::Lookahead::EOF;
|
|
||||||
use super::FirstSets;
|
|
||||||
|
|
||||||
fn nt(t: &str) -> Symbol {
|
|
||||||
Symbol::Nonterminal(NonterminalString(intern(t)))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn t(t: &str) -> Symbol {
|
|
||||||
Symbol::Terminal(TerminalString(intern(t)))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn la(t: &str) -> Lookahead {
|
|
||||||
Lookahead::Terminal(TerminalString(intern(t)))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn first(first: &FirstSets, symbols: &[Symbol], lookahead: Lookahead) -> Vec<Lookahead> {
|
|
||||||
let mut v = first.first(symbols, lookahead);
|
|
||||||
v.sort();
|
|
||||||
v
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn basic() {
|
|
||||||
let grammar = parser::parse_grammar(r#"
|
|
||||||
grammar Foo {
|
|
||||||
token Tok where { };
|
|
||||||
A = B "C";
|
|
||||||
B: Option<u32> = {
|
|
||||||
"D" => Some(1);
|
|
||||||
=> None;
|
|
||||||
};
|
|
||||||
}
|
|
||||||
"#).unwrap();
|
|
||||||
let grammar = normalize(grammar).unwrap();
|
|
||||||
let first_sets = FirstSets::new(&grammar);
|
|
||||||
|
|
||||||
assert_eq!(
|
|
||||||
first(&first_sets, &[nt("A")], EOF),
|
|
||||||
vec![la("C"), la("D")]);
|
|
||||||
|
|
||||||
assert_eq!(
|
|
||||||
first(&first_sets, &[nt("B")], EOF),
|
|
||||||
vec![EOF, la("D")]);
|
|
||||||
|
|
||||||
assert_eq!(
|
|
||||||
first(&first_sets, &[nt("B"), t("E")], EOF),
|
|
||||||
vec![la("D"), la("E")]);
|
|
||||||
}
|
|
||||||
}
|
|
51
src/lr1/first/test.rs
Normal file
51
src/lr1/first/test.rs
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
use intern::intern;
|
||||||
|
use grammar::repr::*;
|
||||||
|
use lr1::Lookahead;
|
||||||
|
use lr1::Lookahead::EOF;
|
||||||
|
use test_util::{normalized_grammar};
|
||||||
|
use super::FirstSets;
|
||||||
|
|
||||||
|
pub fn nt(t: &str) -> Symbol {
|
||||||
|
Symbol::Nonterminal(NonterminalString(intern(t)))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn term(t: &str) -> Symbol {
|
||||||
|
Symbol::Terminal(TerminalString(intern(t)))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn la(t: &str) -> Lookahead {
|
||||||
|
Lookahead::Terminal(TerminalString(intern(t)))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn first(first: &FirstSets, symbols: &[Symbol], lookahead: Lookahead) -> Vec<Lookahead> {
|
||||||
|
let mut v = first.first(symbols, lookahead);
|
||||||
|
v.sort();
|
||||||
|
v
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn basic() {
|
||||||
|
let grammar = normalized_grammar(r#"
|
||||||
|
grammar Foo {
|
||||||
|
token Tok where { };
|
||||||
|
A = B "C";
|
||||||
|
B: Option<u32> = {
|
||||||
|
"D" => Some(1);
|
||||||
|
=> None;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
"#);
|
||||||
|
let first_sets = FirstSets::new(&grammar);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
first(&first_sets, &[nt("A")], EOF),
|
||||||
|
vec![la("C"), la("D")]);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
first(&first_sets, &[nt("B")], EOF),
|
||||||
|
vec![EOF, la("D")]);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
first(&first_sets, &[nt("B"), term("E")], EOF),
|
||||||
|
vec![la("D"), la("E")]);
|
||||||
|
}
|
116
src/lr1/mod.rs
116
src/lr1/mod.rs
@ -1,10 +1,14 @@
|
|||||||
//! Naive LR(1) generation algorithm.
|
//! Naive LR(1) generation algorithm.
|
||||||
|
|
||||||
use grammar::repr::*;
|
use grammar::repr::*;
|
||||||
use std::collections::{HashMap};
|
use std::collections::{HashMap, HashSet};
|
||||||
|
use std::fmt::{Debug, Formatter, Error};
|
||||||
|
use util::Prefix;
|
||||||
|
|
||||||
mod first;
|
mod first;
|
||||||
|
|
||||||
|
#[cfg(test)] mod test;
|
||||||
|
|
||||||
struct LR1<'grammar> {
|
struct LR1<'grammar> {
|
||||||
grammar: &'grammar Grammar,
|
grammar: &'grammar Grammar,
|
||||||
states: Vec<State<'grammar>>,
|
states: Vec<State<'grammar>>,
|
||||||
@ -12,7 +16,7 @@ struct LR1<'grammar> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
struct State<'grammar> {
|
struct State<'grammar> {
|
||||||
items: Vec<Configuration<'grammar>>,
|
configurations: Vec<Configuration<'grammar>>,
|
||||||
shifts: HashMap<TerminalString, StateIndex>,
|
shifts: HashMap<TerminalString, StateIndex>,
|
||||||
gotos: HashMap<NonterminalString, StateIndex>,
|
gotos: HashMap<NonterminalString, StateIndex>,
|
||||||
}
|
}
|
||||||
@ -20,12 +24,13 @@ struct State<'grammar> {
|
|||||||
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
|
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
|
||||||
struct StateIndex(usize);
|
struct StateIndex(usize);
|
||||||
|
|
||||||
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
|
#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
|
||||||
enum Lookahead {
|
enum Lookahead {
|
||||||
EOF,
|
EOF,
|
||||||
Terminal(TerminalString),
|
Terminal(TerminalString),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, Hash, PartialEq, Eq)]
|
||||||
struct Configuration<'grammar> {
|
struct Configuration<'grammar> {
|
||||||
production: &'grammar Production,
|
production: &'grammar Production,
|
||||||
index: usize, // the dot comes before `index`, so `index` would be 1 for X = A (*) B C
|
index: usize, // the dot comes before `index`, so `index` would be 1 for X = A (*) B C
|
||||||
@ -41,5 +46,110 @@ impl<'grammar> LR1<'grammar> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn build_states(&mut self, start_nt: NonterminalString) {
|
||||||
|
debug_assert!(self.states.is_empty());
|
||||||
|
|
||||||
|
let state0 = self.start_state(start_nt, Lookahead::EOF);
|
||||||
|
self.states.push(state0);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn start_state(&self, id: NonterminalString, lookahead: Lookahead) -> State<'grammar> {
|
||||||
|
let configurations =
|
||||||
|
self.transitive_closure(
|
||||||
|
self.start_configurations(id, lookahead));
|
||||||
|
State { configurations: configurations,
|
||||||
|
shifts: HashMap::new(),
|
||||||
|
gotos: HashMap::new() }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn start_configurations(&self,
|
||||||
|
id: NonterminalString,
|
||||||
|
lookahead: Lookahead)
|
||||||
|
-> Vec<Configuration<'grammar>>
|
||||||
|
{
|
||||||
|
self.grammar.productions_for(id)
|
||||||
|
.iter()
|
||||||
|
.map(|production| {
|
||||||
|
Configuration { production: production,
|
||||||
|
index: 0,
|
||||||
|
lookahead: lookahead }
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
// expands `state` with epsilon moves
|
||||||
|
fn transitive_closure(&self, mut configurations: Vec<Configuration<'grammar>>)
|
||||||
|
-> Vec<Configuration<'grammar>>
|
||||||
|
{
|
||||||
|
println!("expand_configurations({:?})", configurations);
|
||||||
|
|
||||||
|
let mut counter = 0;
|
||||||
|
|
||||||
|
let mut set: HashSet<Configuration<'grammar>> =
|
||||||
|
configurations.iter().cloned().collect();
|
||||||
|
|
||||||
|
while counter < configurations.len() {
|
||||||
|
println!("expand_configurations: counter={:?}", counter);
|
||||||
|
|
||||||
|
let new_configurations: Vec<_> =
|
||||||
|
configurations[counter..]
|
||||||
|
.iter()
|
||||||
|
.filter_map(|configuration| {
|
||||||
|
let shift_symbol = configuration.shift_symbol();
|
||||||
|
println!("expand_configurations: configuration: {:?} shift_symbol: {:?}",
|
||||||
|
configuration, shift_symbol);
|
||||||
|
match shift_symbol {
|
||||||
|
None => None, // requires a reduce
|
||||||
|
Some((Symbol::Terminal(_), _)) => None, // requires a shift
|
||||||
|
Some((Symbol::Nonterminal(nt), remainder)) => {
|
||||||
|
Some((nt, remainder, configuration.lookahead))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.flat_map(|(nt, remainder, lookahead)| {
|
||||||
|
let first_set = self.first_sets.first(remainder, lookahead);
|
||||||
|
println!("expand_configurations: ({:?}, {:?}, {:?}) first_set={:?}",
|
||||||
|
nt, remainder, lookahead, first_set);
|
||||||
|
first_set.into_iter()
|
||||||
|
.flat_map(move |l| self.start_configurations(nt, l))
|
||||||
|
})
|
||||||
|
.filter(|&configuration| set.insert(configuration))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
counter = configurations.len();
|
||||||
|
configurations.extend(new_configurations);
|
||||||
|
}
|
||||||
|
|
||||||
|
configurations
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl<'grammar> Configuration<'grammar> {
|
||||||
|
fn shift_symbol(&self) -> Option<(Symbol, &[Symbol])> {
|
||||||
|
if self.index == self.production.symbols.len() {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some((self.production.symbols[self.index],
|
||||||
|
&self.production.symbols[self.index+1..]))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'grammar> Debug for Configuration<'grammar> {
|
||||||
|
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
|
||||||
|
write!(fmt, "{} ={} (*){} [{:?}]",
|
||||||
|
self.production.nonterminal,
|
||||||
|
Prefix(" ", &self.production.symbols[..self.index]),
|
||||||
|
Prefix(" ", &self.production.symbols[self.index..]),
|
||||||
|
self.lookahead)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Debug for Lookahead {
|
||||||
|
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
|
||||||
|
match *self {
|
||||||
|
Lookahead::EOF => write!(fmt, "EOF"),
|
||||||
|
Lookahead::Terminal(s) => write!(fmt, "{}", s),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
40
src/lr1/test.rs
Normal file
40
src/lr1/test.rs
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
use intern::intern;
|
||||||
|
use grammar::repr::*;
|
||||||
|
use test_util::{expect_debug, normalized_grammar};
|
||||||
|
use super::{Configuration, Lookahead, LR1};
|
||||||
|
use super::Lookahead::EOF;
|
||||||
|
|
||||||
|
fn nt(t: &str) -> NonterminalString {
|
||||||
|
NonterminalString(intern(t))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn configurations<'g>(grammar: &'g Grammar, nonterminal: &str, la: Lookahead)
|
||||||
|
-> Vec<Configuration<'g>>
|
||||||
|
{
|
||||||
|
let lr1 = LR1::new(&grammar);
|
||||||
|
let configurations =
|
||||||
|
lr1.transitive_closure(
|
||||||
|
lr1.start_configurations(nt(nonterminal), la));
|
||||||
|
configurations
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn start_state() {
|
||||||
|
let grammar = normalized_grammar(r#"
|
||||||
|
grammar Foo {
|
||||||
|
token Tok where { };
|
||||||
|
A = B "C";
|
||||||
|
B: Option<u32> = {
|
||||||
|
"D" => Some(1);
|
||||||
|
=> None;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
"#);
|
||||||
|
let configurations = configurations(&grammar, "A", EOF);
|
||||||
|
expect_debug(configurations, r#"[
|
||||||
|
A = (*) B "C" [EOF],
|
||||||
|
B = (*) "D" ["C"],
|
||||||
|
B = (*) ["C"]
|
||||||
|
]"#);
|
||||||
|
}
|
||||||
|
|
@ -1,5 +1,9 @@
|
|||||||
|
// Need this for rusty_peg
|
||||||
#![recursion_limit="256"]
|
#![recursion_limit="256"]
|
||||||
|
|
||||||
|
// I hate this lint.
|
||||||
|
#![allow(unused_parens)]
|
||||||
|
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
extern crate rusty_peg;
|
extern crate rusty_peg;
|
||||||
extern crate diff;
|
extern crate diff;
|
||||||
@ -13,6 +17,9 @@ mod normalize;
|
|||||||
mod parser;
|
mod parser;
|
||||||
mod util;
|
mod util;
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod test_util;
|
||||||
|
|
||||||
#[cfg(not(test))]
|
#[cfg(not(test))]
|
||||||
fn main() {
|
fn main() {
|
||||||
println!("Hello, world!");
|
println!("Hello, world!");
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
use grammar::repr::{Grammar, Production};
|
use grammar::repr::{Grammar, Production};
|
||||||
use normalize::normalize;
|
use normalize::normalize;
|
||||||
use normalize::test_util::expect_debug;
|
|
||||||
use parser;
|
use parser;
|
||||||
|
use test_util::expect_debug;
|
||||||
|
|
||||||
fn flat_productions(grammar: &Grammar) -> Vec<Production> {
|
fn flat_productions(grammar: &Grammar) -> Vec<Production> {
|
||||||
let mut productions: Vec<_> =
|
let mut productions: Vec<_> =
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use parser;
|
use parser;
|
||||||
use normalize::test_util::compare;
|
use test_util::compare;
|
||||||
|
|
||||||
use super::expand_macros;
|
use super::expand_macros;
|
||||||
|
|
||||||
|
@ -60,8 +60,5 @@ mod lower;
|
|||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
// Shared routines
|
// Shared routines
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod test_util;
|
|
||||||
|
|
||||||
mod norm_util;
|
mod norm_util;
|
||||||
|
|
||||||
|
@ -1,4 +1,6 @@
|
|||||||
use diff;
|
use diff;
|
||||||
|
use grammar::repr as r;
|
||||||
|
use intern::intern;
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use std::fmt::{Debug, Formatter, Error};
|
use std::fmt::{Debug, Formatter, Error};
|
||||||
|
|
||||||
@ -44,3 +46,6 @@ pub fn compare<D:Debug,E:Debug>(actual: D, expected: E) {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn normalized_grammar(s: &str) -> r::Grammar {
|
||||||
|
::normalize::normalize(::parser::parse_grammar(s).unwrap()).unwrap()
|
||||||
|
}
|
14
src/util.rs
14
src/util.rs
@ -15,3 +15,17 @@ impl<'a,S:Display> Display for Sep<&'a Vec<S>> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub struct Prefix<S>(pub &'static str, pub S);
|
||||||
|
|
||||||
|
impl<'a,S:Display> Display for Prefix<&'a [S]> {
|
||||||
|
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
|
||||||
|
let &Prefix(prefix, vec) = self;
|
||||||
|
let mut elems = vec.iter();
|
||||||
|
while let Some(elem) = elems.next() {
|
||||||
|
try!(write!(fmt, "{}{}", prefix, elem));
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user