Start working on a naive LR(1) algorithm (seems like a good starting point). Implement first set computation.
This commit is contained in:
Niko Matsakis 2015-06-19 06:42:11 -04:00
parent 334a419c49
commit 4e5204078a
7 changed files with 192 additions and 16 deletions

View File

@ -5,11 +5,13 @@
*/
use intern::InternedString;
use grammar::parse_tree::{NonterminalString, Span, TerminalString};
use std::collections::HashMap;
use std::fmt::{Debug, Display, Formatter, Error};
use util::Sep;
// These concepts we re-use wholesale
pub use grammar::parse_tree::{NonterminalString, Span, TerminalString};
#[derive(Clone, Debug)]
pub struct Grammar {
pub action_fn_defns: Vec<ActionFnDefn>,
@ -186,7 +188,7 @@ impl Grammar {
}
}
fn productions_for(&self, nonterminal: NonterminalString) -> &[Production] {
pub fn productions_for(&self, nonterminal: NonterminalString) -> &[Production] {
match self.productions.get(&nonterminal) {
Some(v) => &v[..],
None => &[], // this...probably shouldn't happen actually?

139
src/lr1/first.rs Normal file
View File

@ -0,0 +1,139 @@
//! First set construction and computation.
use grammar::repr::*;
use std::collections::{HashMap, HashSet};
use super::Lookahead;
/// The first sets of every nonterminal that appears on the left-hand
/// side of at least one production of a grammar.
///
/// Built by `FirstSets::new`; queried through `first_set` and `first`.
pub struct FirstSets {
// Maps a nonterminal to the first set recorded for it.
map: HashMap<NonterminalString, FirstSet>
}
/// The first set of a single nonterminal. `Some(t)` is a terminal `t`
/// that can begin the nonterminal; `None` is what `FirstSets::new`
/// stores in place of `Lookahead::EOF`, which `first` only reports once
/// it has walked past every symbol of a production (i.e. the production
/// can match nothing at all).
pub type FirstSet = HashSet<Option<TerminalString>>;
impl FirstSets {
pub fn new(grammar: &Grammar) -> FirstSets {
let mut this = FirstSets { map: HashMap::new() };
let mut changed = true;
while changed {
changed = false;
for production in grammar.productions.values().flat_map(|p| p.iter()) {
let nt = production.nonterminal;
let lookahead = this.first(&production.symbols, Lookahead::EOF);
let first_set = this.map.entry(nt).or_insert_with(|| HashSet::new());
let cardinality = first_set.len();
first_set.extend(
lookahead.into_iter()
.map(|la| match la {
Lookahead::EOF => None,
Lookahead::Terminal(t) => Some(t),
}));
changed |= (cardinality != first_set.len());
}
}
this
}
pub fn first_set(&self, nt: NonterminalString) -> &FirstSet {
&self.map[&nt]
}
pub fn first(&self, symbols: &[Symbol], lookahead: Lookahead) -> Vec<Lookahead> {
let mut result = vec![];
for symbol in symbols {
match *symbol {
Symbol::Terminal(t) => {
result.push(Lookahead::Terminal(t));
return result;
}
Symbol::Nonterminal(nt) => {
let mut empty_prod = false;
match self.map.get(&nt) {
None => {
// This should only happen during set
// construction; it corresponds to an
// entry that has not yet been
// built. Otherwise, it would mean a
// terminal with no productions. Either
// way, the resulting first set should be
// empty.
}
Some(set) => {
for &opt_terminal in set {
if let Some(terminal) = opt_terminal {
result.push(Lookahead::Terminal(terminal));
} else {
empty_prod = true;
}
}
}
}
if !empty_prod {
return result;
}
}
}
}
result.push(lookahead);
result
}
}
// Unit tests for first-set computation. The module is gated on
// `cfg(test)` so that its imports and helpers are not compiled (and
// reported as unused) in regular builds.
#[cfg(test)]
mod test {
    use intern::intern;
    use normalize::normalize;
    use parser;
    use grammar::repr::*;
    use lr1::Lookahead;
    use lr1::Lookahead::EOF;
    use super::FirstSets;

    // Builds a nonterminal symbol named `t`.
    fn nt(t: &str) -> Symbol {
        Symbol::Nonterminal(NonterminalString(intern(t)))
    }

    // Builds a terminal symbol named `t`.
    fn t(t: &str) -> Symbol {
        Symbol::Terminal(TerminalString(intern(t)))
    }

    // Builds a terminal lookahead named `t`.
    fn la(t: &str) -> Lookahead {
        Lookahead::Terminal(TerminalString(intern(t)))
    }

    // Runs `FirstSets::first` and sorts the result so that assertions do
    // not depend on hash-set iteration order.
    fn first(first: &FirstSets, symbols: &[Symbol], lookahead: Lookahead) -> Vec<Lookahead> {
        let mut v = first.first(symbols, lookahead);
        v.sort();
        v
    }

    #[test]
    fn basic() {
        let grammar = parser::parse_grammar(r#"
grammar Foo {
token Tok where { };
A = B "C";
B: Option<u32> = {
"D" => Some(1);
=> None;
};
}
"#).unwrap();
        let grammar = normalize(grammar).unwrap();
        let first_sets = FirstSets::new(&grammar);

        // `B` may be empty, so `A = B "C"` can start with "D" or "C".
        assert_eq!(
            first(&first_sets, &[nt("A")], EOF),
            vec![la("C"), la("D")]);

        // `B` alone may be empty, so the lookahead shows through.
        assert_eq!(
            first(&first_sets, &[nt("B")], EOF),
            vec![EOF, la("D")]);

        // The terminal "E" stops the scan before the lookahead is reached.
        assert_eq!(
            first(&first_sets, &[nt("B"), t("E")], EOF),
            vec![la("D"), la("E")]);
    }
}

45
src/lr1/mod.rs Normal file
View File

@ -0,0 +1,45 @@
//! Naive LR(1) generation algorithm.
use grammar::repr::*;
use std::collections::{HashMap};
mod first;
/// Working state of the naive LR(1) generation algorithm.
struct LR1<'grammar> {
// The grammar being processed.
grammar: &'grammar Grammar,
// The states built so far; `LR1::new` starts out with none.
states: Vec<State<'grammar>>,
// First sets computed from `grammar` by `first::FirstSets::new`.
first_sets: first::FirstSets,
}
/// A single state: a list of configurations plus two transition tables,
/// one keyed by terminal and one keyed by nonterminal.
struct State<'grammar> {
// The configurations (production, dot position, lookahead) of this state.
items: Vec<Configuration<'grammar>>,
// NOTE(review): presumably the state reached by shifting each terminal;
// nothing in this module fills the table yet -- confirm once it does.
shifts: HashMap<TerminalString, StateIndex>,
// NOTE(review): presumably the state reached after reducing to each
// nonterminal; nothing in this module fills the table yet -- confirm.
gotos: HashMap<NonterminalString, StateIndex>,
}
/// Newtype around `usize` used as the value type of the `shifts` and
/// `gotos` tables of `State`.
/// NOTE(review): presumably an index into `LR1::states` -- confirm.
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
struct StateIndex(usize);
/// A lookahead: either the end of the input or a specific terminal.
/// The derived ordering places `EOF` before every `Terminal`.
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
enum Lookahead {
// End of input.
EOF,
// The given terminal.
Terminal(TerminalString),
}
/// A production together with a dot position inside it and a lookahead.
struct Configuration<'grammar> {
// The production this configuration refers to, borrowed from the grammar.
production: &'grammar Production,
index: usize, // the dot comes before `index`, so `index` would be 1 for X = A (*) B C
// The lookahead attached to this configuration.
lookahead: Lookahead,
}
impl<'grammar> LR1<'grammar> {
    /// Prepares the generator for `grammar`: the first sets are computed
    /// right away, while the list of states starts out empty.
    fn new(grammar: &'grammar Grammar) -> LR1 {
        let first_sets = first::FirstSets::new(grammar);
        let states = vec![];
        LR1 {
            grammar: grammar,
            states: states,
            first_sets: first_sets,
        }
    }
}

View File

@ -8,6 +8,7 @@ extern crate regex;
mod grammar;
mod intern;
mod lr1;
mod normalize;
mod parser;
mod util;

View File

@ -6,7 +6,7 @@ use intern::{self, intern, InternedString};
use normalize::NormResult;
use normalize::norm_util::{self, Symbols};
use grammar::parse_tree as pt;
use grammar::parse_tree::{TerminalString, NonterminalString};
use grammar::parse_tree::{TerminalString};
use grammar::repr as r;
#[cfg(test)]

View File

@ -1,17 +1,12 @@
use intern::InternedString;
use grammar::repr::{Grammar, Production};
use normalize::normalize;
use normalize::test_util::expect_debug;
use parser;
use std::fmt::{Debug, Formatter, Error};
fn flat_productions(grammar: &Grammar) -> Vec<Production> {
let mut productions: Vec<_> =
grammar.productions.iter()
.flat_map(|(&nt, prods)| {
prods.iter()
.cloned()
})
grammar.productions.values()
.flat_map(|prods| prods.iter().cloned())
.collect();
// sort by the action fn index just to get a consistent ordering

View File

@ -15,9 +15,3 @@ impl<'a,S:Display> Display for Sep<&'a Vec<S>> {
Ok(())
}
}
/// Pops the front of the slice that `slice` points at: returns a clone
/// of its first element and narrows `*slice` to the remaining elements.
///
/// Panics if the slice is empty.
fn shift<T:Clone>(slice: &mut &[T]) -> T {
    let current: &[T] = *slice;
    let head = current[0].clone();
    *slice = &current[1..];
    head
}