mirror of https://github.com/fluencelabs/lalrpop
synced 2025-03-16 17:00:53 +00:00
Start working on a naive LR(1) algorithm (seems like a good starting
point). Implement first set computation.
This commit is contained in:
parent 334a419c49
commit 4e5204078a
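The first-set computation added below (src/lr1/first.rs) is the standard fixed-point iteration: keep folding the FIRST of each production's right-hand side into the FIRST set of its nonterminal until no set grows. As a rough orientation, here is a minimal standalone sketch of that idea. It is not code from this repository: it uses plain strings in place of the crate's interned NonterminalString/TerminalString types, marks nonterminals by an uppercase first letter, and models the empty string as None, mirroring the FirstSet = HashSet<Option<TerminalString>> alias in the new module.

use std::collections::{HashMap, HashSet};

// A production is (nonterminal, right-hand side). Names starting with an
// uppercase letter stand in for nonterminals; everything else is a terminal.
type Production<'a> = (&'a str, Vec<&'a str>);

fn is_nonterminal(s: &str) -> bool {
    s.chars().next().map_or(false, |c| c.is_uppercase())
}

// FIRST sets are sets of Option<&str>: Some(t) is a terminal, None records
// that the nonterminal can derive the empty string.
fn first_sets<'a>(productions: &[Production<'a>]) -> HashMap<&'a str, HashSet<Option<&'a str>>> {
    let mut map: HashMap<&str, HashSet<Option<&str>>> = HashMap::new();
    for &(nt, _) in productions {
        map.entry(nt).or_insert_with(HashSet::new);
    }
    let mut changed = true;
    while changed {
        changed = false;
        for (nt, rhs) in productions {
            // Compute FIRST of the right-hand side, walking symbols left to right.
            let mut additions: HashSet<Option<&str>> = HashSet::new();
            let mut all_nullable = true;
            for &sym in rhs {
                if is_nonterminal(sym) {
                    // A missing entry behaves like an empty set, as in first.rs.
                    let nullable = match map.get(sym) {
                        Some(set) => {
                            additions.extend(set.iter().filter(|t| t.is_some()).cloned());
                            set.contains(&None)
                        }
                        None => false,
                    };
                    if !nullable {
                        all_nullable = false;
                        break;
                    }
                } else {
                    additions.insert(Some(sym));
                    all_nullable = false;
                    break;
                }
            }
            if all_nullable {
                additions.insert(None); // every symbol on the right could be empty
            }
            let set = map.get_mut(nt).unwrap();
            let before = set.len();
            set.extend(additions);
            changed |= set.len() != before;
        }
    }
    map
}

fn main() {
    // The grammar shape from the new test, lowercasing the terminals:
    // A = B "c";  B = "d" | (empty)
    let productions = vec![
        ("A", vec!["B", "c"]),
        ("B", vec!["d"]),
        ("B", vec![]),
    ];
    let sets = first_sets(&productions);
    // FIRST(B) = {Some("d"), None}; FIRST(A) = {Some("d"), Some("c")},
    // because B may be empty and thereby expose "c".
    println!("{:?}", sets);
}

The actual implementation added by this commit follows in the diff below.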
@@ -5,11 +5,13 @@
 */

use intern::InternedString;
use grammar::parse_tree::{NonterminalString, Span, TerminalString};
use std::collections::HashMap;
use std::fmt::{Debug, Display, Formatter, Error};
use util::Sep;

// These concepts we re-use wholesale
pub use grammar::parse_tree::{NonterminalString, Span, TerminalString};

#[derive(Clone, Debug)]
pub struct Grammar {
    pub action_fn_defns: Vec<ActionFnDefn>,

@@ -186,7 +188,7 @@ impl Grammar {
        }
    }

    fn productions_for(&self, nonterminal: NonterminalString) -> &[Production] {
    pub fn productions_for(&self, nonterminal: NonterminalString) -> &[Production] {
        match self.productions.get(&nonterminal) {
            Some(v) => &v[..],
            None => &[], // this...probably shouldn't happen actually?
src/lr1/first.rs (new file, 139 lines)
@@ -0,0 +1,139 @@
//! First set construction and computation.

use grammar::repr::*;
use std::collections::{HashMap, HashSet};

use super::Lookahead;

pub struct FirstSets {
    map: HashMap<NonterminalString, FirstSet>
}

pub type FirstSet = HashSet<Option<TerminalString>>;

impl FirstSets {
    pub fn new(grammar: &Grammar) -> FirstSets {
        let mut this = FirstSets { map: HashMap::new() };
        let mut changed = true;
        while changed {
            changed = false;
            for production in grammar.productions.values().flat_map(|p| p.iter()) {
                let nt = production.nonterminal;
                let lookahead = this.first(&production.symbols, Lookahead::EOF);
                let first_set = this.map.entry(nt).or_insert_with(|| HashSet::new());
                let cardinality = first_set.len();
                first_set.extend(
                    lookahead.into_iter()
                             .map(|la| match la {
                                 Lookahead::EOF => None,
                                 Lookahead::Terminal(t) => Some(t),
                             }));
                changed |= (cardinality != first_set.len());
            }
        }
        this
    }

    pub fn first_set(&self, nt: NonterminalString) -> &FirstSet {
        &self.map[&nt]
    }

    pub fn first(&self, symbols: &[Symbol], lookahead: Lookahead) -> Vec<Lookahead> {
        let mut result = vec![];

        for symbol in symbols {
            match *symbol {
                Symbol::Terminal(t) => {
                    result.push(Lookahead::Terminal(t));
                    return result;
                }

                Symbol::Nonterminal(nt) => {
                    let mut empty_prod = false;
                    match self.map.get(&nt) {
                        None => {
                            // This should only happen during set
                            // construction; it corresponds to an
                            // entry that has not yet been
                            // built. Otherwise, it would mean a
                            // terminal with no productions. Either
                            // way, the resulting first set should be
                            // empty.
                        }
                        Some(set) => {
                            for &opt_terminal in set {
                                if let Some(terminal) = opt_terminal {
                                    result.push(Lookahead::Terminal(terminal));
                                } else {
                                    empty_prod = true;
                                }
                            }
                        }
                    }
                    if !empty_prod {
                        return result;
                    }
                }
            }
        }

        result.push(lookahead);
        result
    }
}

mod test {
    use intern::intern;
    use normalize::normalize;
    use parser;
    use grammar::repr::*;
    use lr1::Lookahead;
    use lr1::Lookahead::EOF;
    use super::FirstSets;

    fn nt(t: &str) -> Symbol {
        Symbol::Nonterminal(NonterminalString(intern(t)))
    }

    fn t(t: &str) -> Symbol {
        Symbol::Terminal(TerminalString(intern(t)))
    }

    fn la(t: &str) -> Lookahead {
        Lookahead::Terminal(TerminalString(intern(t)))
    }

    fn first(first: &FirstSets, symbols: &[Symbol], lookahead: Lookahead) -> Vec<Lookahead> {
        let mut v = first.first(symbols, lookahead);
        v.sort();
        v
    }

    #[test]
    fn basic() {
        let grammar = parser::parse_grammar(r#"
            grammar Foo {
                token Tok where { };
                A = B "C";
                B: Option<u32> = {
                    "D" => Some(1);
                    => None;
                };
            }
        "#).unwrap();
        let grammar = normalize(grammar).unwrap();
        let first_sets = FirstSets::new(&grammar);

        assert_eq!(
            first(&first_sets, &[nt("A")], EOF),
            vec![la("C"), la("D")]);

        assert_eq!(
            first(&first_sets, &[nt("B")], EOF),
            vec![EOF, la("D")]);

        assert_eq!(
            first(&first_sets, &[nt("B"), t("E")], EOF),
            vec![la("D"), la("E")]);
    }
}
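As an aside (not part of the diff), the expectations in basic() follow directly from the usual FIRST-set rules: B has an empty alternative, so FIRST(B) = { "D", ε }, and because B can be empty, A = B "C" also exposes "C", so FIRST(A) = { "C", "D" }. The first() method substitutes the trailing lookahead wherever ε occurs, which is why the three assertions expect:

    first([A], EOF)        = { "C", "D" }
    first([B], EOF)        = { EOF, "D" }
    first([B, "E"], EOF)   = { "D", "E" }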
src/lr1/mod.rs (new file, 45 lines)
@@ -0,0 +1,45 @@
//! Naive LR(1) generation algorithm.

use grammar::repr::*;
use std::collections::{HashMap};

mod first;

struct LR1<'grammar> {
    grammar: &'grammar Grammar,
    states: Vec<State<'grammar>>,
    first_sets: first::FirstSets,
}

struct State<'grammar> {
    items: Vec<Configuration<'grammar>>,
    shifts: HashMap<TerminalString, StateIndex>,
    gotos: HashMap<NonterminalString, StateIndex>,
}

#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
struct StateIndex(usize);

#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
enum Lookahead {
    EOF,
    Terminal(TerminalString),
}

struct Configuration<'grammar> {
    production: &'grammar Production,
    index: usize, // the dot comes before `index`, so `index` would be 1 for X = A (*) B C
    lookahead: Lookahead,
}

impl<'grammar> LR1<'grammar> {
    fn new(grammar: &'grammar Grammar) -> LR1 {
        LR1 {
            grammar: grammar,
            states: vec![],
            first_sets: first::FirstSets::new(grammar),
        }
    }
}
@@ -8,6 +8,7 @@ extern crate regex;

mod grammar;
mod intern;
mod lr1;
mod normalize;
mod parser;
mod util;
@@ -6,7 +6,7 @@ use intern::{self, intern, InternedString};
use normalize::NormResult;
use normalize::norm_util::{self, Symbols};
use grammar::parse_tree as pt;
use grammar::parse_tree::{TerminalString, NonterminalString};
use grammar::parse_tree::{TerminalString};
use grammar::repr as r;

#[cfg(test)]
@@ -1,17 +1,12 @@
use intern::InternedString;
use grammar::repr::{Grammar, Production};
use normalize::normalize;
use normalize::test_util::expect_debug;
use parser;
use std::fmt::{Debug, Formatter, Error};

fn flat_productions(grammar: &Grammar) -> Vec<Production> {
    let mut productions: Vec<_> =
        grammar.productions.iter()
                           .flat_map(|(&nt, prods)| {
                               prods.iter()
                                    .cloned()
                           })
        grammar.productions.values()
                           .flat_map(|prods| prods.iter().cloned())
                           .collect();

    // sort by the action fn index just to get a consistent ordering
@@ -15,9 +15,3 @@ impl<'a,S:Display> Display for Sep<&'a Vec<S>> {
        Ok(())
    }
}

fn shift<T:Clone>(slice: &mut &[T]) -> T {
    let elem = slice[0].clone();
    *slice = &slice[1..];
    elem
}