From 6ea20abb8f14e6334ea2ae9088961d96e794d5b4 Mon Sep 17 00:00:00 2001 From: Niko Matsakis Date: Thu, 18 Jun 2015 05:35:09 -0400 Subject: [PATCH] Code up the lowering step (or a rough draft; no tests yet...) --- src/grammar/parse_tree.rs | 2 +- src/grammar/repr.rs | 45 +++++--- src/normalize/lower/mod.rs | 176 ++++++++++++++++++++++++++++++ src/normalize/macro_expand/mod.rs | 5 +- src/normalize/norm_util.rs | 16 +-- src/normalize/tyinfer/mod.rs | 8 +- src/normalize/tyinfer/test.rs | 2 +- 7 files changed, 220 insertions(+), 34 deletions(-) diff --git a/src/grammar/parse_tree.rs b/src/grammar/parse_tree.rs index bb9a4da..ff047c3 100644 --- a/src/grammar/parse_tree.rs +++ b/src/grammar/parse_tree.rs @@ -224,7 +224,7 @@ impl Symbol { pub fn type_repr(&self, types: &Types) -> TypeRepr { match *self { Symbol::Terminal(_) => types.terminal_type().clone(), - Symbol::Nonterminal(id) => types.nt_type(id).unwrap().clone(), + Symbol::Nonterminal(id) => types.nonterminal_type(id).clone(), Symbol::Choose(ref s) => s.type_repr(types), Symbol::Name(_, ref s) => s.type_repr(types), diff --git a/src/grammar/repr.rs b/src/grammar/repr.rs index 51e4a8a..3cca288 100644 --- a/src/grammar/repr.rs +++ b/src/grammar/repr.rs @@ -5,28 +5,25 @@ */ use intern::InternedString; +use grammar::parse_tree::Span; use std::collections::HashMap; use std::fmt::{Display, Formatter, Error}; use util::Sep; -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug)] pub struct Grammar { - pub token: TokenData, - pub action_fns: Vec, + pub action_fn_defns: Vec, pub productions: Vec, -} - -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct TokenData { - pub token_type: TypeRepr, pub conversions: HashMap, + pub types: Types, } #[derive(Clone, Debug, PartialEq, Eq)] pub struct Production { + pub span: Span, pub nonterminal: InternedString, pub symbols: Vec, - pub action_fn: ActionFnIndex, + pub action_fn: ActionFn, } #[derive(Clone, Debug, PartialEq, Eq)] @@ -36,10 +33,10 @@ pub enum Symbol { } #[derive(Clone, Debug, PartialEq, Eq)] -pub struct ActionFn { - pub arg_names: Vec, +pub struct ActionFnDefn { + pub arg_patterns: Vec, pub arg_types: Vec, - pub ret_type: Vec, + pub ret_type: TypeRepr, pub code: String, } @@ -50,7 +47,7 @@ pub enum TypeRepr { Lifetime(InternedString), } -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct Types { terminal_type: TypeRepr, nonterminal_types: HashMap @@ -70,9 +67,13 @@ impl Types { &self.terminal_type } - pub fn nt_type(&self, nt_id: InternedString) -> Option<&TypeRepr> { + pub fn lookup_nonterminal_type(&self, nt_id: InternedString) -> Option<&TypeRepr> { self.nonterminal_types.get(&nt_id) } + + pub fn nonterminal_type(&self, nt_id: InternedString) -> &TypeRepr { + &self.nonterminal_types[&nt_id] + } } impl Display for TypeRepr { @@ -91,11 +92,11 @@ impl Display for TypeRepr { } #[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub struct ActionFnIndex(u32); +pub struct ActionFn(u32); -impl ActionFnIndex { - pub fn new(x: usize) -> ActionFnIndex { - ActionFnIndex(x as u32) +impl ActionFn { + pub fn new(x: usize) -> ActionFn { + ActionFn(x as u32) } pub fn index(&self) -> usize { @@ -103,3 +104,11 @@ impl ActionFnIndex { } } +impl Symbol { + pub fn ty<'ty>(&self, t: &'ty Types) -> &'ty TypeRepr { + match *self { + Symbol::Nonterminal(id) => t.terminal_type(), + Symbol::Terminal(id) => t.nonterminal_type(id), + } + } +} diff --git a/src/normalize/lower/mod.rs b/src/normalize/lower/mod.rs index 833b167..013d6ea 100644 --- a/src/normalize/lower/mod.rs +++ b/src/normalize/lower/mod.rs @@ -1,3 +1,179 @@ /*! * Lower */ + +use intern::{self, intern, InternedString}; +use normalize::NormResult; +use normalize::norm_util::{self, Symbols}; +use grammar::parse_tree as pt; +use grammar::repr as r; +use std::collections::HashMap; +use util::Sep; + +pub fn lower(grammar: pt::Grammar, types: r::Types) -> NormResult { + let mut state = LowerState::new(types); + state.lower(grammar) +} + +struct LowerState { + grammar: r::Grammar +} + +impl LowerState { + fn new(types: r::Types) -> LowerState { + LowerState { + grammar: r::Grammar { + action_fn_defns: vec![], + productions: vec![], + conversions: HashMap::new(), + types: types + } + } + } + + fn lower(mut self, grammar: pt::Grammar) -> NormResult { + for item in grammar.items { + match item { + pt::GrammarItem::TokenType(data) => { + self.grammar.conversions.extend(data.conversions); + } + + pt::GrammarItem::Nonterminal(nt) => { + for alt in nt.alternatives { + let nt_type = self.grammar.types.nonterminal_type(nt.name).clone(); + let symbols = self.symbols(&alt.expr.symbols); + let action_fn = self.action_fn(nt_type, &alt.expr, &symbols, alt.action); + let production = r::Production { + span: alt.span, + nonterminal: nt.name, + symbols: symbols, + action_fn: action_fn, + }; + self.grammar.productions.push(production); + } + } + } + } + + Ok(self.grammar) + } + + fn action_fn(&mut self, + nt_type: r::TypeRepr, + expr: &pt::ExprSymbol, + symbols: &[r::Symbol], + action: Option) + -> r::ActionFn + { + let action = match action { + Some(s) => s, + None => format!("(~~)"), + }; + + // Note that the action fn takes ALL of the symbols in `expr` + // as arguments, and some of them are simply dropped based on + // the user's selections. + + // The set of argument types is thus the type of all symbols: + let arg_types: Vec = + symbols.iter().map(|s| s.ty(&self.grammar.types)).cloned().collect(); + + let action_fn_defn = match norm_util::analyze_expr(expr) { + Symbols::Named(names) => { + // if there are named symbols, we want to give the + // arguments the names that the user gave them: + let arg_patterns = + patterns(names.iter().map(|&(index, name, _)| (index, name)), + symbols.len()); + + r::ActionFnDefn { + arg_patterns: arg_patterns, + arg_types: arg_types, + ret_type: nt_type, + code: action + } + } + Symbols::Anon(indices) => { + let names: Vec<_> = + (0..indices.len()).map(|i| fresh_name(i, &action)).collect(); + let arg_patterns = + patterns(indices.iter().map(|&(index, _)| index) + .zip(names.iter().cloned()), + symbols.len()); + let name_str = intern::read(|interner| { + let name_strs: Vec<_> = names.iter().map(|&n| interner.data(n)).collect(); + name_strs.connect(", ") + }); + let action = action.replace("~~", &name_str); + r::ActionFnDefn { + arg_patterns: arg_patterns, + arg_types: arg_types, + ret_type: nt_type, + code: action + } + } + }; + + let index = r::ActionFn::new(self.grammar.action_fn_defns.len()); + self.grammar.action_fn_defns.push(action_fn_defn); + + index + } + + fn symbols(&mut self, symbols: &[pt::Symbol]) -> Vec { + symbols.iter().map(|sym| self.symbol(sym)).collect() + } + + fn symbol(&mut self, symbol: &pt::Symbol) -> r::Symbol { + match *symbol { + pt::Symbol::Terminal(id) => r::Symbol::Terminal(id), + pt::Symbol::Nonterminal(id) => r::Symbol::Nonterminal(id), + pt::Symbol::Choose(ref s) | pt::Symbol::Name(_, ref s) => self.symbol(s), + + pt::Symbol::Macro(..) | pt::Symbol::Repeat(..) | pt::Symbol::Expr(..) => { + unreachable!("symbol `{}` should have been normalized away by now", symbol) + } + } + } +} + +fn patterns(mut chosen: I, num_args: usize) -> Vec + where I: Iterator +{ + let blank = intern("_"); + + let mut next_chosen = chosen.next(); + + let result = + (0..num_args) + .map(|index| { + match next_chosen { + Some((chosen_index, chosen_name)) if chosen_index == index => { + next_chosen = chosen.next(); + chosen_name + } + _ => blank, + } + }) + .collect(); + + debug_assert!(next_chosen.is_none()); + + result +} + +fn fresh_name(counter: usize, action_str: &str) -> InternedString { + let mut name = format!("__{}", counter); + + // Check whether this string appears anywhere in the action. If + // so, keep appending an underscore until it doesn't. :) Obviously + // this is stricter than needed, since the action string might be + // like `print("__1")`, in which case we'll detect a false + // conflict (or it might contain a variable named `__1x`, + // etc). But so what. + while action_str.contains(&name) { + name.push('_'); + } + + intern(&name) +} diff --git a/src/normalize/macro_expand/mod.rs b/src/normalize/macro_expand/mod.rs index 8248e86..9e2c382 100644 --- a/src/normalize/macro_expand/mod.rs +++ b/src/normalize/macro_expand/mod.rs @@ -306,7 +306,7 @@ impl MacroExpander { let ty_ref = match norm_util::analyze_expr(&expr) { Symbols::Named(names) => { - let (ex_id, ex_sym) = names[0]; + let (_, ex_id, ex_sym) = names[0]; return_err!( expr.span, "named symbols like `~{}:{}` are only allowed at the top-level of a nonterminal", @@ -315,8 +315,7 @@ impl MacroExpander { Symbols::Anon(syms) => { maybe_tuple( syms.into_iter() - .cloned() - .map(TypeRef::OfSymbol) + .map(|(_, s)| TypeRef::OfSymbol(s.clone())) .collect()) } }; diff --git a/src/normalize/norm_util.rs b/src/normalize/norm_util.rs index cd1782d..4982ceb 100644 --- a/src/normalize/norm_util.rs +++ b/src/normalize/norm_util.rs @@ -9,8 +9,8 @@ pub enum AlternativeAction<'a> { #[derive(Debug)] pub enum Symbols<'a> { - Named(Vec<(InternedString, &'a Symbol)>), - Anon(Vec<&'a Symbol>), + Named(Vec<(usize, InternedString, &'a Symbol)>), + Anon(Vec<(usize, &'a Symbol)>), } pub fn analyze_action<'a>(alt: &'a Alternative) -> AlternativeAction<'a> { @@ -27,8 +27,9 @@ pub fn analyze_expr<'a>(expr: &'a ExprSymbol) -> Symbols<'a> { let named_symbols: Vec<_> = expr.symbols .iter() - .filter_map(|sym| match *sym { - Symbol::Name(id, ref sub) => Some((id, &**sub)), + .enumerate() + .filter_map(|(idx, sym)| match *sym { + Symbol::Name(id, ref sub) => Some((idx, id, &**sub)), _ => None, }) .collect(); @@ -40,8 +41,9 @@ pub fn analyze_expr<'a>(expr: &'a ExprSymbol) -> Symbols<'a> { let chosen_symbol_types: Vec<_> = expr.symbols .iter() - .filter_map(|sym| match *sym { - Symbol::Choose(ref sub) => Some(&**sub), + .enumerate() + .filter_map(|(idx, sym)| match *sym { + Symbol::Choose(ref sub) => Some((idx, &**sub)), _ => None, }) .collect(); @@ -50,5 +52,5 @@ pub fn analyze_expr<'a>(expr: &'a ExprSymbol) -> Symbols<'a> { } // If they didn't choose anything with a `~`, make a tuple of everything. - Symbols::Anon(expr.symbols.iter().collect()) + Symbols::Anon(expr.symbols.iter().enumerate().collect()) } diff --git a/src/normalize/tyinfer/mod.rs b/src/normalize/tyinfer/mod.rs index fa1c25e..7261fe2 100644 --- a/src/normalize/tyinfer/mod.rs +++ b/src/normalize/tyinfer/mod.rs @@ -85,14 +85,14 @@ impl<'grammar> TypeInferencer<'grammar> { for id in ids { try!(self.nonterminal_type(id)); - debug_assert!(self.types.nt_type(id).is_some()); + debug_assert!(self.types.lookup_nonterminal_type(id).is_some()); } Ok(self.types) } fn nonterminal_type(&mut self, id: InternedString) -> NormResult { - if let Some(repr) = self.types.nt_type(id) { + if let Some(repr) = self.types.lookup_nonterminal_type(id) { return Ok(repr.clone()); } @@ -181,13 +181,13 @@ impl<'grammar> TypeInferencer<'grammar> { AlternativeAction::Default(Symbols::Named(ref syms)) => { return_err!(alt.span, "cannot infer types in the presence of named symbols like `~{}:{}`", - syms[0].0, syms[0].1); + syms[0].1, syms[0].2); } AlternativeAction::Default(Symbols::Anon(syms)) => { let symbol_types: Vec = try! { syms.iter() - .map(|sym| self.symbol_type(sym)) + .map(|&(_, sym)| self.symbol_type(sym)) .collect() }; Ok(maybe_tuple(symbol_types)) diff --git a/src/normalize/tyinfer/test.rs b/src/normalize/tyinfer/test.rs index d2d8d62..15dc5b3 100644 --- a/src/normalize/tyinfer/test.rs +++ b/src/normalize/tyinfer/test.rs @@ -20,7 +20,7 @@ fn compare(g1: &str, expected: Vec<(&'static str, &'static str)>) { let id = intern(nt_id); let ty = type_repr(nt_type); println!("expected type of {:?} is {:?}", id, ty); - assert_eq!(types.nt_type(id), Some(&ty)); + assert_eq!(types.nonterminal_type(id), &ty); } }