From 44d7050ddb6339e2edce6eafb76119b7a7b86c9b Mon Sep 17 00:00:00 2001 From: Niko Matsakis Date: Wed, 17 Jun 2015 15:02:34 -0400 Subject: [PATCH] generate action code in macro expand --- src/grammar/mod.rs | 1 + src/grammar/nonce.rs | 27 +++++++++++++ src/grammar/repr.rs | 1 + src/normalize/macro_expand/mod.rs | 65 ++++++++++++++++++++++++++---- src/normalize/macro_expand/test.rs | 2 +- src/normalize/norm_util.rs | 36 ++++++++--------- src/normalize/tyinfer/test.rs | 1 - src/parser/mod.rs | 2 +- 8 files changed, 107 insertions(+), 28 deletions(-) create mode 100644 src/grammar/nonce.rs diff --git a/src/grammar/mod.rs b/src/grammar/mod.rs index f679e36..29bb5cd 100644 --- a/src/grammar/mod.rs +++ b/src/grammar/mod.rs @@ -1,5 +1,6 @@ //! The grammar definition. +pub mod nonce; pub mod parse_tree; pub mod repr; // pub mod token; diff --git a/src/grammar/nonce.rs b/src/grammar/nonce.rs new file mode 100644 index 0000000..2ef7712 --- /dev/null +++ b/src/grammar/nonce.rs @@ -0,0 +1,27 @@ +/*! + +A NONCE is just a guaranteed unique identifier. We use it to create +persistent identity for alternatives as we transform the grammar. + +*/ + +use std::cell::Cell; + +thread_local! { + static NONCE: Cell = Cell::new(0) +} + +#[derive(Copy, Clone, PartialEq, Eq, Hash)] +pub struct Nonce { + counter: u32 +} + +impl Nonce { + fn new() -> Nonce { + NONCE.with(|counter| { + let c = counter.get(); + counter.set(c.checked_add(1).unwrap()); + Nonce { counter: c } + }) + } +} diff --git a/src/grammar/repr.rs b/src/grammar/repr.rs index 441b413..474f13e 100644 --- a/src/grammar/repr.rs +++ b/src/grammar/repr.rs @@ -63,3 +63,4 @@ impl Display for TypeRepr { } } } + diff --git a/src/normalize/macro_expand/mod.rs b/src/normalize/macro_expand/mod.rs index a0f701e..d1e57bb 100644 --- a/src/normalize/macro_expand/mod.rs +++ b/src/normalize/macro_expand/mod.rs @@ -1,8 +1,10 @@ use std::collections::{HashMap, HashSet}; use intern::{intern, read, InternedString}; use grammar::parse_tree::{Alternative, Condition, ConditionOp, ExprSymbol, Grammar, GrammarItem, - MacroSymbol, NonterminalData, RepeatSymbol, Span, Symbol, TypeRef}; + MacroSymbol, NonterminalData, RepeatOp, RepeatSymbol, + Span, Symbol, TypeRef}; use normalize::{NormResult, NormError}; +use normalize::norm_util::{self, Symbols}; use regex::Regex; use std::mem; @@ -67,7 +69,7 @@ impl MacroExpander { Symbol::Macro(msym) => items.push(try!(self.expand_macro_symbol(msym))), Symbol::Expr(expr) => - items.push(self.expand_expr_symbol(expr)), + items.push(try!(self.expand_expr_symbol(expr))), _ => assert!(false, "don't know how to expand `{:?}`", sym) } @@ -110,6 +112,7 @@ impl MacroExpander { return; } Symbol::Repeat(ref mut repeat) => { + // self.replace_repeat(repeat); self.replace_symbol(&mut repeat.symbol); return; } @@ -132,6 +135,28 @@ impl MacroExpander { } } + fn replace_repeat(&mut self, repeat: &mut RepeatSymbol) { + match repeat.op { + RepeatOp::Star => { + // Convert X* to X+? and recurse. Annoyingly, we have + // to clone for this, due to not being able to move + // out from `&mut` pointers. + *repeat = RepeatSymbol { + op: RepeatOp::Question, + symbol: Symbol::Repeat(Box::new(RepeatSymbol { + op: RepeatOp::Plus, + symbol: repeat.symbol.clone() + })) + }; + return self.replace_repeat(repeat); + } + RepeatOp::Question | + RepeatOp::Plus => { + self.replace_symbol(&mut repeat.symbol); + } + } + } + /////////////////////////////////////////////////////////////////////////// // Macro expansion @@ -301,18 +326,44 @@ impl MacroExpander { /////////////////////////////////////////////////////////////////////////// // Expr expansion - fn expand_expr_symbol(&mut self, expr: ExprSymbol) -> GrammarItem { + fn expand_expr_symbol(&mut self, expr: ExprSymbol) -> NormResult { let name = intern(&expr.canonical_form()); - GrammarItem::Nonterminal(NonterminalData { + + let ty_ref = match norm_util::analyze_expr(&expr) { + Symbols::Named(names) => { + let (ex_id, ex_sym) = names[0]; + return_err!( + expr.span, + "named symbols like `~{}:{}` are only allowed at the top-level of a nonterminal", + ex_id, ex_sym) + } + Symbols::Anon(syms) => { + maybe_tuple( + syms.into_iter() + .cloned() + .map(TypeRef::OfSymbol) + .collect()) + } + }; + + Ok(GrammarItem::Nonterminal(NonterminalData { span: expr.span, name: name, args: vec![], - type_decl: None, + type_decl: Some(ty_ref), alternatives: vec![Alternative { span: expr.span, expr: expr, condition: None, - action: None }] - }) + action: Some(format!("(~~)")) }] + })) } } + +fn maybe_tuple(v: Vec) -> TypeRef { + if v.len() == 1 { + v.into_iter().next().unwrap() + } else { + TypeRef::Tuple(v) + } +} diff --git a/src/normalize/macro_expand/test.rs b/src/normalize/macro_expand/test.rs index 262480a..52eb2d4 100644 --- a/src/normalize/macro_expand/test.rs +++ b/src/normalize/macro_expand/test.rs @@ -25,7 +25,7 @@ grammar Foo { ~v:`(~\"Id\" \",\")`* ~e:\"Id\"? => v.into_iter().chain(e.into_iter()).collect(); - `(~\"Id\" \",\")` = ~\"Id\" \",\"; + `(~\"Id\" \",\")`: `\"Id\"` = ~\"Id\" \",\" => (~~); } ").unwrap(); diff --git a/src/normalize/norm_util.rs b/src/normalize/norm_util.rs index 9757959..cd1782d 100644 --- a/src/normalize/norm_util.rs +++ b/src/normalize/norm_util.rs @@ -1,5 +1,5 @@ use intern::InternedString; -use grammar::parse_tree::{Alternative, Symbol}; +use grammar::parse_tree::{Alternative, ExprSymbol, Symbol}; #[derive(Debug)] pub enum AlternativeAction<'a> { @@ -19,36 +19,36 @@ pub fn analyze_action<'a>(alt: &'a Alternative) -> AlternativeAction<'a> { return AlternativeAction::User(code); } - AlternativeAction::Default(analyze_symbols(alt)) + AlternativeAction::Default(analyze_expr(&alt.expr)) } -pub fn analyze_symbols<'a>(alt: &'a Alternative) -> Symbols<'a> { +pub fn analyze_expr<'a>(expr: &'a ExprSymbol) -> Symbols<'a> { // First look for named symbols. let named_symbols: Vec<_> = - alt.expr.symbols - .iter() - .filter_map(|sym| match *sym { - Symbol::Name(id, ref sub) => Some((id, &**sub)), - _ => None, - }) - .collect(); + expr.symbols + .iter() + .filter_map(|sym| match *sym { + Symbol::Name(id, ref sub) => Some((id, &**sub)), + _ => None, + }) + .collect(); if !named_symbols.is_empty() { return Symbols::Named(named_symbols); } // Otherwise, make a tuple of the items they chose with a `~`. let chosen_symbol_types: Vec<_> = - alt.expr.symbols - .iter() - .filter_map(|sym| match *sym { - Symbol::Choose(..) => Some(sym), - _ => None, - }) - .collect(); + expr.symbols + .iter() + .filter_map(|sym| match *sym { + Symbol::Choose(ref sub) => Some(&**sub), + _ => None, + }) + .collect(); if !chosen_symbol_types.is_empty() { return Symbols::Anon(chosen_symbol_types); } // If they didn't choose anything with a `~`, make a tuple of everything. - Symbols::Anon(alt.expr.symbols.iter().collect()) + Symbols::Anon(expr.symbols.iter().collect()) } diff --git a/src/normalize/tyinfer/test.rs b/src/normalize/tyinfer/test.rs index 0aa2f22..619b160 100644 --- a/src/normalize/tyinfer/test.rs +++ b/src/normalize/tyinfer/test.rs @@ -2,7 +2,6 @@ use intern::intern; use parser; use normalize::macro_expand::expand_macros; use normalize::tyinfer::infer_types; -use grammar::parse_tree::TypeRef; use grammar::repr::TypeRepr; fn type_repr(s: &str) -> TypeRepr { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index b4db08f..7102696 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -260,7 +260,7 @@ impl<'input> rusty_peg::Symbol<'input,Parser<'input>> for CODE { offset += 1; // move to next byte } - let regex_str = &input.text[input.offset + 1 .. offset - 1]; + let regex_str = &input.text[input.offset .. offset]; let output = rusty_peg::Input { text: input.text, offset: offset }; return Ok((output, regex_str.to_string())); }