From 3305bbdd3ef8ff559bab755932a0dad8e3cf0bec Mon Sep 17 00:00:00 2001 From: Niko Matsakis Date: Mon, 15 Jun 2015 07:11:32 -0400 Subject: [PATCH] Macro definitions, references. --- src/grammar/parse_tree.rs | 20 +++++++++++------- src/normalize/mod.rs | 42 ++++++++++++++++++++++++++----------- src/parser/mod.rs | 38 ++++++++++++++++++++++++++++++--- src/parser/test.rs | 44 ++++++++++++++++++++++++++------------- 4 files changed, 106 insertions(+), 38 deletions(-) diff --git a/src/grammar/parse_tree.rs b/src/grammar/parse_tree.rs index 0e55eb0..8249d5e 100644 --- a/src/grammar/parse_tree.rs +++ b/src/grammar/parse_tree.rs @@ -39,20 +39,20 @@ grammar Type<'input, T> { // or guard expressions. // Example 1: comma-separated list with optional trailing comma. - Comma<$A>: Vec<$A> = { - ~v:(~$A ",")* ~e:(~$A ,?)?> => { - let mut v = v; v.push(e); v - } + Comma: Vec = { + ~v:(~E ",")* ~e:E? => { + let mut v = v; + if let Some(e) = e { v.push(e); } + v + }; }; // Example 2: conditional patterns - Expr<$M>: Expr = { - + Expr: Expr = { ~Expr "(" ~Comma ")" => Expr::CallExpr(~~~); - ID if $M != "NO_ID" => { + ID if M !~ "NO_ID" => { }; - }; } ``` @@ -83,6 +83,7 @@ pub struct TokenTypeData { #[derive(Clone, Debug)] pub struct NonterminalData { pub name: InternedString, + pub args: Vec, // macro arguments pub type_decl: Option, pub alternatives: Vec } @@ -106,6 +107,9 @@ pub enum Symbol { // foo Nonterminal(InternedString), + // foo<..> + Macro(InternedString, Vec), + // X+ Plus(Box), diff --git a/src/normalize/mod.rs b/src/normalize/mod.rs index 104d7d3..79ea44f 100644 --- a/src/normalize/mod.rs +++ b/src/normalize/mod.rs @@ -13,13 +13,22 @@ pub fn normalize(input: &pt::Grammar) -> Result { // Expands macros // -// X = ...1 Vec ...2 +// X = ...1 Comma ...2 // // to // // X = ...1 Vec_X ...2 -// Vec_X = ...; -mod macro_expand; +// Comma_X: Vec<> = ...; +// +// AFTER THIS POINT: No more macros or macro references, though type +// indirections may occur. +// mod macro_expand; + +// Computes types where the user omitted them (or +// from macro byproducts). +// +// AFTER THIS POINT: All explicit, simple types. +// mod tyinfer; // Converts // @@ -29,7 +38,15 @@ mod macro_expand; // // X = ...1 A_B_C ...2 // A_B_C = A B C -mod nonterminalize; +// +// AFTER THIS POINT: No more Symbol::Expr remain. +// mod nonterminalize; + +// Synthesizes action code for all nonterminals. +// +// AFTER THIS POINT: All nonterminals have action code, and all +// Symbol::Choose and Symbol::Name are removed. +// mod action; // Converts // @@ -39,7 +56,9 @@ mod nonterminalize; // // X = ...1 ...2 // | ...1 Y+ ...2 -mod remove_star; +// +// AFTER THIS POINT: No more Symbol::Star remain. +// mod remove_star; // Converts X+ to a new terminal X_PLUS like: // @@ -47,7 +66,9 @@ mod remove_star; // => { vec![x] } // => { let mut v = v; v.push(e); v } // } -mod remove_plus; +// +// AFTER THIS POINT: No more Symbol::Plus remain. +// mod remove_plus; // Converts // @@ -57,9 +78,6 @@ mod remove_plus; // // X = ...1 ...2 // | ...1 Y ...2 -mod remove_question; - -// Infers types for all nonterminals where possible, or reports a -// suitable error. -mod actionify; - +// +// AFTER THIS POINT: No more Symbol::Question remain. +// mod remove_question; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 458b95f..d5c45f5 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -26,12 +26,29 @@ rusty_peg! { ( "=>" ";") => (from, to); NONTERMINAL: GrammarItem = - ( "=" ) => { - GrammarItem::Nonterminal(NonterminalData { name: n, + ( "=" ) => { + GrammarItem::Nonterminal(NonterminalData { name: n.0, + args: n.1, type_decl: t, alternatives: a }) }; + NONTERMINAL_NAME: (InternedString, Vec) = + (NONTERMINAL_NAME_MACRO / NONTERMINAL_NAME_SIMPLE); + + NONTERMINAL_NAME_SIMPLE: (InternedString, Vec) = + () => (a, vec![]); + + NONTERMINAL_NAME_MACRO: (InternedString, Vec) = + ( "<" ">") => { + let mut args = b; + if let Some(c) = c { args.push(c); } + (a, args) + }; + + NONTERMINAL_NAME_MACRO1: InternedString = + ( ",") => a; + NONTERMINAL_TYPE: String = (":" ) => s.to_string(); @@ -66,7 +83,17 @@ rusty_peg! { "?" => Symbol::Question(Box::new(lhs))); SYMBOL0: Symbol = - (TERMINAL_SYMBOL / NT_SYMBOL / EXPR_SYMBOL / NAMED_SYMBOL / CHOSEN_SYMBOL); + (MACRO_SYMBOL / TERMINAL_SYMBOL / NT_SYMBOL / EXPR_SYMBOL / + NAMED_SYMBOL / CHOSEN_SYMBOL); + + MACRO_SYMBOL: Symbol = + ( "<" ">") => { + let mut args = m; + if let Some(n) = n { args.push(n); } + Symbol::Macro(l, args) + }; + + MACRO_ARG_START: Symbol = ( ",") => s; TERMINAL_SYMBOL: Symbol = () => Symbol::Terminal(l); @@ -204,3 +231,8 @@ fn parse_symbol(text: &str) -> Result { let mut parser = Parser::new(()); rusty_peg::Symbol::parse_complete(&SYMBOL, &mut parser, text) } + +fn parse_nonterminal(text: &str) -> Result { + let mut parser = Parser::new(()); + rusty_peg::Symbol::parse_complete(&NONTERMINAL, &mut parser, text) +} diff --git a/src/parser/test.rs b/src/parser/test.rs index ca7b170..794719d 100644 --- a/src/parser/test.rs +++ b/src/parser/test.rs @@ -1,70 +1,84 @@ -use super::{parse_type_name, parse_grammar, parse_alternative, parse_symbol}; - #[test] fn type_name() { - let x = parse_type_name("parser::Enum<'l,T>"); + let x = super::parse_type_name("parser::Enum<'l,T>"); assert_eq!(x.reference(), "::parser::Enum<'l, T>"); } #[test] fn empty_grammar() { - parse_grammar(r#"grammar Foo { }"#).unwrap(); + super::parse_grammar(r#"grammar Foo { }"#).unwrap(); } #[test] fn alternative() { - parse_alternative(r#"Alt => Bar;"#).unwrap(); + super::parse_alternative(r#"Alt => Bar;"#).unwrap(); } #[test] fn symbol() { - parse_symbol(r#"Alt"#).unwrap(); + super::parse_symbol(r#"Alt"#).unwrap(); } #[test] fn nonterminal0() { - parse_grammar(r#"grammar Foo { Expr = Alt; }"#).unwrap(); + super::parse_grammar(r#"grammar Foo { Expr = Alt; }"#).unwrap(); } #[test] fn paren() { - parse_grammar(r#"grammar Foo { Expr = (Alt); }"#).unwrap(); + super::parse_grammar(r#"grammar Foo { Expr = (Alt); }"#).unwrap(); } #[test] fn paren_with_plus() { - parse_grammar(r#"grammar Foo { Expr = (Alt)+; }"#).unwrap(); + super::parse_grammar(r#"grammar Foo { Expr = (Alt)+; }"#).unwrap(); } #[test] fn paren_with_plus_and_anon() { - parse_grammar(r#"grammar Foo { Expr = (~Alt)+; }"#).unwrap(); + super::parse_grammar(r#"grammar Foo { Expr = (~Alt)+; }"#).unwrap(); } #[test] fn named_choice() { - parse_grammar(r#"grammar Foo { Expr = ~n:Alt; }"#).unwrap(); + super::parse_grammar(r#"grammar Foo { Expr = ~n:Alt; }"#).unwrap(); } #[test] fn named_choice_plus() { - parse_grammar(r#"grammar Foo { Expr = ~Alt+; }"#).unwrap(); + super::parse_grammar(r#"grammar Foo { Expr = ~Alt+; }"#).unwrap(); } #[test] fn token_expr() { - parse_grammar(r#"grammar Foo { token Expr where { "foo" => "bar"; }; }"#).unwrap(); + super::parse_grammar(r#"grammar Foo { token Expr where { "foo" => "bar"; }; }"#).unwrap(); } #[test] fn map1() { - parse_grammar( + super::parse_grammar( r#"grammar Foo { Expr = ~n:Alt+ => { { foo } }; }"#).unwrap(); } #[test] fn mapN() { - parse_grammar( + super::parse_grammar( r#"grammar Foo { Expr = { Bar => { Baz }; X ~n:Bar => { Y }; }; }"#).unwrap(); } +#[test] +fn macro_symbols() { + super::parse_symbol(r#"Foo"#).unwrap(); + super::parse_symbol(r#"Foo<"Baz">"#).unwrap(); + super::parse_symbol(r#"Foo<"Baz"+>"#).unwrap(); + super::parse_symbol(r#"Foo<"Baz"+, "Balooga">"#).unwrap(); + super::parse_symbol(r#"Foo<"Baz"+, ("Balooga" Potato),>"#).unwrap(); +} + +#[test] +fn macro_nt() { + super::parse_nonterminal( + r#"Comma: Vec = ~v:(~E ",")* ~e:E? => { let mut v = v; v.extend(e.into_iter()); v};"#) + .unwrap(); +} +