From 6192b0b2a1f4fa94d6889734b8093bac1c2f03c2 Mon Sep 17 00:00:00 2001 From: Niko Matsakis Date: Thu, 25 Jun 2015 22:39:11 -0700 Subject: [PATCH] refactor grammar to prevent undesirable nesting of symbols and so forth --- lalrpop/src/grammar/parse_tree.rs | 2 +- lalrpop/src/normalize/tyinfer/test.rs | 4 ++-- lalrpop/src/parser/mod.rs | 22 +++++++++--------- lalrpop/src/parser/test.rs | 32 ++++++++++++++++++++++----- 4 files changed, 42 insertions(+), 18 deletions(-) diff --git a/lalrpop/src/grammar/parse_tree.rs b/lalrpop/src/grammar/parse_tree.rs index 3deb352..d752b53 100644 --- a/lalrpop/src/grammar/parse_tree.rs +++ b/lalrpop/src/grammar/parse_tree.rs @@ -174,7 +174,7 @@ pub enum Symbol { // ~X Choose(Box), - // ~x:X + // x:X Name(InternedString, Box), } diff --git a/lalrpop/src/normalize/tyinfer/test.rs b/lalrpop/src/normalize/tyinfer/test.rs index c377b3c..4a124cc 100644 --- a/lalrpop/src/normalize/tyinfer/test.rs +++ b/lalrpop/src/normalize/tyinfer/test.rs @@ -142,7 +142,7 @@ grammar { X = { Y; - ~l:X "+" ~r:Y => l + r; + l:X "+" r:Y => l + r; }; Y: i32 = "foo" => 22; @@ -162,7 +162,7 @@ grammar { X = { Y; Z; - ~l:X \"+\" ~r:Y => l + r; + l:X \"+\" r:Y => l + r; }; Y: i32 = \"foo\" => 22; diff --git a/lalrpop/src/parser/mod.rs b/lalrpop/src/parser/mod.rs index e4afb65..45adef1 100644 --- a/lalrpop/src/parser/mod.rs +++ b/lalrpop/src/parser/mod.rs @@ -102,7 +102,16 @@ rusty_peg! { // Symbols SYMBOL: Symbol = - fold(, + (NAMED_SYMBOL / CHOSEN_SYMBOL / SYMBOL0); + + NAMED_SYMBOL: Symbol = + ( ":" ) => Symbol::Name(l, Box::new(s)); + + CHOSEN_SYMBOL: Symbol = + ("~" ) => Symbol::Choose(Box::new(s)); + + SYMBOL0: Symbol = + fold(, ( ) => { Symbol::Repeat(Box::new(RepeatSymbol { span: Span(lo, hi), symbol: lhs, @@ -114,9 +123,8 @@ rusty_peg! { REPEAT_OP_STAR: RepeatOp = "*" => RepeatOp::Star; REPEAT_OP_QUESTION: RepeatOp = "?" 
=> RepeatOp::Question; - SYMBOL0: Symbol = - (NAMED_SYMBOL / CHOSEN_SYMBOL / MACRO_SYMBOL / TERMINAL_SYMBOL / - NT_SYMBOL / ESCAPE_SYMBOL / PAREN_SYMBOL); + SYMBOL1: Symbol = + (MACRO_SYMBOL / TERMINAL_SYMBOL / NT_SYMBOL / ESCAPE_SYMBOL / PAREN_SYMBOL); MACRO_SYMBOL: Symbol = ( "<" ">" ) => { @@ -145,12 +153,6 @@ rusty_peg! { ( ) => ExprSymbol { span: Span(lo, hi), symbols: s }; - NAMED_SYMBOL: Symbol = - ( ":" ) => Symbol::Name(l, Box::new(s)); - - CHOSEN_SYMBOL: Symbol = - ("~" ) => Symbol::Choose(Box::new(s)); - // TypeRef TYPE_REF: TypeRef = diff --git a/lalrpop/src/parser/test.rs b/lalrpop/src/parser/test.rs index 936ccbf..235126e 100644 --- a/lalrpop/src/parser/test.rs +++ b/lalrpop/src/parser/test.rs @@ -1,4 +1,4 @@ -use grammar::parse_tree::TypeRef; +use grammar::parse_tree::{Symbol, TypeRef}; #[test] fn type_ref() { @@ -66,7 +66,7 @@ fn paren_with_plus_and_anon() { #[test] fn named_choice() { - super::parse_grammar(r#"grammar { Expr = ~n:Alt; }"#).unwrap(); + super::parse_grammar(r#"grammar { Expr = n:Alt; }"#).unwrap(); } #[test] @@ -82,14 +82,14 @@ fn token_expr() { #[test] fn map1() { super::parse_grammar( - r#"grammar { Expr = ~n:Alt+ => { { foo } }; }"#).unwrap(); + r#"grammar { Expr = n:Alt+ => { { foo } }; }"#).unwrap(); } #[test] #[allow(non_snake_case)] fn mapN() { super::parse_grammar( - r#"grammar { Expr = { Bar => { Baz }; X ~n:Bar => { Y }; }; }"#).unwrap(); + r#"grammar { Expr = { Bar => { Baz }; X n:Bar => { Y }; }; }"#).unwrap(); } #[test] @@ -101,10 +101,32 @@ fn macro_symbols() { super::parse_symbol(r#"Foo<"Baz"+, ("Balooga" Potato),>"#).unwrap(); } +#[test] +fn symbol_precedence() { + // check that we parse this as choosing a X+ + let sym = super::parse_symbol(r#"~X+"#).unwrap(); + assert!(match sym { + Symbol::Choose(..) => true, + _ => false + }); + + let sym = super::parse_symbol(r#"n:X+"#).unwrap(); + assert!(match sym { + Symbol::Name(..) 
=> true, + _ => false + }); +} + +#[test] +fn symbol_choose_name() { + // check that `~S` and `x:S` each parse on their own, but that combining them on one symbol (`~x:S`) is rejected + assert!(super::parse_symbol(r#"~x:X+"#).is_err()); +} + #[test] fn macro_nt() { super::parse_nonterminal( - r#"Comma: Vec = ~v:(~E ",")* ~e:E? => v.into_iter().chain(e.into_iter()).collect();"#) + r#"Comma: Vec = v:(~E ",")* e:E? => v.into_iter().chain(e.into_iter()).collect();"#) .unwrap(); }