From 3c67e3c8c5eba9eff16d0315b23c367a60e637f9 Mon Sep 17 00:00:00 2001 From: Niko Matsakis Date: Fri, 24 Jul 2015 05:54:53 -0400 Subject: [PATCH] add LALRPOP grammar written in itself; generates quite a lot of output though -- probably need to shift to a less naive LR(1) generation algorithm --- lalrpop/src/newparser.lalrpop | 328 ++++++++++++++++++++++++++++++++++ 1 file changed, 328 insertions(+) create mode 100644 lalrpop/src/newparser.lalrpop diff --git a/lalrpop/src/newparser.lalrpop b/lalrpop/src/newparser.lalrpop new file mode 100644 index 0000000..04da165 --- /dev/null +++ b/lalrpop/src/newparser.lalrpop @@ -0,0 +1,328 @@ +use intern::{intern, InternedString}; +use grammar::parse_tree::*; +use grammar::pattern::*; +use std::iter::once; +use tok::{self, Tok}; + +grammar<'input>(text: &'input str); + +pub Grammar: Grammar = + "grammar" + + + + ";" + => { + Grammar { span: Span(lo, hi), + type_parameters: tps.unwrap_or(vec![]), + parameters: parameters.unwrap_or(vec![]), + where_clauses: where_clauses.unwrap_or(vec![]), + items: uses.into_iter().chain(i).collect() } + }; + +GrammarTypeParameters: Vec = + "<" > ">"; + +TypeParameter: TypeParameter = { + => TypeParameter::Lifetime(l); + => TypeParameter::Id(l); +}; + +GrammarParameters: Vec = + "(" > ")"; + +GrammarParameter: Parameter = + ":" => Parameter { name: id, ty: ty }; + +GrammarItem: GrammarItem = { + Use; + ExternToken; + Nonterminal; +}; + +Use: GrammarItem = + ";" => GrammarItem::Use(u); + +Nonterminal: GrammarItem = + + )?> "=" => { + GrammarItem::Nonterminal(NonterminalData { public: p.is_some(), + span: Span(lo, hi), + name: n.0, + args: n.1, + type_decl: t, + alternatives: a }) + }; + +NonterminalName: (NonterminalString, Vec) = { + "<" > ">"; + ; + <"Escape"> => (NonterminalString(intern(<>)), vec![]); +}; + +Alternatives: Vec = { + => vec![a]; + "{" "}" ";"; +}; + +Alternative: Alternative = + )?> ";" => { + Alternative { + span: Span(lo, hi), + expr: s, + condition: c, + action: a + } + }; + +Action: ActionKind = { + "=>@L" => ActionKind::Lookahead; + "=>@R" => ActionKind::Lookbehind; + "> => ActionKind::Code(c.to_string()); + ?"> => ActionKind::Fallible(c.to_string()); +}; + +Cond: Condition = + => { + Condition { span:Span(lo, hi), lhs:a, rhs:b, op:op } + }; + +CondOp: ConditionOp = { + "==" => ConditionOp::Equals; + "!=" => ConditionOp::NotEquals; + "~~" => ConditionOp::Match; + "!~" => ConditionOp::NotMatch; +}; + +Symbol: Symbol = { + "<" @L ":" ">" => + Symbol::new(Span(lo, hi), SymbolKind::Name(l, Box::new(s))); + + "<" ">" => + Symbol::new(Span(lo, hi), SymbolKind::Choose(Box::new(s))); + + Symbol0; +}; + +Symbol0: Symbol = { + Symbol1; + + => + Symbol::new(Span(lhs.span.0, hi), + SymbolKind::Repeat(Box::new(RepeatSymbol { symbol: lhs, op: op }))); +}; + +RepeatOp: RepeatOp = { + "+" => RepeatOp::Plus; + "*" => RepeatOp::Star; + "?" => RepeatOp::Question; +}; + +Symbol1: Symbol = + => Symbol::new(Span(lo, hi), sk); + +SymbolKind1: SymbolKind = { + "<" > ">" => + SymbolKind::Macro(MacroSymbol { name: name, args: args }); + + Terminal => + SymbolKind::Terminal(<>); + + NonterminalId => + SymbolKind::Nonterminal(<>); + + Escape => + SymbolKind::Nonterminal(NonterminalString(<>)); + + "(" ")" => + SymbolKind::Expr(<>); + + "@L" => + SymbolKind::Lookahead; + + "@R" => + SymbolKind::Lookbehind; +}; + +TypeRef: TypeRef = { + "(" > ")" => + TypeRef::Tuple(<>); + + =>? { + panic!("parse escape symbol") + }; + + "&" => + TypeRef::Ref { lifetime: l, + mutable: m.is_some(), + referent: Box::new(t) }; + + "<" > ">" => + TypeRef::Nominal { path: p, types: a }; + + => + match p.as_id() { + Some(id) => TypeRef::Id(id), + None => TypeRef::Nominal { path: p, types: vec![] } + }; +}; + +TypeRefOrLifetime: TypeRef = { + TypeRef; + Lifetime => TypeRef1::Lifetime(<>); +}; + +Path: Path = + "::")*> => { + Path { absolute: a.is_some(), + ids: h.into_iter().chain(once(t)).collect() } + }; + +ExternToken: GrammarItem = + "extern" "token" "{" + + "enum" "{" + > + "}" + + "}" => { + GrammarItem::ExternToken(ExternToken { + span: Span(lo0, hi0), + associated_types: a0.into_iter().chain(a1).collect(), + enum_token: EnumToken { + type_name: t, + type_span: Span(lo, hi), + conversions: c, + } + }) + }; + +AssociatedType: AssociatedType = + "type" "=" ";" => { + AssociatedType { type_span: Span(lo, hi), + type_name: n, + type_ref: t } + }; + +Conversion: Conversion = + "> =>? { + let pattern = try!(parse_pattern(p)); + Conversion { span: Span(lo, hi), from: from, to: pattern } + }; + +pub Pattern: Pattern = + => Pattern { span: Span(lo, hi), kind: k }; + +PatternKind: PatternKind = { + "(" > ")" => + PatternKind::Enum(<>); + + "{" ",")*> "}" => + PatternKind::Struct(p, a0.into_iter().chain(a1).collect(), false); + + "{" ",")*> ".." "}" => + PatternKind::Struct(p, a0, true); + + "_" => + PatternKind::Underscore; + + ".." => + PatternKind::DotDot; + + "<" ">" => + PatternKind::Choose(<>); + + "(" > ")" => + PatternKind::Tuple(<>); + + => + PatternKind::Path(<>); +}; + +FieldPattern: FieldPattern = + ":" => { + FieldPattern { field_span: Span(lo, hi), + field_name: id, + pattern: pat } + }; + +MacroId: NonterminalString = + => NonterminalString(intern(i)); + +NonterminalId: NonterminalString = + => NonterminalString(i); + +Id: InternedString = + => intern(i); + +Escape: InternedString = + => intern(i); + +Lifetime: InternedString = + => intern(i); + +Terminal: TerminalString = + => TerminalString(s); + +StringLiteral: InternedString = + => intern(s); + +Comma: Vec = + ",")*> => + v0.into_iter().chain(e1).collect(); + +extern token { + type Location = usize; + type Error = tok::Error; + enum Tok<'input> { + "enum" => Tok::Enum(..), + "extern" => Tok::Extern(..), + "grammar" => Tok::Grammar(..), + "if" => Tok::If(..), + "mut" => Tok::Mut(..), + "pub" => Tok::Pub(..), + "token" => Tok::Token(..), + "type" => Tok::Type(..), + "use" => Tok::Use(..), + "where" => Tok::Where(..), + + "Escape" => Tok::Escape(<&'input str>), + "Id" => Tok::Id(<&'input str>), + "MacroId" => Tok::MacroId(<&'input str>), + "Lifetime" => Tok::Lifetime(<&'input str>), + "StringLiteral" => Tok::StringLiteral(<&'input str>), + + "&" => Tok::Ampersand(..), + "!=" => Tok::BangEquals(..), + "!~" => Tok::BangTilde(..), + ":" => Tok::Colon(..), + "::" => Tok::ColonColon(..), + "," => Tok::Comma(..), + ".." => Tok::DotDot(..), + "=" => Tok::Equals(..), + "==" => Tok::EqualsEquals(..), + "=>" => Tok::EqualsGreaterThanCode(<&'input str>), + "=>?" => Tok::EqualsGreaterThanQuestionCode(<&'input str>), + "=>@L" => Tok::EqualsGreaterThanLookahead(..), + "=>@R" => Tok::EqualsGreaterThanLookbehind(..), + ">" => Tok::GreaterThan(..), + "{" => Tok::LeftBrace(..), + "[" => Tok::LeftBracket(..), + "(" => Tok::LeftParen(..), + "<" => Tok::LessThan(..), + "@L" => Tok::Lookahead(..), + "@R" => Tok::Lookbehind(..), + "+" => Tok::Plus(..), + "?" => Tok::Question(..), + "}" => Tok::RightBrace(..), + "]" => Tok::RightBracket(..), + ")" => Tok::RightParen(..), + ";" => Tok::Semi(..), + "*" => Tok::Star(..), + "~~" => Tok::TildeTilde(..), + "_" => Tok::Underscore(..), + } +} + + +