From 7bf7f686d950c2ea6a4beeb472bc1120afb42601 Mon Sep 17 00:00:00 2001 From: Niko Matsakis Date: Mon, 15 Jun 2015 07:49:29 -0400 Subject: [PATCH] Richer typeref model --- src/grammar/mod.rs | 1 - src/grammar/parse_tree.rs | 28 ++++++++++-- src/grammar/token/mod.rs | 17 ++----- src/grammar/token/test.rs | 14 ------ src/grammar/ty.rs | 51 --------------------- src/parser/mod.rs | 93 +++++++++++++++++++++------------------ src/parser/test.rs | 21 +++++++-- 7 files changed, 97 insertions(+), 128 deletions(-) delete mode 100644 src/grammar/ty.rs diff --git a/src/grammar/mod.rs b/src/grammar/mod.rs index c6f2fb0..6b42920 100644 --- a/src/grammar/mod.rs +++ b/src/grammar/mod.rs @@ -5,5 +5,4 @@ use intern::{self, InternedString}; pub mod parse_tree; pub mod repr; pub mod token; -pub mod ty; diff --git a/src/grammar/parse_tree.rs b/src/grammar/parse_tree.rs index 0c732e9..45e955a 100644 --- a/src/grammar/parse_tree.rs +++ b/src/grammar/parse_tree.rs @@ -60,11 +60,10 @@ grammar Type<'input, T> { */ use intern::InternedString; -use grammar::ty::TypeName; #[derive(Clone, Debug)] pub struct Grammar { - pub type_name: TypeName, + pub type_name: TypeRef, pub items: Vec, } @@ -76,15 +75,36 @@ pub enum GrammarItem { #[derive(Clone, Debug)] pub struct TokenTypeData { - pub type_name: TypeName, + pub type_name: TypeRef, pub conversions: Vec<(InternedString, InternedString)>, } +#[derive(Clone, Debug)] +pub enum TypeRef { + // (T1, T2) + Tuple(Vec), + + // Foo<'a, 'b, T1, T2>, Foo::Bar, etc + Nominal { + path: Vec, + types: Vec + }, + + // 'x ==> only should appear within nominal types, but what do we care + Lifetime(InternedString), + + // Foo or Bar ==> treated specially since macros may care + Id(InternedString), + + // ==> type of a nonterminal, emitted by macro expansion + Nonterminal(InternedString), +} + #[derive(Clone, Debug)] pub struct NonterminalData { pub name: InternedString, pub args: Vec, // macro arguments - pub type_decl: Option, + pub type_decl: Option, pub alternatives: Vec } diff --git a/src/grammar/token/mod.rs b/src/grammar/token/mod.rs index 45c1d01..b4e3c0e 100644 --- a/src/grammar/token/mod.rs +++ b/src/grammar/token/mod.rs @@ -1,21 +1,21 @@ use std::collections::{HashMap}; +use grammar::parse_tree::TypeRef; use intern::InternedString; -use grammar::ty::TypeName; #[cfg(test)] mod test; pub struct TokenDefinition { // if the enum type is `foo::bar::baz` then: - enum_type: TypeName, + enum_type: TypeRef, // map from a custom string, like `"("` to a variant name like LPAREN token_map: HashMap, } impl TokenDefinition { - pub fn new(enum_type: TypeName, + pub fn new(enum_type: TypeRef, token_map: Vec<(InternedString, InternedString)>) -> TokenDefinition { @@ -25,16 +25,7 @@ impl TokenDefinition { } } - pub fn enum_type(&self) -> &TypeName { + pub fn enum_type(&self) -> &TypeRef { &self.enum_type } - - pub fn match_pattern(&self, name: InternedString) -> String { - let variant_name = match self.token_map.get(&name) { - Some(&v) => v, - None => name, - }; - - format!("{}::{}(..)", self.enum_type.path(), variant_name) - } } diff --git a/src/grammar/token/test.rs b/src/grammar/token/test.rs index 9b530de..58151fe 100644 --- a/src/grammar/token/test.rs +++ b/src/grammar/token/test.rs @@ -1,17 +1,3 @@ use intern::intern; -use grammar::ty::TypeName; use grammar::token::*; -fn test_token_defn() -> TokenDefinition { - TokenDefinition::new(TypeName::new(vec![intern("parser")], - intern("Token"), - vec![intern("'input")]), - vec![(intern("("), intern("LPAREN")), - (intern("R"), intern("LPAREN"))]) -} - -#[test] -fn test_match_pattern() { - let defn = test_token_defn(); - assert_eq!(defn.match_pattern(intern("(")), "::parser::Token::LPAREN(..)"); -} diff --git a/src/grammar/ty.rs b/src/grammar/ty.rs deleted file mode 100644 index ff0b057..0000000 --- a/src/grammar/ty.rs +++ /dev/null @@ -1,51 +0,0 @@ -/*! - * Simple representation of Rust types. Really only understands two - * things: named types, and tuples. - */ - -use intern::{self, InternedString}; - -#[derive(Clone, Debug)] -pub struct TypeName { - pub module: Vec, - pub type_name: InternedString, - pub parameters: Vec, -} - -impl TypeName { - pub fn new(module: Vec, - type_name: InternedString, - parameters: Vec) - -> TypeName - { - TypeName { module: module, type_name: type_name, parameters: parameters } - } - - pub fn path(&self) -> String { - if self.module.is_empty() { - format!("::{}", self.type_name) - } else { - format!("::{}::{}", connect(&self.module, "::"), self.type_name) - } - } - - pub fn reference(&self) -> String { - format!("{}<{}>", self.path(), connect(&self.parameters, ", ")) - } -} - -fn connect(strs: &[InternedString], sep: &str) -> String { - let mut buf = String::new(); - intern::read(|interner| { - let mut iter = strs.iter(); - if let Some(&v) = iter.next() { - buf.push_str(interner.data(v)); - while let Some(&v) = iter.next() { - buf.push_str(sep); - buf.push_str(interner.data(v)); - } - } - }); - buf -} - diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 05e538e..93309b9 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,5 +1,4 @@ use intern::{intern, InternedString}; -use grammar::ty::TypeName; use grammar::parse_tree::*; use rusty_peg; @@ -10,7 +9,7 @@ rusty_peg! { parser Parser<'input> { // Grammar GRAMMAR: Grammar = - ("grammar" "{" "}") => { + ("grammar" "{" "}") => { Grammar { type_name: t, items: i } }; @@ -18,7 +17,7 @@ rusty_peg! { (TOKEN_TYPE / NONTERMINAL); TOKEN_TYPE: GrammarItem = - ("token" "where" "{" "}" ";") => { + ("token" "where" "{" "}" ";") => { GrammarItem::TokenType(TokenTypeData {type_name: t, conversions: c }) }; @@ -49,13 +48,8 @@ rusty_peg! { NONTERMINAL_NAME_MACRO1: InternedString = ( ",") => a; - NONTERMINAL_TYPE: String = - (":" ) => s.to_string(); - - // FIXME this isn't really right; we should be gobbling up token - // trees here until we find an "=" - NOT_EQ: &'input str = - regex("[^=]+"); + NONTERMINAL_TYPE: TypeRef = + (":" ) => s; ALTERNATIVES: Vec = (ALTERNATIVES1 / ALTERNATIVESN); @@ -131,38 +125,53 @@ rusty_peg! { CHOSEN_SYMBOL: Symbol = ("~" ) => Symbol::Choose(Box::new(s)); - // TypeName + // TypeRef - TYPE_NAME: TypeName = - ( ) => { - TypeName::new(prefix, name, suffix) + TYPE_REF: TypeRef = + (TUPLE_TYPE_REF / LIFETIME_TYPE_REF / NOMINAL_TYPE_REF); + + TUPLE_TYPE_REF: TypeRef = + ("(" ")") => TypeRef::Tuple(l); + + LIFETIME_TYPE_REF: TypeRef = + () => TypeRef::Lifetime(l); + + NOMINAL_TYPE_REF: TypeRef = + ( ) => { + if p.len() == 1 && a.is_none() { + // detect something like `Foo` and treat it specially, + // so that macro expansion can pattern match here + TypeRef::Id(p.into_iter().next().unwrap()) + } else { + // otherwise, `Vec<..>` or `Foo::Bar` etc expand to + // this full path + TypeRef::Nominal { path: p, types: a.unwrap_or(vec![]) } + } }; - PATH_COMPONENT: InternedString = + NOMINAL_TYPE_REF_ARGS: Vec = + ("<" ">") => l; + + TYPE_REF_LIST: Vec = + ( ) => { + let mut a = a; + a.extend(t.into_iter()); + a + }; + + TYPE_REF_COMMA: TypeRef = + ( ",") => t; + + PATH: Vec = + ( ) => { + let mut b = b; + b.push(c); + b + }; + + PATH_BASE: InternedString = ( "::") => i; - PATH_SUFFIX: Vec = - () => p.unwrap_or(Vec::new()); - - PATH_SUFFIX_1: Vec = - ("<" ">") => p; - - PATH_PARAMETERS: Vec = - fold(, - ("," ) => { let mut p = p; p.push(q); p }); - - PATH_PARAMETER0: Vec = - () => vec![p]; - - PATH_PARAMETER: InternedString = - (PATH_PARAMETER_TYPE / PATH_PARAMETER_LIFETIME); - - PATH_PARAMETER_TYPE: InternedString = - ID; - - PATH_PARAMETER_LIFETIME: InternedString = - LIFETIME; - // IDENTIFIERS, LIFETIMES ID: InternedString = @@ -233,11 +242,6 @@ impl<'input> rusty_peg::Symbol<'input,Parser<'input>> for CODE { } } -pub fn parse_type_name(text: &str) -> TypeName { - let mut parser = Parser::new(()); - rusty_peg::Symbol::parse_complete(&TYPE_NAME, &mut parser, text).unwrap() -} - pub fn parse_grammar(text: &str) -> Result { let mut parser = Parser::new(()); rusty_peg::Symbol::parse_complete(&GRAMMAR, &mut parser, text) @@ -257,3 +261,8 @@ fn parse_nonterminal(text: &str) -> Result { let mut parser = Parser::new(()); rusty_peg::Symbol::parse_complete(&NONTERMINAL, &mut parser, text) } + +fn parse_type_ref(text: &str) -> Result { + let mut parser = Parser::new(()); + rusty_peg::Symbol::parse_complete(&TYPE_REF, &mut parser, text) +} diff --git a/src/parser/test.rs b/src/parser/test.rs index 078c66c..b2ae295 100644 --- a/src/parser/test.rs +++ b/src/parser/test.rs @@ -1,7 +1,22 @@ +use grammar::parse_tree::TypeRef; + #[test] -fn type_name() { - let x = super::parse_type_name("parser::Enum<'l,T>"); - assert_eq!(x.reference(), "::parser::Enum<'l, T>"); +fn type_ref() { + super::parse_type_ref("parser::Enum<'l,T>").unwrap(); +} + +#[test] +fn type_ref_tuple() { + super::parse_type_ref("(X,Y)").unwrap(); +} + +#[test] +fn type_ref_special_case_for_id() { + let x = super::parse_type_ref("X").unwrap(); + assert!(match x { + TypeRef::Id(_) => true, + _ => false + }); } #[test]