Rewrite type inferencer to produce a separate Types table

using a simpler types grammar, rather than rewriting the tree in place.
This commit is contained in:
Niko Matsakis 2015-06-17 06:05:11 -04:00
parent 975ae0a200
commit 9200285d49
10 changed files with 319 additions and 214 deletions

View File

@ -1,6 +1,6 @@
//! The grammar definition.
pub mod parse_tree;
// pub mod repr;
pub mod repr;
// pub mod token;

View File

@ -59,8 +59,10 @@ grammar Type<'input, T> {
*/
use intern::InternedString;
use intern::{intern, InternedString};
use grammar::repr::TypeRepr;
use std::fmt::{Display, Formatter, Error};
use util::Sep;
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Grammar {
@ -124,19 +126,11 @@ pub struct Alternative {
pub condition: Option<Condition>,
// => { code }
pub action: Option<Action>,
pub action: Option<String>,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Action {
// code provided by the user
User(String),
// an index into a side-list of action fns, which is setup to take
// all of the values in this alternative as arguments, dropping
// the ones it doesn't care about.
Fn(u32),
}
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct ActionFnIndex(u32);
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Condition {
@ -291,22 +285,6 @@ impl Display for MacroSymbol {
}
}
struct Sep<S>(&'static str, S);
impl<'a,S:Display> Display for Sep<&'a Vec<S>> {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
let &Sep(sep, vec) = self;
let mut elems = vec.iter();
if let Some(elem) = elems.next() {
try!(write!(fmt, "{}", elem));
while let Some(elem) = elems.next() {
try!(write!(fmt, "{}{}", sep, elem));
}
}
Ok(())
}
}
impl Display for TypeRef {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
match *self {
@ -325,3 +303,26 @@ impl Display for TypeRef {
}
}
}
impl ActionFnIndex {
    /// Wraps a `usize` position as an `ActionFnIndex`.
    ///
    /// # Panics
    ///
    /// Panics if `x` cannot be represented in the underlying `u32`.
    /// A plain `as` cast would silently truncate such a value and the
    /// resulting index would refer to a different action fn.
    pub fn new(x: usize) -> ActionFnIndex {
        let narrowed = x as u32;
        // Round-tripping through `u32` detects truncation on every
        // pointer width without needing a platform-specific constant.
        assert!(narrowed as usize == x,
                "action fn index {} does not fit in a u32", x);
        ActionFnIndex(narrowed)
    }

    /// Returns the wrapped position widened back to a `usize`, suitable
    /// for indexing into a slice or `Vec`.
    pub fn index(&self) -> usize {
        self.0 as usize
    }
}
impl RepeatOp {
    /// Builds the type of a repeated symbol from the type of the symbol
    /// being repeated.
    ///
    /// `+` and `*` wrap `symbol_type` in `std::vec::Vec<..>`, while `?`
    /// wraps it in `std::option::Option<..>`.
    pub fn type_repr(&self, symbol_type: TypeRepr) -> TypeRepr {
        // Pick the fully qualified container path for this operator.
        let segments: [&str; 3] = match *self {
            RepeatOp::Plus | RepeatOp::Star => ["std", "vec", "Vec"],
            RepeatOp::Question => ["std", "option", "Option"],
        };

        TypeRepr::Nominal {
            path: segments.iter().map(|&segment| intern(segment)).collect(),
            types: vec![symbol_type],
        }
    }
}

View File

@ -1,31 +1,59 @@
/*!
* Compiled representation of a grammar. Simplified, normalized
* version of parse-tree.
* version of `parse_tree`. The normalization passes produce this
* representation incrementally.
*/
use intern::InternedString;
use std::collections::HashMap;
use std::fmt::{Display, Formatter, Error};
use util::Sep;
pub struct Grammar {
pub nonterminals: Vec<Nonterminal>,
pub action_fns: Vec<ActionFn>,
}
pub struct Nonterminal {
name: InternedString,
alternatives: Vec<Alternative>,
action_fn: usize
}
pub struct Alternative {
symbols: Vec<Symbol>
}
pub enum Symbol {
Terminal(InternedString),
Nonterminal(InternedString),
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ActionFn {
code: String
arg_names: Vec<InternedString>,
arg_types: Vec<TypeRepr>,
ret_type: Vec<TypeRepr>,
code: String,
}
/// A type as recorded in the compiled grammar representation.
///
/// The `Display` impl for this enum renders each variant as described
/// on the variants below.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum TypeRepr {
/// A tuple of component types; displayed as the components joined by
/// `", "` inside parentheses.
Tuple(Vec<TypeRepr>),
/// A named type: `path` holds the path segments (displayed joined by
/// `::`) and `types` the type arguments (displayed inside `<...>`, or
/// omitted entirely when the list is empty).
Nominal { path: Vec<InternedString>, types: Vec<TypeRepr> },
/// A lifetime, stored as an interned string and displayed verbatim.
Lifetime(InternedString),
}
/// Side table that records the type assigned to each nonterminal,
/// keyed by the nonterminal's interned name.
#[derive(Debug)]
pub struct Types {
    nonterminal_types: HashMap<InternedString, TypeRepr>
}

impl Types {
    /// Creates an empty table.
    pub fn new() -> Types {
        Types { nonterminal_types: HashMap::new() }
    }

    /// Records `ty` as the type of the nonterminal `nt_id`.
    ///
    /// # Panics
    ///
    /// Panics if a type was already recorded for `nt_id`; each
    /// nonterminal is expected to be registered exactly once.
    pub fn add_type(&mut self, nt_id: InternedString, ty: TypeRepr) {
        // Keep the mutation outside of any assertion macro so it is
        // obvious that it always runs, and report which nonterminal
        // was registered twice.
        if let Some(previous) = self.nonterminal_types.insert(nt_id, ty) {
            panic!("nonterminal `{}` already has a recorded type (`{}`)",
                   nt_id, previous);
        }
    }

    /// Returns the type recorded for `nt_id`, or `None` if no type has
    /// been recorded for it.
    pub fn nt_type(&self, nt_id: InternedString) -> Option<&TypeRepr> {
        self.nonterminal_types.get(&nt_id)
    }
}
impl Display for TypeRepr {
    /// Renders the type in source-like syntax: tuples as `(A, B)`,
    /// nominal types as `a::b::C` or `a::b::C<A, B>` when there are type
    /// arguments, and lifetimes as their stored text.
    fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
        match *self {
            TypeRepr::Tuple(ref components) => {
                write!(fmt, "({})", Sep(", ", components))
            }
            TypeRepr::Nominal { ref path, ref types } => {
                // Omit the angle brackets entirely when there are no
                // type arguments.
                if types.is_empty() {
                    write!(fmt, "{}", Sep("::", path))
                } else {
                    write!(fmt, "{}<{}>", Sep("::", path), Sep(", ", types))
                }
            }
            TypeRepr::Lifetime(name) => {
                write!(fmt, "{}", name)
            }
        }
    }
}

View File

@ -9,6 +9,7 @@ mod grammar;
mod intern;
mod normalize;
mod parser;
mod util;
#[cfg(not(test))]
fn main() {

View File

@ -23,9 +23,6 @@ macro_rules! return_err {
}
}
#[cfg(test)]
mod test_util;
// These are executed *IN ORDER*:
// Expands macros and expressions
@ -87,3 +84,12 @@ mod tyinfer;
//
// AFTER THIS POINT: No more Symbol::Question remain.
// mod remove_question;
///////////////////////////////////////////////////////////////////////////
// Shared routines
#[cfg(test)]
mod test_util;
mod norm_util;

View File

@ -0,0 +1,54 @@
use intern::InternedString;
use grammar::parse_tree::{Alternative, Symbol};
/// How the value of an alternative is produced, as classified by
/// `analyze_action`.
#[derive(Debug)]
pub enum AlternativeAction<'a> {
/// The alternative carries action code; the borrowed string is that
/// code.
User(&'a str),
/// The alternative has no action code; the payload is the set of
/// symbols selected by `analyze_symbols`.
Default(Symbols<'a>),
}
/// The symbols of an alternative selected by `analyze_symbols`.
#[derive(Debug)]
pub enum Symbols<'a> {
/// The alternative contains `Symbol::Name` entries: each pair is the
/// name together with the symbol it is attached to.
Named(Vec<(InternedString, &'a Symbol)>),
/// The alternative contains no named symbols: either just its
/// `Symbol::Choose` entries or, when there are none, all of its symbols.
Anon(Vec<&'a Symbol>),
}
/// Classifies how the value of `alt` is produced.
///
/// Returns `AlternativeAction::User` borrowing the action code when the
/// alternative has one; otherwise returns `AlternativeAction::Default`
/// wrapping the result of `analyze_symbols` for the same alternative.
pub fn analyze_action<'a>(alt: &'a Alternative) -> AlternativeAction<'a> {
    match alt.action {
        Some(ref code) => AlternativeAction::User(code),
        None => AlternativeAction::Default(analyze_symbols(alt)),
    }
}
pub fn analyze_symbols<'a>(alt: &'a Alternative) -> Symbols<'a> {
// First look for named symbols.
let named_symbols: Vec<_> =
alt.expr.symbols
.iter()
.filter_map(|sym| match *sym {
Symbol::Name(id, ref sub) => Some((id, &**sub)),
_ => None,
})
.collect();
if !named_symbols.is_empty() {
return Symbols::Named(named_symbols);
}
// Otherwise, make a tuple of the items they chose with a `~`.
let chosen_symbol_types: Vec<_> =
alt.expr.symbols
.iter()
.filter_map(|sym| match *sym {
Symbol::Choose(..) => Some(sym),
_ => None,
})
.collect();
if !chosen_symbol_types.is_empty() {
return Symbols::Anon(chosen_symbol_types);
}
// If they didn't choose anything with a `~`, make a tuple of everything.
Symbols::Anon(alt.expr.symbols.iter().collect())
}

View File

@ -1,40 +1,41 @@
use super::{NormResult, NormError};
use super::norm_util::{self, AlternativeAction, Symbols};
use std::collections::{HashMap};
use intern::{intern, InternedString};
use intern::{InternedString};
use grammar::parse_tree::{Alternative, Grammar, GrammarItem,
NonterminalData, RepeatSymbol, RepeatOp,
Span, Symbol, TypeRef};
use normalize::{NormResult, NormError};
NonterminalData, RepeatSymbol, Span, Symbol, TypeRef};
use grammar::repr::{Types, TypeRepr};
#[cfg(test)]
mod test;
pub fn infer_types(mut grammar: Grammar) -> NormResult<Grammar> {
{
let mut inferencer = try!(TypeInferencer::new(&mut grammar));
try!(inferencer.infer_types());
}
Ok(grammar)
pub fn infer_types(grammar: &Grammar) -> NormResult<Types> {
let inferencer = try!(TypeInferencer::new(&grammar));
inferencer.infer_types()
}
struct TypeInferencer<'a> {
token_type: TypeRef,
struct TypeInferencer<'grammar> {
token_type: &'grammar TypeRef,
stack: Vec<InternedString>,
nonterminals: HashMap<InternedString, NT<'a>>,
nonterminals: HashMap<InternedString, NT<'grammar>>,
types: Types,
}
struct NT<'a> {
#[derive(Copy, Clone)]
struct NT<'grammar> {
span: Span,
type_decl: &'a mut Option<TypeRef>,
alternatives: &'a Vec<Alternative>,
type_decl: &'grammar Option<TypeRef>,
alternatives: &'grammar Vec<Alternative>,
}
fn extract_token_type(grammar: &Grammar) -> NormResult<TypeRef> {
fn extract_token_type(grammar: &Grammar) -> NormResult<&TypeRef> {
let mut token_types =
grammar.items
.iter()
.filter_map(|item| {
match *item {
GrammarItem::TokenType(ref data) => Some(data.type_name.clone()),
GrammarItem::TokenType(ref data) => Some(&data.type_name),
_ => None,
}
});
@ -52,19 +53,19 @@ fn extract_token_type(grammar: &Grammar) -> NormResult<TypeRef> {
Ok(token_type)
}
impl<'a> TypeInferencer<'a> {
fn new(grammar: &'a mut Grammar) -> NormResult<TypeInferencer<'a>> {
impl<'grammar> TypeInferencer<'grammar> {
fn new(grammar: &'grammar Grammar) -> NormResult<TypeInferencer<'grammar>> {
let token_type =
try!(extract_token_type(grammar));
let nonterminals =
grammar.items
.iter_mut()
.iter()
.filter_map(|item| {
match *item {
GrammarItem::TokenType(..) =>
None,
GrammarItem::Nonterminal(ref mut data) => {
GrammarItem::Nonterminal(ref data) => {
assert!(!data.is_macro_def()); // normalized away by now
Some((data.name, NT::new(data)))
}
@ -74,53 +75,53 @@ impl<'a> TypeInferencer<'a> {
Ok(TypeInferencer { token_type: token_type,
stack: vec![],
nonterminals: nonterminals })
nonterminals: nonterminals,
types: Types::new() })
}
fn infer_types(&mut self) -> NormResult<()> {
fn infer_types(mut self) -> NormResult<Types> {
let ids: Vec<InternedString> =
self.nonterminals.iter()
.map(|(&id, _)| id)
.collect();
for id in ids {
let ty = try!(self.nonterminal_type(id));
*self.nonterminals.get_mut(&id).unwrap().type_decl = Some(ty);
try!(self.nonterminal_type(id));
debug_assert!(self.types.nt_type(id).is_some());
}
Ok(())
Ok(self.types)
}
fn nonterminal_type(&mut self, id: InternedString) -> NormResult<TypeRef> {
let (span, type_decl, alternatives) = {
let nt = &self.nonterminals[&id];
(nt.span, nt.type_decl.clone(), nt.alternatives)
};
if self.stack.contains(&id) {
return_err!(span, "cannot infer type of `{}` because it references itself", id);
fn nonterminal_type(&mut self, id: InternedString) -> NormResult<TypeRepr> {
if let Some(repr) = self.types.nt_type(id) {
return Ok(repr.clone());
}
self.push(id, |this| {
let type_decl = type_decl; // FIXME rustc bug requires thisx
if let Some(mut type_decl) = type_decl {
try!(this.refresh_type(&mut type_decl));
return Ok(type_decl);
let nt = self.nonterminals[&id];
if self.stack.contains(&id) {
return_err!(nt.span, "cannot infer type of `{}` because it references itself", id);
}
let ty = try!(self.push(id, |this| {
if let &Some(ref type_decl) = nt.type_decl {
return this.type_ref(type_decl);
}
let mut alternative_types: Vec<TypeRef> =
try!(alternatives.iter()
.map(|alt| this.alternative_type(alt))
.collect());
let mut alternative_types: Vec<_> = try! {
nt.alternatives.iter()
.map(|alt| this.alternative_type(alt))
.collect()
};
// if there are no alternatives, then call it an error
if alternative_types.is_empty() {
return_err!(span,
return_err!(nt.span,
"nonterminal `{}` has no alternatives and hence parse cannot succeed",
id);
}
for (ty, alt) in alternative_types[1..].iter().zip(&alternatives[1..]) {
for (ty, alt) in alternative_types[1..].iter().zip(&nt.alternatives[1..]) {
if &alternative_types[0] != ty {
return_err!(alt.expr.span,
"type of this alternative is `{}`, \
@ -130,11 +131,14 @@ impl<'a> TypeInferencer<'a> {
}
Ok(alternative_types.pop().unwrap())
})
}));
self.types.add_type(id, ty.clone());
Ok(ty)
}
fn push<F,R>(&mut self, id: InternedString, f: F) -> R
where F: FnOnce(&mut TypeInferencer) -> R
fn push<F,R>(&mut self, id: InternedString, f: F) -> NormResult<R>
where F: FnOnce(&mut TypeInferencer) -> NormResult<R>
{
self.stack.push(id);
let r = f(self);
@ -142,75 +146,58 @@ impl<'a> TypeInferencer<'a> {
r
}
fn refresh_type(&mut self, type_ref: &mut TypeRef) -> NormResult<()> {
let replacement = match *type_ref {
TypeRef::Tuple(ref mut types) |
TypeRef::Nominal { path: _, ref mut types } => {
for t in types {
try!(self.refresh_type(t));
}
return Ok(());
fn type_ref(&mut self, type_ref: &TypeRef) -> NormResult<TypeRepr> {
match *type_ref {
TypeRef::Tuple(ref types) => {
let types = try! {
types.iter().map(|t| self.type_ref(t)).collect()
};
Ok(TypeRepr::Tuple(types))
}
TypeRef::Lifetime(_) |
TypeRef::Id(_) => {
return Ok(());
TypeRef::Nominal { ref path, ref types } => {
let types = try! {
types.iter().map(|t| self.type_ref(t)).collect()
};
Ok(TypeRepr::Nominal { path: path.clone(), types: types })
}
TypeRef::Lifetime(id) => {
Ok(TypeRepr::Lifetime(id))
}
TypeRef::Id(id) => {
Ok(TypeRepr::Nominal { path: vec![id], types: vec![] })
}
TypeRef::OfSymbol(ref symbol) => {
try!(self.symbol_type(symbol))
self.symbol_type(symbol)
}
};
*type_ref = replacement;
Ok(())
}
}
fn alternative_type(&mut self, alt: &Alternative) -> NormResult<TypeRef> {
// We can't infer types for alternatives with actions
if alt.action.is_some() {
return_err!(alt.span, "cannot infer types if there is custom action code");
}
fn alternative_type(&mut self, alt: &Alternative) -> NormResult<TypeRepr> {
match norm_util::analyze_action(alt) {
AlternativeAction::User(_) => {
return_err!(alt.span, "cannot infer types if there is custom action code");
}
// We can't infer types if there are named symbols, because
// that should be a struct and we don't know which one.
let named_symbols =
alt.expr.symbols
.iter()
.filter(|sym| match **sym {
Symbol::Name(..) => true,
_ => false,
});
for sym in named_symbols {
return_err!(alt.span,
"cannot infer types in the presence of named symbols like `{}`",
sym);
}
AlternativeAction::Default(Symbols::Named(ref syms)) => {
return_err!(alt.span,
"cannot infer types in the presence of named symbols like `~{}:{}`",
syms[0].0, syms[0].1);
}
// Otherwise, make a tuple of the items they chose with a `~`.
let chosen_symbol_types: Vec<TypeRef> = try! {
alt.expr.symbols
.iter()
.filter_map(|sym| match *sym {
Symbol::Choose(..) => Some(self.symbol_type(sym)),
_ => None,
})
.collect()
};
if !chosen_symbol_types.is_empty() {
return Ok(maybe_tuple(chosen_symbol_types));
AlternativeAction::Default(Symbols::Anon(syms)) => {
let symbol_types: Vec<TypeRepr> = try! {
syms.iter()
.map(|sym| self.symbol_type(sym))
.collect()
};
Ok(maybe_tuple(symbol_types))
}
}
// If they didn't choose anything with a `~`, make a tuple of everything.
let symbol_types: Vec<TypeRef> = try! {
alt.expr.symbols.iter()
.map(|sym| self.symbol_type(sym))
.collect()
};
Ok(maybe_tuple(symbol_types))
}
fn symbol_type(&mut self, symbol: &Symbol) -> NormResult<TypeRef> {
fn symbol_type(&mut self, symbol: &Symbol) -> NormResult<TypeRepr> {
match *symbol {
Symbol::Terminal(_) => Ok(self.token_type.clone()),
Symbol::Terminal(_) => self.type_ref(self.token_type),
Symbol::Nonterminal(id) => self.nonterminal_type(id),
Symbol::Choose(ref s) => self.symbol_type(s),
Symbol::Name(_, ref s) => self.symbol_type(s),
@ -222,29 +209,22 @@ impl<'a> TypeInferencer<'a> {
}
}
fn repeat_type(&mut self, repeat: &RepeatSymbol) -> NormResult<TypeRef> {
fn repeat_type(&mut self, repeat: &RepeatSymbol) -> NormResult<TypeRepr> {
let symbol_type = try!(self.symbol_type(&repeat.symbol));
let path = match repeat.op {
RepeatOp::Plus |
RepeatOp::Star =>
vec![intern("std"), intern("vec"), intern("Vec")],
RepeatOp::Question =>
vec![intern("std"), intern("option"), intern("Option")],
};
Ok(TypeRef::Nominal { path: path, types: vec![symbol_type] })
Ok(repeat.op.type_repr(symbol_type))
}
}
impl<'a> NT<'a> {
fn new(data: &'a mut NonterminalData) -> NT<'a> {
NT { span: data.span, type_decl: &mut data.type_decl, alternatives: &data.alternatives }
impl<'grammar> NT<'grammar> {
fn new(data: &'grammar NonterminalData) -> NT<'grammar> {
NT { span: data.span, type_decl: &data.type_decl, alternatives: &data.alternatives }
}
}
fn maybe_tuple(v: Vec<TypeRef>) -> TypeRef {
fn maybe_tuple(v: Vec<TypeRepr>) -> TypeRepr {
if v.len() == 1 {
v.into_iter().next().unwrap()
} else {
TypeRef::Tuple(v)
TypeRepr::Tuple(v)
}
}

View File

@ -1,16 +1,45 @@
use intern::intern;
use parser;
use normalize::macro_expand::expand_macros;
use normalize::tyinfer::infer_types;
use normalize::test_util;
use grammar::parse_tree::TypeRef;
use grammar::repr::TypeRepr;
fn compare(g1: &str, g2: &str) {
let actual = parser::parse_grammar(g1).unwrap();
let actual = expand_macros(actual).unwrap();
let actual = infer_types(actual).unwrap();
fn type_repr(s: &str) -> TypeRepr {
let type_ref = parser::parse_type_ref(s).unwrap();
return convert(type_ref);
let expected = parser::parse_grammar(g2).unwrap();
fn convert(t: TypeRef) -> TypeRepr {
match t {
TypeRef::Tuple(types) =>
TypeRepr::Tuple(types.into_iter().map(convert).collect()),
TypeRef::Nominal { path, types } =>
TypeRepr::Nominal { path: path,
types: types.into_iter().map(convert).collect() },
TypeRef::Lifetime(id) =>
TypeRepr::Lifetime(id),
TypeRef::Id(id) =>
TypeRepr::Nominal { path: vec![id],
types: vec![] },
TypeRef::OfSymbol(_) =>
unreachable!("OfSymbol produced by parser")
}
}
}
test_util::compare(actual, expected);
fn compare(g1: &str, expected: Vec<(&'static str, &'static str)>) {
let grammar = parser::parse_grammar(g1).unwrap();
let grammar = expand_macros(grammar).unwrap();
let types = infer_types(&grammar).unwrap();
println!("types table: {:?}", types);
for (nt_id, nt_type) in expected {
let id = intern(nt_id);
let ty = type_repr(nt_type);
println!("expected type of {:?} is {:?}", id, ty);
assert_eq!(types.nt_type(id), Some(&ty));
}
}
#[test]
@ -22,14 +51,11 @@ grammar Foo {
Y: Foo = \"Hi\";
Z = \"Ho\";
}
","
grammar Foo {
token Tok where { };
X: (Foo, Tok) = Y Z;
Y: Foo = \"Hi\";
Z: Tok = \"Ho\";
}
")
", vec![
("X", "(Foo, Tok)"),
("Y", "Foo"),
("Z", "Tok")
])
}
#[test]
@ -46,7 +72,7 @@ grammar Foo {
").unwrap();
let actual = expand_macros(grammar).unwrap();
assert!(infer_types(actual).is_err());
assert!(infer_types(&actual).is_err());
}
#[test]
@ -62,7 +88,7 @@ grammar Foo {
").unwrap();
let actual = expand_macros(grammar).unwrap();
assert!(infer_types(actual).is_err());
assert!(infer_types(&actual).is_err());
}
#[test]
@ -73,13 +99,10 @@ grammar Foo {
Two<X>: (X, X) = X X;
Ids = Two<\"Id\">;
}
","
grammar Foo {
token Tok where { };
Ids: (Tok, Tok) = `Two<\"Id\">`;
`Two<\"Id\">`: (Tok, Tok) = \"Id\" \"Id\";
}
")
", vec![
("Ids", "(Tok, Tok)"),
(r#"Two<"Id">"#, "(Tok, Tok)"),
])
}
#[test]
@ -90,13 +113,10 @@ grammar Foo {
Two<X> = X X;
Ids = Two<\"Id\">;
}
","
grammar Foo {
token Tok where { };
Ids: (Tok, Tok) = `Two<\"Id\">`;
`Two<\"Id\">`: (Tok, Tok) = \"Id\" \"Id\";
}
")
", vec![
("Ids", "(Tok, Tok)"),
(r#"Two<"Id">"#, "(Tok, Tok)"),
])
}
#[test]
@ -107,11 +127,8 @@ grammar Foo {
X = Y?;
Y = \"Hi\";
}
","
grammar Foo {
token Tok where { };
X: std::option::Option<Tok> = Y?;
Y: Tok = \"Hi\";
}
")
",vec![
("X", "std::option::Option<Tok>"),
("Y", "Tok")
])
}

View File

@ -76,7 +76,7 @@ rusty_peg! {
IF_COND: Condition =
("if" <c:COND>) => c;
ACTION: Action = ("=>" <b:CODE>) => Action::User(b);
ACTION: String = ("=>" <b:CODE>) => b;
// Conditions
@ -286,7 +286,7 @@ fn parse_nonterminal(text: &str) -> Result<GrammarItem,rusty_peg::Error> {
rusty_peg::Symbol::parse_complete(&NONTERMINAL, &mut parser, text)
}
fn parse_type_ref(text: &str) -> Result<TypeRef,rusty_peg::Error> {
pub fn parse_type_ref(text: &str) -> Result<TypeRef,rusty_peg::Error> {
let mut parser = Parser::new(());
rusty_peg::Symbol::parse_complete(&TYPE_REF, &mut parser, text)
}

18
src/util.rs Normal file
View File

@ -0,0 +1,18 @@
use std::fmt::{Display, Formatter, Error};
/// Display adapter that writes the elements of a borrowed `Vec`
/// separated by a fixed string, e.g. `Sep(", ", &items)`.
///
/// The first field is the separator, the second the collection.
pub struct Sep<S>(pub &'static str, pub S);

impl<'a, S: Display> Display for Sep<&'a Vec<S>> {
    /// Writes each element with `{}`, inserting the separator between
    /// consecutive elements; an empty vector writes nothing.
    fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
        let separator = self.0;
        for (position, item) in self.1.iter().enumerate() {
            // The separator goes before every element except the first.
            if position > 0 {
                try!(write!(fmt, "{}", separator));
            }
            try!(write!(fmt, "{}", item));
        }
        Ok(())
    }
}