Rewrite type inferencer to produce a separate Types table

using a simpler types grammar, rather than rewriting the tree in place
This commit is contained in:
Niko Matsakis 2015-06-17 06:05:11 -04:00
parent 975ae0a200
commit 9200285d49
10 changed files with 319 additions and 214 deletions

View File

@ -1,6 +1,6 @@
//! The grammar definition.
pub mod parse_tree;
// pub mod repr;
pub mod repr;
// pub mod token;

View File

@ -59,8 +59,10 @@ grammar Type<'input, T> {
use intern::InternedString;
use intern::{intern, InternedString};
use grammar::repr::TypeRepr;
use std::fmt::{Display, Formatter, Error};
use util::Sep;
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Grammar {
@ -124,19 +126,11 @@ pub struct Alternative {
pub condition: Option<Condition>,
// => { code }
pub action: Option<Action>,
pub action: Option<String>,
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Action {
// code provided by the user
// an index into a side-list of action fns, which is set up to take
// all of the values in this alternative as arguments, dropping
// the ones it doesn't care about.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct ActionFnIndex(u32);
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Condition {
@ -291,22 +285,6 @@ impl Display for MacroSymbol {
struct Sep<S>(&'static str, S);
impl<'a,S:Display> Display for Sep<&'a Vec<S>> {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
let &Sep(sep, vec) = self;
let mut elems = vec.iter();
if let Some(elem) = {
try!(write!(fmt, "{}", elem));
while let Some(elem) = {
try!(write!(fmt, "{}{}", sep, elem));
impl Display for TypeRef {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
match *self {
@ -325,3 +303,26 @@ impl Display for TypeRef {
impl ActionFnIndex {
pub fn new(x: usize) -> ActionFnIndex {
ActionFnIndex(x as u32)
pub fn index(&self) -> usize {
self.0 as usize
impl RepeatOp {
pub fn type_repr(&self, symbol_type: TypeRepr) -> TypeRepr {
let path = match *self {
RepeatOp::Plus |
RepeatOp::Star =>
vec![intern("std"), intern("vec"), intern("Vec")],
RepeatOp::Question =>
vec![intern("std"), intern("option"), intern("Option")],
TypeRepr::Nominal { path: path, types: vec![symbol_type] }

View File

@ -1,31 +1,59 @@
* Compiled representation of a grammar. Simplified, normalized
* version of parse-tree.
* version of `parse_tree`. The normalization passes produce this
* representation incrementally.
use intern::InternedString;
use std::collections::HashMap;
use std::fmt::{Display, Formatter, Error};
use util::Sep;
pub struct Grammar {
pub nonterminals: Vec<Nonterminal>,
pub action_fns: Vec<ActionFn>,
pub struct Nonterminal {
name: InternedString,
alternatives: Vec<Alternative>,
action_fn: usize
pub struct Alternative {
symbols: Vec<Symbol>
pub enum Symbol {
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ActionFn {
code: String
arg_names: Vec<InternedString>,
arg_types: Vec<TypeRepr>,
ret_type: Vec<TypeRepr>,
code: String,
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum TypeRepr {
Nominal { path: Vec<InternedString>, types: Vec<TypeRepr> },
pub struct Types {
nonterminal_types: HashMap<InternedString, TypeRepr>
impl Types {
pub fn new() -> Types {
Types { nonterminal_types: HashMap::new() }
pub fn add_type(&mut self, nt_id: InternedString, ty: TypeRepr) {
assert!(self.nonterminal_types.insert(nt_id, ty).is_none());
pub fn nt_type(&self, nt_id: InternedString) -> Option<&TypeRepr> {
impl Display for TypeRepr {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
match *self {
TypeRepr::Tuple(ref types) =>
write!(fmt, "({})", Sep(", ", types)),
TypeRepr::Nominal { ref path, ref types } if types.len() == 0 =>
write!(fmt, "{}", Sep("::", path)),
TypeRepr::Nominal { ref path, ref types } =>
write!(fmt, "{}<{}>", Sep("::", path), Sep(", ", types)),
TypeRepr::Lifetime(id) =>
write!(fmt, "{}", id),

View File

@ -9,6 +9,7 @@ mod grammar;
mod intern;
mod normalize;
mod parser;
mod util;
fn main() {

View File

@ -23,9 +23,6 @@ macro_rules! return_err {
mod test_util;
// These are executed *IN ORDER*:
// Expands macros and expressions
@ -87,3 +84,12 @@ mod tyinfer;
// AFTER THIS POINT: No more Symbol::Question remain.
// mod remove_question;
// Shared routines
mod test_util;
mod norm_util;

View File

@ -0,0 +1,54 @@
use intern::InternedString;
use grammar::parse_tree::{Alternative, Symbol};
pub enum AlternativeAction<'a> {
User(&'a str),
pub enum Symbols<'a> {
Named(Vec<(InternedString, &'a Symbol)>),
Anon(Vec<&'a Symbol>),
pub fn analyze_action<'a>(alt: &'a Alternative) -> AlternativeAction<'a> {
// We can't infer types for alternatives with actions
if let Some(ref code) = alt.action {
return AlternativeAction::User(code);
pub fn analyze_symbols<'a>(alt: &'a Alternative) -> Symbols<'a> {
// First look for named symbols.
let named_symbols: Vec<_> =
.filter_map(|sym| match *sym {
Symbol::Name(id, ref sub) => Some((id, &**sub)),
_ => None,
if !named_symbols.is_empty() {
return Symbols::Named(named_symbols);
// Otherwise, make a tuple of the items they chose with a `~`.
let chosen_symbol_types: Vec<_> =
.filter_map(|sym| match *sym {
Symbol::Choose(..) => Some(sym),
_ => None,
if !chosen_symbol_types.is_empty() {
return Symbols::Anon(chosen_symbol_types);
// If they didn't choose anything with a `~`, make a tuple of everything.

View File

@ -1,40 +1,41 @@
use super::{NormResult, NormError};
use super::norm_util::{self, AlternativeAction, Symbols};
use std::collections::{HashMap};
use intern::{intern, InternedString};
use intern::{InternedString};
use grammar::parse_tree::{Alternative, Grammar, GrammarItem,
NonterminalData, RepeatSymbol, RepeatOp,
Span, Symbol, TypeRef};
use normalize::{NormResult, NormError};
NonterminalData, RepeatSymbol, Span, Symbol, TypeRef};
use grammar::repr::{Types, TypeRepr};
mod test;
pub fn infer_types(mut grammar: Grammar) -> NormResult<Grammar> {
let mut inferencer = try!(TypeInferencer::new(&mut grammar));
pub fn infer_types(grammar: &Grammar) -> NormResult<Types> {
let inferencer = try!(TypeInferencer::new(&grammar));
struct TypeInferencer<'a> {
token_type: TypeRef,
struct TypeInferencer<'grammar> {
token_type: &'grammar TypeRef,
stack: Vec<InternedString>,
nonterminals: HashMap<InternedString, NT<'a>>,
nonterminals: HashMap<InternedString, NT<'grammar>>,
types: Types,
struct NT<'a> {
#[derive(Copy, Clone)]
struct NT<'grammar> {
span: Span,
type_decl: &'a mut Option<TypeRef>,
alternatives: &'a Vec<Alternative>,
type_decl: &'grammar Option<TypeRef>,
alternatives: &'grammar Vec<Alternative>,
fn extract_token_type(grammar: &Grammar) -> NormResult<TypeRef> {
fn extract_token_type(grammar: &Grammar) -> NormResult<&TypeRef> {
let mut token_types =
.filter_map(|item| {
match *item {
GrammarItem::TokenType(ref data) => Some(data.type_name.clone()),
GrammarItem::TokenType(ref data) => Some(&data.type_name),
_ => None,
@ -52,19 +53,19 @@ fn extract_token_type(grammar: &Grammar) -> NormResult<TypeRef> {
impl<'a> TypeInferencer<'a> {
fn new(grammar: &'a mut Grammar) -> NormResult<TypeInferencer<'a>> {
impl<'grammar> TypeInferencer<'grammar> {
fn new(grammar: &'grammar Grammar) -> NormResult<TypeInferencer<'grammar>> {
let token_type =
let nonterminals =
.filter_map(|item| {
match *item {
GrammarItem::TokenType(..) =>
GrammarItem::Nonterminal(ref mut data) => {
GrammarItem::Nonterminal(ref data) => {
assert!(!data.is_macro_def()); // normalized away by now
Some((, NT::new(data)))
@ -74,53 +75,53 @@ impl<'a> TypeInferencer<'a> {
Ok(TypeInferencer { token_type: token_type,
stack: vec![],
nonterminals: nonterminals })
nonterminals: nonterminals,
types: Types::new() })
fn infer_types(&mut self) -> NormResult<()> {
fn infer_types(mut self) -> NormResult<Types> {
let ids: Vec<InternedString> =
.map(|(&id, _)| id)
for id in ids {
let ty = try!(self.nonterminal_type(id));
*self.nonterminals.get_mut(&id).unwrap().type_decl = Some(ty);
fn nonterminal_type(&mut self, id: InternedString) -> NormResult<TypeRef> {
let (span, type_decl, alternatives) = {
let nt = &self.nonterminals[&id];
(nt.span, nt.type_decl.clone(), nt.alternatives)
if self.stack.contains(&id) {
return_err!(span, "cannot infer type of `{}` because it references itself", id);
fn nonterminal_type(&mut self, id: InternedString) -> NormResult<TypeRepr> {
if let Some(repr) = self.types.nt_type(id) {
return Ok(repr.clone());
self.push(id, |this| {
let type_decl = type_decl; // FIXME rustc bug requires thisx
if let Some(mut type_decl) = type_decl {
try!(this.refresh_type(&mut type_decl));
return Ok(type_decl);
let nt = self.nonterminals[&id];
if self.stack.contains(&id) {
return_err!(nt.span, "cannot infer type of `{}` because it references itself", id);
let ty = try!(self.push(id, |this| {
if let &Some(ref type_decl) = nt.type_decl {
return this.type_ref(type_decl);
let mut alternative_types: Vec<TypeRef> =
.map(|alt| this.alternative_type(alt))
let mut alternative_types: Vec<_> = try! {
.map(|alt| this.alternative_type(alt))
// if there are no alternatives, then call it an error
if alternative_types.is_empty() {
"nonterminal `{}` has no alternatives and hence parse cannot succeed",
for (ty, alt) in alternative_types[1..].iter().zip(&alternatives[1..]) {
for (ty, alt) in alternative_types[1..].iter().zip(&nt.alternatives[1..]) {
if &alternative_types[0] != ty {
"type of this alternative is `{}`, \
@ -130,11 +131,14 @@ impl<'a> TypeInferencer<'a> {
self.types.add_type(id, ty.clone());
fn push<F,R>(&mut self, id: InternedString, f: F) -> R
where F: FnOnce(&mut TypeInferencer) -> R
fn push<F,R>(&mut self, id: InternedString, f: F) -> NormResult<R>
where F: FnOnce(&mut TypeInferencer) -> NormResult<R>
let r = f(self);
@ -142,75 +146,58 @@ impl<'a> TypeInferencer<'a> {
fn refresh_type(&mut self, type_ref: &mut TypeRef) -> NormResult<()> {
let replacement = match *type_ref {
TypeRef::Tuple(ref mut types) |
TypeRef::Nominal { path: _, ref mut types } => {
for t in types {
return Ok(());
fn type_ref(&mut self, type_ref: &TypeRef) -> NormResult<TypeRepr> {
match *type_ref {
TypeRef::Tuple(ref types) => {
let types = try! {
types.iter().map(|t| self.type_ref(t)).collect()
TypeRef::Lifetime(_) |
TypeRef::Id(_) => {
return Ok(());
TypeRef::Nominal { ref path, ref types } => {
let types = try! {
types.iter().map(|t| self.type_ref(t)).collect()
Ok(TypeRepr::Nominal { path: path.clone(), types: types })
TypeRef::Lifetime(id) => {
TypeRef::Id(id) => {
Ok(TypeRepr::Nominal { path: vec![id], types: vec![] })
TypeRef::OfSymbol(ref symbol) => {
*type_ref = replacement;
fn alternative_type(&mut self, alt: &Alternative) -> NormResult<TypeRef> {
// We can't infer types for alternatives with actions
if alt.action.is_some() {
return_err!(alt.span, "cannot infer types if there is custom action code");
fn alternative_type(&mut self, alt: &Alternative) -> NormResult<TypeRepr> {
match norm_util::analyze_action(alt) {
AlternativeAction::User(_) => {
return_err!(alt.span, "cannot infer types if there is custom action code");
// We can't infer types if there are named symbols, because
// that should be a struct and we don't know which one.
let named_symbols =
.filter(|sym| match **sym {
Symbol::Name(..) => true,
_ => false,
for sym in named_symbols {
"cannot infer types in the presence of named symbols like `{}`",
AlternativeAction::Default(Symbols::Named(ref syms)) => {
"cannot infer types in the presence of named symbols like `~{}:{}`",
syms[0].0, syms[0].1);
// Otherwise, make a tuple of the items they chose with a `~`.
let chosen_symbol_types: Vec<TypeRef> = try! {
.filter_map(|sym| match *sym {
Symbol::Choose(..) => Some(self.symbol_type(sym)),
_ => None,
if !chosen_symbol_types.is_empty() {
return Ok(maybe_tuple(chosen_symbol_types));
AlternativeAction::Default(Symbols::Anon(syms)) => {
let symbol_types: Vec<TypeRepr> = try! {
.map(|sym| self.symbol_type(sym))
// If they didn't choose anything with a `~`, make a tuple of everything.
let symbol_types: Vec<TypeRef> = try! {
.map(|sym| self.symbol_type(sym))
fn symbol_type(&mut self, symbol: &Symbol) -> NormResult<TypeRef> {
fn symbol_type(&mut self, symbol: &Symbol) -> NormResult<TypeRepr> {
match *symbol {
Symbol::Terminal(_) => Ok(self.token_type.clone()),
Symbol::Terminal(_) => self.type_ref(self.token_type),
Symbol::Nonterminal(id) => self.nonterminal_type(id),
Symbol::Choose(ref s) => self.symbol_type(s),
Symbol::Name(_, ref s) => self.symbol_type(s),
@ -222,29 +209,22 @@ impl<'a> TypeInferencer<'a> {
fn repeat_type(&mut self, repeat: &RepeatSymbol) -> NormResult<TypeRef> {
fn repeat_type(&mut self, repeat: &RepeatSymbol) -> NormResult<TypeRepr> {
let symbol_type = try!(self.symbol_type(&repeat.symbol));
let path = match repeat.op {
RepeatOp::Plus |
RepeatOp::Star =>
vec![intern("std"), intern("vec"), intern("Vec")],
RepeatOp::Question =>
vec![intern("std"), intern("option"), intern("Option")],
Ok(TypeRef::Nominal { path: path, types: vec![symbol_type] })
impl<'a> NT<'a> {
fn new(data: &'a mut NonterminalData) -> NT<'a> {
NT { span: data.span, type_decl: &mut data.type_decl, alternatives: &data.alternatives }
impl<'grammar> NT<'grammar> {
fn new(data: &'grammar NonterminalData) -> NT<'grammar> {
NT { span: data.span, type_decl: &data.type_decl, alternatives: &data.alternatives }
fn maybe_tuple(v: Vec<TypeRef>) -> TypeRef {
fn maybe_tuple(v: Vec<TypeRepr>) -> TypeRepr {
if v.len() == 1 {
} else {

View File

@ -1,16 +1,45 @@
use intern::intern;
use parser;
use normalize::macro_expand::expand_macros;
use normalize::tyinfer::infer_types;
use normalize::test_util;
use grammar::parse_tree::TypeRef;
use grammar::repr::TypeRepr;
fn compare(g1: &str, g2: &str) {
let actual = parser::parse_grammar(g1).unwrap();
let actual = expand_macros(actual).unwrap();
let actual = infer_types(actual).unwrap();
fn type_repr(s: &str) -> TypeRepr {
let type_ref = parser::parse_type_ref(s).unwrap();
return convert(type_ref);
let expected = parser::parse_grammar(g2).unwrap();
fn convert(t: TypeRef) -> TypeRepr {
match t {
TypeRef::Tuple(types) =>
TypeRef::Nominal { path, types } =>
TypeRepr::Nominal { path: path,
types: types.into_iter().map(convert).collect() },
TypeRef::Lifetime(id) =>
TypeRef::Id(id) =>
TypeRepr::Nominal { path: vec![id],
types: vec![] },
TypeRef::OfSymbol(_) =>
unreachable!("OfSymbol produced by parser")
test_util::compare(actual, expected);
fn compare(g1: &str, expected: Vec<(&'static str, &'static str)>) {
let grammar = parser::parse_grammar(g1).unwrap();
let grammar = expand_macros(grammar).unwrap();
let types = infer_types(&grammar).unwrap();
println!("types table: {:?}", types);
for (nt_id, nt_type) in expected {
let id = intern(nt_id);
let ty = type_repr(nt_type);
println!("expected type of {:?} is {:?}", id, ty);
assert_eq!(types.nt_type(id), Some(&ty));
@ -22,14 +51,11 @@ grammar Foo {
Y: Foo = \"Hi\";
Z = \"Ho\";
grammar Foo {
token Tok where { };
X: (Foo, Tok) = Y Z;
Y: Foo = \"Hi\";
Z: Tok = \"Ho\";
", vec![
("X", "(Foo, Tok)"),
("Y", "Foo"),
("Z", "Tok")
@ -46,7 +72,7 @@ grammar Foo {
let actual = expand_macros(grammar).unwrap();
@ -62,7 +88,7 @@ grammar Foo {
let actual = expand_macros(grammar).unwrap();
@ -73,13 +99,10 @@ grammar Foo {
Two<X>: (X, X) = X X;
Ids = Two<\"Id\">;
grammar Foo {
token Tok where { };
Ids: (Tok, Tok) = `Two<\"Id\">`;
`Two<\"Id\">`: (Tok, Tok) = \"Id\" \"Id\";
", vec![
("Ids", "(Tok, Tok)"),
(r#"Two<"Id">"#, "(Tok, Tok)"),
@ -90,13 +113,10 @@ grammar Foo {
Two<X> = X X;
Ids = Two<\"Id\">;
grammar Foo {
token Tok where { };
Ids: (Tok, Tok) = `Two<\"Id\">`;
`Two<\"Id\">`: (Tok, Tok) = \"Id\" \"Id\";
", vec![
("Ids", "(Tok, Tok)"),
(r#"Two<"Id">"#, "(Tok, Tok)"),
@ -107,11 +127,8 @@ grammar Foo {
X = Y?;
Y = \"Hi\";
grammar Foo {
token Tok where { };
X: std::option::Option<Tok> = Y?;
Y: Tok = \"Hi\";
("X", "std::option::Option<Tok>"),
("Y", "Tok")

View File

@ -76,7 +76,7 @@ rusty_peg! {
IF_COND: Condition =
("if" <c:COND>) => c;
ACTION: Action = ("=>" <b:CODE>) => Action::User(b);
ACTION: String = ("=>" <b:CODE>) => b;
// Conditions
@ -286,7 +286,7 @@ fn parse_nonterminal(text: &str) -> Result<GrammarItem,rusty_peg::Error> {
rusty_peg::Symbol::parse_complete(&NONTERMINAL, &mut parser, text)
fn parse_type_ref(text: &str) -> Result<TypeRef,rusty_peg::Error> {
pub fn parse_type_ref(text: &str) -> Result<TypeRef,rusty_peg::Error> {
let mut parser = Parser::new(());
rusty_peg::Symbol::parse_complete(&TYPE_REF, &mut parser, text)

src/ Normal file
View File

@ -0,0 +1,18 @@
use std::fmt::{Display, Formatter, Error};
pub struct Sep<S>(pub &'static str, pub S);
impl<'a,S:Display> Display for Sep<&'a Vec<S>> {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
let &Sep(sep, vec) = self;
let mut elems = vec.iter();
if let Some(elem) = {
try!(write!(fmt, "{}", elem));
while let Some(elem) = {
try!(write!(fmt, "{}{}", sep, elem));