Code up the lowering step (or a rough draft; no tests yet...)

This commit is contained in:
Niko Matsakis 2015-06-18 05:35:09 -04:00
parent e7b377793b
commit 6ea20abb8f
7 changed files with 220 additions and 34 deletions

View File

@ -224,7 +224,7 @@ impl Symbol {
pub fn type_repr(&self, types: &Types) -> TypeRepr {
match *self {
Symbol::Terminal(_) => types.terminal_type().clone(),
Symbol::Nonterminal(id) => types.nt_type(id).unwrap().clone(),
Symbol::Nonterminal(id) => types.nonterminal_type(id).clone(),
Symbol::Choose(ref s) => s.type_repr(types),
Symbol::Name(_, ref s) => s.type_repr(types),

View File

@ -5,28 +5,25 @@
*/
use intern::InternedString;
use grammar::parse_tree::Span;
use std::collections::HashMap;
use std::fmt::{Display, Formatter, Error};
use util::Sep;
#[derive(Clone, Debug, PartialEq, Eq)]
#[derive(Clone, Debug)]
pub struct Grammar {
pub token: TokenData,
pub action_fns: Vec<ActionFn>,
pub action_fn_defns: Vec<ActionFnDefn>,
pub productions: Vec<Production>,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct TokenData {
pub token_type: TypeRepr,
pub conversions: HashMap<InternedString, InternedString>,
pub types: Types,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Production {
pub span: Span,
pub nonterminal: InternedString,
pub symbols: Vec<Symbol>,
pub action_fn: ActionFnIndex,
pub action_fn: ActionFn,
}
#[derive(Clone, Debug, PartialEq, Eq)]
@ -36,10 +33,10 @@ pub enum Symbol {
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ActionFn {
pub arg_names: Vec<InternedString>,
pub struct ActionFnDefn {
pub arg_patterns: Vec<InternedString>,
pub arg_types: Vec<TypeRepr>,
pub ret_type: Vec<TypeRepr>,
pub ret_type: TypeRepr,
pub code: String,
}
@ -50,7 +47,7 @@ pub enum TypeRepr {
Lifetime(InternedString),
}
#[derive(Debug)]
#[derive(Clone, Debug)]
pub struct Types {
terminal_type: TypeRepr,
nonterminal_types: HashMap<InternedString, TypeRepr>
@ -70,9 +67,13 @@ impl Types {
&self.terminal_type
}
pub fn nt_type(&self, nt_id: InternedString) -> Option<&TypeRepr> {
pub fn lookup_nonterminal_type(&self, nt_id: InternedString) -> Option<&TypeRepr> {
self.nonterminal_types.get(&nt_id)
}
pub fn nonterminal_type(&self, nt_id: InternedString) -> &TypeRepr {
&self.nonterminal_types[&nt_id]
}
}
impl Display for TypeRepr {
@ -91,11 +92,11 @@ impl Display for TypeRepr {
}
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct ActionFnIndex(u32);
pub struct ActionFn(u32);
impl ActionFnIndex {
pub fn new(x: usize) -> ActionFnIndex {
ActionFnIndex(x as u32)
impl ActionFn {
pub fn new(x: usize) -> ActionFn {
ActionFn(x as u32)
}
pub fn index(&self) -> usize {
@ -103,3 +104,11 @@ impl ActionFnIndex {
}
}
impl Symbol {
    /// Returns the type of the value this symbol produces, borrowed from `t`.
    ///
    /// Every terminal shares the single terminal type; a nonterminal is
    /// looked up by its id.
    ///
    /// # Panics
    ///
    /// Panics if `t` has no type recorded for a nonterminal's id, because
    /// `Types::nonterminal_type` indexes its map directly.
    pub fn ty<'ty>(&self, t: &'ty Types) -> &'ty TypeRepr {
        match *self {
            // The id of a terminal is irrelevant: all terminals have one type.
            Symbol::Terminal(_) => t.terminal_type(),
            Symbol::Nonterminal(id) => t.nonterminal_type(id),
        }
    }
}

View File

@ -1,3 +1,179 @@
/*!
* Lower
*/
use intern::{self, intern, InternedString};
use normalize::NormResult;
use normalize::norm_util::{self, Symbols};
use grammar::parse_tree as pt;
use grammar::repr as r;
use std::collections::HashMap;
use util::Sep;
/// Lowers the parse-tree `grammar` into its `repr` form, using `types` as
/// the already-computed terminal and nonterminal types.
pub fn lower(grammar: pt::Grammar, types: r::Types) -> NormResult<r::Grammar> {
    LowerState::new(types).lower(grammar)
}
/// Working state for lowering: the `repr` grammar that is being filled in.
struct LowerState {
    grammar: r::Grammar,
}
impl LowerState {
fn new(types: r::Types) -> LowerState {
LowerState {
grammar: r::Grammar {
action_fn_defns: vec![],
productions: vec![],
conversions: HashMap::new(),
types: types
}
}
}
fn lower(mut self, grammar: pt::Grammar) -> NormResult<r::Grammar> {
for item in grammar.items {
match item {
pt::GrammarItem::TokenType(data) => {
self.grammar.conversions.extend(data.conversions);
}
pt::GrammarItem::Nonterminal(nt) => {
for alt in nt.alternatives {
let nt_type = self.grammar.types.nonterminal_type(nt.name).clone();
let symbols = self.symbols(&alt.expr.symbols);
let action_fn = self.action_fn(nt_type, &alt.expr, &symbols, alt.action);
let production = r::Production {
span: alt.span,
nonterminal: nt.name,
symbols: symbols,
action_fn: action_fn,
};
self.grammar.productions.push(production);
}
}
}
}
Ok(self.grammar)
}
fn action_fn(&mut self,
nt_type: r::TypeRepr,
expr: &pt::ExprSymbol,
symbols: &[r::Symbol],
action: Option<String>)
-> r::ActionFn
{
let action = match action {
Some(s) => s,
None => format!("(~~)"),
};
// Note that the action fn takes ALL of the symbols in `expr`
// as arguments, and some of them are simply dropped based on
// the user's selections.
// The set of argument types is thus the type of all symbols:
let arg_types: Vec<r::TypeRepr> =
symbols.iter().map(|s| s.ty(&self.grammar.types)).cloned().collect();
let action_fn_defn = match norm_util::analyze_expr(expr) {
Symbols::Named(names) => {
// if there are named symbols, we want to give the
// arguments the names that the user gave them:
let arg_patterns =
patterns(names.iter().map(|&(index, name, _)| (index, name)),
symbols.len());
r::ActionFnDefn {
arg_patterns: arg_patterns,
arg_types: arg_types,
ret_type: nt_type,
code: action
}
}
Symbols::Anon(indices) => {
let names: Vec<_> =
(0..indices.len()).map(|i| fresh_name(i, &action)).collect();
let arg_patterns =
patterns(indices.iter().map(|&(index, _)| index)
.zip(names.iter().cloned()),
symbols.len());
let name_str = intern::read(|interner| {
let name_strs: Vec<_> = names.iter().map(|&n| interner.data(n)).collect();
name_strs.connect(", ")
});
let action = action.replace("~~", &name_str);
r::ActionFnDefn {
arg_patterns: arg_patterns,
arg_types: arg_types,
ret_type: nt_type,
code: action
}
}
};
let index = r::ActionFn::new(self.grammar.action_fn_defns.len());
self.grammar.action_fn_defns.push(action_fn_defn);
index
}
fn symbols(&mut self, symbols: &[pt::Symbol]) -> Vec<r::Symbol> {
symbols.iter().map(|sym| self.symbol(sym)).collect()
}
fn symbol(&mut self, symbol: &pt::Symbol) -> r::Symbol {
match *symbol {
pt::Symbol::Terminal(id) => r::Symbol::Terminal(id),
pt::Symbol::Nonterminal(id) => r::Symbol::Nonterminal(id),
pt::Symbol::Choose(ref s) | pt::Symbol::Name(_, ref s) => self.symbol(s),
pt::Symbol::Macro(..) | pt::Symbol::Repeat(..) | pt::Symbol::Expr(..) => {
unreachable!("symbol `{}` should have been normalized away by now", symbol)
}
}
}
}
/// Produces one argument pattern per position in `0..num_args`.
///
/// `chosen` yields `(index, name)` pairs; a position whose index comes up
/// next in `chosen` gets that name, every other position gets `_`. In debug
/// builds, asserts that `chosen` was fully consumed.
fn patterns<I>(mut chosen: I, num_args: usize) -> Vec<InternedString>
    where I: Iterator<Item=(usize, InternedString)>
{
    let blank = intern("_");

    // The next pair waiting for its position to come up.
    let mut pending = chosen.next();

    let mut result = Vec::with_capacity(num_args);
    for position in 0..num_args {
        let pattern = match pending {
            Some((at, name)) if at == position => {
                pending = chosen.next();
                name
            }
            _ => blank,
        };
        result.push(pattern);
    }

    debug_assert!(pending.is_none());

    result
}
/// Interns a name of the form `__<counter>` followed by as many extra
/// underscores as it takes for the name not to occur in `action_str`.
fn fresh_name(counter: usize, action_str: &str) -> InternedString {
    let mut candidate = format!("__{}", counter);

    // A plain substring test is deliberately conservative: it also rejects
    // harmless matches, such as the candidate appearing inside a string
    // literal (`print("__1")`) or as a prefix of a longer identifier
    // (`__1x`). One needless underscore in those cases costs nothing.
    loop {
        if !action_str.contains(&candidate) {
            break;
        }
        candidate.push('_');
    }

    intern(&candidate)
}

View File

@ -306,7 +306,7 @@ impl MacroExpander {
let ty_ref = match norm_util::analyze_expr(&expr) {
Symbols::Named(names) => {
let (ex_id, ex_sym) = names[0];
let (_, ex_id, ex_sym) = names[0];
return_err!(
expr.span,
"named symbols like `~{}:{}` are only allowed at the top-level of a nonterminal",
@ -315,8 +315,7 @@ impl MacroExpander {
Symbols::Anon(syms) => {
maybe_tuple(
syms.into_iter()
.cloned()
.map(TypeRef::OfSymbol)
.map(|(_, s)| TypeRef::OfSymbol(s.clone()))
.collect())
}
};

View File

@ -9,8 +9,8 @@ pub enum AlternativeAction<'a> {
#[derive(Debug)]
pub enum Symbols<'a> {
Named(Vec<(InternedString, &'a Symbol)>),
Anon(Vec<&'a Symbol>),
Named(Vec<(usize, InternedString, &'a Symbol)>),
Anon(Vec<(usize, &'a Symbol)>),
}
pub fn analyze_action<'a>(alt: &'a Alternative) -> AlternativeAction<'a> {
@ -27,8 +27,9 @@ pub fn analyze_expr<'a>(expr: &'a ExprSymbol) -> Symbols<'a> {
let named_symbols: Vec<_> =
expr.symbols
.iter()
.filter_map(|sym| match *sym {
Symbol::Name(id, ref sub) => Some((id, &**sub)),
.enumerate()
.filter_map(|(idx, sym)| match *sym {
Symbol::Name(id, ref sub) => Some((idx, id, &**sub)),
_ => None,
})
.collect();
@ -40,8 +41,9 @@ pub fn analyze_expr<'a>(expr: &'a ExprSymbol) -> Symbols<'a> {
let chosen_symbol_types: Vec<_> =
expr.symbols
.iter()
.filter_map(|sym| match *sym {
Symbol::Choose(ref sub) => Some(&**sub),
.enumerate()
.filter_map(|(idx, sym)| match *sym {
Symbol::Choose(ref sub) => Some((idx, &**sub)),
_ => None,
})
.collect();
@ -50,5 +52,5 @@ pub fn analyze_expr<'a>(expr: &'a ExprSymbol) -> Symbols<'a> {
}
// If they didn't choose anything with a `~`, make a tuple of everything.
Symbols::Anon(expr.symbols.iter().collect())
Symbols::Anon(expr.symbols.iter().enumerate().collect())
}

View File

@ -85,14 +85,14 @@ impl<'grammar> TypeInferencer<'grammar> {
for id in ids {
try!(self.nonterminal_type(id));
debug_assert!(self.types.nt_type(id).is_some());
debug_assert!(self.types.lookup_nonterminal_type(id).is_some());
}
Ok(self.types)
}
fn nonterminal_type(&mut self, id: InternedString) -> NormResult<TypeRepr> {
if let Some(repr) = self.types.nt_type(id) {
if let Some(repr) = self.types.lookup_nonterminal_type(id) {
return Ok(repr.clone());
}
@ -181,13 +181,13 @@ impl<'grammar> TypeInferencer<'grammar> {
AlternativeAction::Default(Symbols::Named(ref syms)) => {
return_err!(alt.span,
"cannot infer types in the presence of named symbols like `~{}:{}`",
syms[0].0, syms[0].1);
syms[0].1, syms[0].2);
}
AlternativeAction::Default(Symbols::Anon(syms)) => {
let symbol_types: Vec<TypeRepr> = try! {
syms.iter()
.map(|sym| self.symbol_type(sym))
.map(|&(_, sym)| self.symbol_type(sym))
.collect()
};
Ok(maybe_tuple(symbol_types))

View File

@ -20,7 +20,7 @@ fn compare(g1: &str, expected: Vec<(&'static str, &'static str)>) {
let id = intern(nt_id);
let ty = type_repr(nt_type);
println!("expected type of {:?} is {:?}", id, ty);
assert_eq!(types.nt_type(id), Some(&ty));
assert_eq!(types.nonterminal_type(id), &ty);
}
}