revamp how we token-check and store the InternToken

In `InternToken`, we now coalesce everything into one
`Vec<MatchEntry>`, rather than using a vector and a map.  In the
token-check code, the various fields associated with a match are moved
into a struct.
This commit is contained in:
Niko Matsakis 2017-03-29 22:59:38 -06:00
parent b75669c8d6
commit 6fe7377c22
9 changed files with 281 additions and 220 deletions

View File

@ -15,7 +15,6 @@ use message::builder::InlineBuilder;
use std::fmt::{Debug, Display, Formatter, Error}; use std::fmt::{Debug, Display, Formatter, Error};
use tls::Tls; use tls::Tls;
use util::Sep; use util::Sep;
use collections::Map;
#[derive(Clone, Debug, PartialEq, Eq)] #[derive(Clone, Debug, PartialEq, Eq)]
pub struct Grammar { pub struct Grammar {
@ -118,15 +117,49 @@ pub type MatchMapping = TerminalString;
pub struct InternToken { pub struct InternToken {
/// Set of `r"foo"` and `"foo"` literals extracted from the /// Set of `r"foo"` and `"foo"` literals extracted from the
/// grammar. Sorted by order of increasing precedence. /// grammar. Sorted by order of increasing precedence.
pub literals: Vec<TerminalLiteral>, pub match_entries: Vec<MatchEntry>,
/// For each item remapped in a `match` block, map from the
/// regex we match to the name the user wants to use.
pub match_to_user_name_map: Map<TerminalLiteral, TerminalString>,
pub dfa: DFA pub dfa: DFA
} }
/// In `token_check`, as we prepare to generate a tokenizer, we
/// combine any `match` declaration the user may have given with the
/// set of literals (e.g. `"foo"` or `r"[a-z]"`) that appear elsewhere
/// in their grammar to produce a series of `MatchEntry`. Each
/// `MatchEntry` roughly corresponds to one line in a `match` declaration.
///
/// So e.g. if you had
///
/// ```
/// match {
/// r"(?i)BEGIN" => "BEGIN",
/// "+" => "+",
/// } else {
/// _
/// }
///
/// ID = r"[a-zA-Z]+"
/// ```
///
/// This would correspond to three match entries:
/// - `MatchEntry { match_literal: r"(?i)BEGIN", user_name: "BEGIN", precedence: 2 }`
/// - `MatchEntry { match_literal: "+", user_name: "+", precedence: 3 }`
/// - `MatchEntry { match_literal: r"[a-zA-Z]+", user_name: r"[a-zA-Z]+", precedence: 0 }`
///
/// A couple of things to note:
///
/// - Literals appearing in the grammar are converted into an "identity" mapping
/// - Each match group G is combined with the implicit priority IP of 1 for literals and 0 for
/// regex to yield the final precedence; the formula is `G*2 + IP`.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct MatchEntry {
/// The precedence of this match entry.
///
/// NB: This field must go first, so that `PartialOrd` sorts by precedence first!
pub precedence: usize,
pub match_literal: TerminalLiteral,
pub user_name: TerminalString,
}
#[derive(Clone, Debug, PartialEq, Eq)] #[derive(Clone, Debug, PartialEq, Eq)]
pub struct ExternToken { pub struct ExternToken {
pub span: Span, pub span: Span,
@ -330,28 +363,18 @@ impl TerminalString {
#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] #[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum TerminalLiteral { pub enum TerminalLiteral {
Quoted(InternedString, usize), Quoted(InternedString),
Regex(InternedString, usize), Regex(InternedString),
} }
impl TerminalLiteral { impl TerminalLiteral {
/// Currently, at least, quoted literals ("foo") always have /// The *base precedence* is the precedence within a `match { }`
/// higher precedence than regex literals (r"foo"). This only /// block level. It indicates that quoted things like `"foo"` get
/// applies when we are creating the tokenizer anyhow. /// precedence over regex matches.
pub fn precedence(&self) -> usize { pub fn base_precedence(&self) -> usize {
match *self { match *self {
TerminalLiteral::Quoted(_, p) => p, TerminalLiteral::Quoted(_) => 1,
TerminalLiteral::Regex(_, p) => p, TerminalLiteral::Regex(_) => 0,
}
}
pub fn with_match_precedence(self, p: usize) -> TerminalLiteral {
// Multiply times two since we still want to distinguish
// between quoted and regex precedence
let base_precedence = p * 2;
match self {
TerminalLiteral::Quoted(i, _) => TerminalLiteral::Quoted(i, base_precedence+1),
TerminalLiteral::Regex(i, _) => TerminalLiteral::Regex(i, base_precedence+0),
} }
} }
} }
@ -391,11 +414,11 @@ pub struct MacroSymbol {
impl TerminalString { impl TerminalString {
pub fn quoted(i: InternedString) -> TerminalString { pub fn quoted(i: InternedString) -> TerminalString {
TerminalString::Literal(TerminalLiteral::Quoted(i, 1)) TerminalString::Literal(TerminalLiteral::Quoted(i))
} }
pub fn regex(i: InternedString) -> TerminalString { pub fn regex(i: InternedString) -> TerminalString {
TerminalString::Literal(TerminalLiteral::Regex(i, 0)) TerminalString::Literal(TerminalLiteral::Regex(i))
} }
} }
@ -523,9 +546,9 @@ impl Debug for TerminalString {
impl Display for TerminalLiteral { impl Display for TerminalLiteral {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> { fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
match *self { match *self {
TerminalLiteral::Quoted(s, _) => TerminalLiteral::Quoted(s) =>
write!(fmt, "{:?}", s), // the Debug impl adds the `"` and escaping write!(fmt, "{:?}", s), // the Debug impl adds the `"` and escaping
TerminalLiteral::Regex(s, _) => TerminalLiteral::Regex(s) =>
write!(fmt, "r#{:?}#", s), // FIXME -- need to determine proper number of # write!(fmt, "r#{:?}#", s), // FIXME -- need to determine proper number of #
} }
} }
@ -533,10 +556,7 @@ impl Display for TerminalLiteral {
impl Debug for TerminalLiteral { impl Debug for TerminalLiteral {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> { fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
match *self { write!(fmt, "{}", self)
TerminalLiteral::Quoted(_, p) | TerminalLiteral::Regex(_, p) =>
write!(fmt, "{}+{}", self, p)
}
} }
} }

View File

@ -59,11 +59,11 @@ pub fn compile<W: Write>(
// create a vector of rust string literals with the text of each // create a vector of rust string literals with the text of each
// regular expression // regular expression
let regex_strings: Vec<String> = intern::read(|interner| { let regex_strings: Vec<String> = intern::read(|interner| {
intern_token.literals intern_token.match_entries
.iter() .iter()
.map(|&literal| match literal { .map(|match_entry| match match_entry.match_literal {
TerminalLiteral::Quoted(s, _) => re::parse_literal(interner.data(s)), TerminalLiteral::Quoted(s) => re::parse_literal(interner.data(s)),
TerminalLiteral::Regex(s, _) => re::parse_regex(interner.data(s)).unwrap(), TerminalLiteral::Regex(s) => re::parse_regex(interner.data(s)).unwrap(),
}) })
.map(|regex| { .map(|regex| {
// make sure all regex are anchored at the beginning of the input // make sure all regex are anchored at the beginning of the input
@ -134,7 +134,7 @@ pub fn compile<W: Write>(
// checking if each one matches, and remembering the longest one. // checking if each one matches, and remembering the longest one.
rust!(out, "let mut {}longest_match = 0;", prefix); // length of longest match rust!(out, "let mut {}longest_match = 0;", prefix); // length of longest match
rust!(out, "let mut {}index = 0;", prefix); // index of longest match rust!(out, "let mut {}index = 0;", prefix); // index of longest match
rust!(out, "for {}i in 0 .. {} {{", prefix, intern_token.literals.len()); rust!(out, "for {}i in 0 .. {} {{", prefix, intern_token.match_entries.len());
rust!(out, "if {}matches.matched({}i) {{", prefix, prefix); rust!(out, "if {}matches.matched({}i) {{", prefix, prefix);
// re-run the regex to find out how long this particular match // re-run the regex to find out how long this particular match

View File

@ -71,31 +71,27 @@ impl<'s> LowerState<'s> {
types: vec![], types: vec![],
})), })),
}; };
self.conversions.extend(data.literals self.conversions.extend(
.iter() data.match_entries
.enumerate() .iter()
.map(|(index, &literal)| { .enumerate()
let pattern = Pattern { .map(|(index, match_entry)| {
span: span, let pattern = Pattern {
kind: PatternKind::Tuple(vec![ span: span,
Pattern { kind: PatternKind::Tuple(vec![
span: span, Pattern {
kind: PatternKind::Usize(index), span: span,
}, kind: PatternKind::Usize(index),
Pattern { },
span: span, Pattern {
kind: PatternKind::Choose(input_str.clone()) span: span,
} kind: PatternKind::Choose(input_str.clone())
]), }
}; ]),
};
// FIXME: This should be cleaner (match_entry.user_name, pattern)
if let Some(&m) = data.match_to_user_name_map.get(&literal) { }));
return (m, pattern);
}
(TerminalString::Literal(literal), pattern)
}));
self.intern_token = Some(data); self.intern_token = Some(data);
} }

View File

@ -252,7 +252,7 @@ impl MacroExpander {
{ {
if let Some(ref c) = *opt_cond { if let Some(ref c) = *opt_cond {
match args[&c.lhs] { match args[&c.lhs] {
SymbolKind::Terminal(TerminalString::Literal(TerminalLiteral::Quoted(lhs, _))) => { SymbolKind::Terminal(TerminalString::Literal(TerminalLiteral::Quoted(lhs))) => {
match c.op { match c.op {
ConditionOp::Equals => Ok(lhs == c.rhs), ConditionOp::Equals => Ok(lhs == c.rhs),
ConditionOp::NotEquals => Ok(lhs != c.rhs), ConditionOp::NotEquals => Ok(lhs != c.rhs),

View File

@ -13,19 +13,16 @@ use lexer::dfa::{self, DFAConstructionError, Precedence};
use lexer::nfa::NFAConstructionError::*; use lexer::nfa::NFAConstructionError::*;
use grammar::consts::*; use grammar::consts::*;
use grammar::parse_tree::*; use grammar::parse_tree::*;
use collections::Set; use collections::{Map, Set};
use collections::{map, Map};
#[cfg(test)] #[cfg(test)]
mod test; mod test;
pub fn validate(mut grammar: Grammar) -> NormResult<Grammar> { pub fn validate(mut grammar: Grammar) -> NormResult<Grammar> {
let (has_enum_token, all_literals, match_to_user_name_map) = { let (has_enum_token, match_block) = {
let opt_match_token = grammar.match_token(); let opt_match_token = grammar.match_token();
let mut match_to_user_name_map = map(); let mut match_block = MatchBlock::default();
let mut user_name_to_match_map = map();
let mut match_catch_all = false;
if let Some(mt) = opt_match_token { if let Some(mt) = opt_match_token {
// FIXME: This should probably move _inside_ the Validator // FIXME: This should probably move _inside_ the Validator
@ -34,50 +31,50 @@ pub fn validate(mut grammar: Grammar) -> NormResult<Grammar> {
for item in &mc.items { for item in &mc.items {
// TODO: Maybe move this into MatchItem methods // TODO: Maybe move this into MatchItem methods
match *item { match *item {
MatchItem::Unmapped(sym, _) => { MatchItem::Unmapped(sym, span) => {
let precedence_sym = sym.with_match_precedence(precedence); match_block.add_match_entry(precedence,
match_to_user_name_map.insert(precedence_sym, TerminalString::Literal(sym)); sym,
user_name_to_match_map.insert(TerminalString::Literal(sym), precedence_sym); TerminalString::Literal(sym),
}, span)?;
MatchItem::Mapped(sym, mapping, _) => { }
let precedence_sym = sym.with_match_precedence(precedence); MatchItem::Mapped(sym, user, span) => {
match_to_user_name_map.insert(precedence_sym, mapping); match_block.add_match_entry(precedence, sym, user, span)?;
user_name_to_match_map.insert(mapping, precedence_sym); }
}, MatchItem::CatchAll(_) => {
MatchItem::CatchAll(_) => { match_catch_all = true; } match_block.catch_all = true;
}; }
}
} }
} }
} else { } else {
// no match block is equivalent to `match { _ }` // no match block is equivalent to `match { _ }`
match_catch_all = true; match_block.catch_all = true;
} }
let opt_enum_token = grammar.enum_token(); let opt_enum_token = grammar.enum_token();
let conversions = opt_enum_token.map(|et| { let conversions = opt_enum_token.map(|et| {
et.conversions.iter() et.conversions
.map(|conversion| conversion.from) .iter()
.collect() .map(|conversion| conversion.from)
}); .collect()
});
let mut validator = Validator { let mut validator = Validator {
grammar: &grammar, grammar: &grammar,
all_literals: map(),
conversions: conversions, conversions: conversions,
user_name_to_match_map: user_name_to_match_map, match_block: match_block,
match_catch_all: match_catch_all
}; };
assert!(!opt_match_token.is_some() || !opt_enum_token.is_some(), assert!(!opt_match_token.is_some() || !opt_enum_token.is_some(),
"expected to not have both match and extern"); "expected to not have both match and extern");
try!(validator.validate()); try!(validator.validate());
(opt_enum_token.is_some(), validator.all_literals, match_to_user_name_map) (opt_enum_token.is_some(), validator.match_block)
}; };
if !has_enum_token { if !has_enum_token {
try!(construct(&mut grammar, all_literals, match_to_user_name_map)); try!(construct(&mut grammar, match_block));
} }
Ok(grammar) Ok(grammar)
@ -91,20 +88,100 @@ pub fn validate(mut grammar: Grammar) -> NormResult<Grammar> {
struct Validator<'grammar> { struct Validator<'grammar> {
grammar: &'grammar Grammar, grammar: &'grammar Grammar,
all_literals: Map<TerminalLiteral, Span>,
/// If an external tokenizer is in use, then this will be
/// `Some(_)` and will point to all the defined conversions. In
/// that case, the other fields below are irrelevant.
conversions: Option<Set<TerminalString>>, conversions: Option<Set<TerminalString>>,
user_name_to_match_map: Map<TerminalString, TerminalLiteral>,
match_catch_all: bool, match_block: MatchBlock,
}
/// Data summarizing the `match { }` block, along with any literals we
/// scraped up.
#[derive(Default)]
struct MatchBlock {
/// This map stores the `match { }` entries. If `match_catch_all`
/// is true, then we will grow this set with "identity mappings"
/// for new literals that we find.
match_entries: Vec<MatchEntry>,
/// The names of all terminals the user can legally type. If
/// `match_catch_all` is true, then if we encounter additional
/// terminal literals in the grammar, we will add them to this
/// set.
match_user_names: Set<TerminalString>,
/// For each terminal literal that we have to match, the span
/// where it appeared in user's source. This can either be in the
/// `match { }` section or else in the grammar somewhere (if added
/// due to a catch-all, or there is no match section).
spans: Map<TerminalLiteral, Span>,
/// True if we should permit unrecognized literals to be used.
catch_all: bool,
}
impl MatchBlock {
fn add_match_entry(&mut self,
match_group_precedence: usize,
sym: TerminalLiteral,
user_name: TerminalString,
span: Span)
-> NormResult<()> {
if let Some(_old_span) = self.spans.insert(sym, span) {
return_err!(span, "multiple match entries for `{}`", sym);
}
// NB: It's legal for multiple regex to produce same terminal.
self.match_user_names.insert(user_name);
self.match_entries
.push(MatchEntry {
precedence: match_group_precedence * 2 + sym.base_precedence(),
match_literal: sym,
user_name: user_name,
});
Ok(())
}
fn add_literal_from_grammar(&mut self, sym: TerminalLiteral, span: Span) -> NormResult<()> {
// Already saw this literal, maybe in a match entry, maybe in the grammar.
if self.match_user_names
.contains(&TerminalString::Literal(sym)) {
return Ok(());
}
if !self.catch_all {
return_err!(span,
"terminal `{}` does not have a match mapping defined for it",
sym);
}
self.match_user_names
.insert(TerminalString::Literal(sym));
self.match_entries
.push(MatchEntry {
precedence: sym.base_precedence(),
match_literal: sym,
user_name: TerminalString::Literal(sym),
});
self.spans.insert(sym, span);
Ok(())
}
} }
impl<'grammar> Validator<'grammar> { impl<'grammar> Validator<'grammar> {
fn validate(&mut self) -> NormResult<()> { fn validate(&mut self) -> NormResult<()> {
for item in &self.grammar.items { for item in &self.grammar.items {
match *item { match *item {
GrammarItem::Use(..) => { } GrammarItem::Use(..) => {}
GrammarItem::MatchToken(..) => { } GrammarItem::MatchToken(..) => {}
GrammarItem::ExternToken(_) => { } GrammarItem::ExternToken(_) => {}
GrammarItem::InternToken(_) => { } GrammarItem::InternToken(_) => {}
GrammarItem::Nonterminal(ref data) => { GrammarItem::Nonterminal(ref data) => {
for alternative in &data.alternatives { for alternative in &data.alternatives {
try!(self.validate_alternative(alternative)); try!(self.validate_alternative(alternative));
@ -136,16 +213,15 @@ impl<'grammar> Validator<'grammar> {
SymbolKind::Terminal(term) => { SymbolKind::Terminal(term) => {
try!(self.validate_terminal(symbol.span, term)); try!(self.validate_terminal(symbol.span, term));
} }
SymbolKind::Nonterminal(_) => { SymbolKind::Nonterminal(_) => {}
}
SymbolKind::Repeat(ref repeat) => { SymbolKind::Repeat(ref repeat) => {
try!(self.validate_symbol(&repeat.symbol)); try!(self.validate_symbol(&repeat.symbol));
} }
SymbolKind::Choose(ref sym) | SymbolKind::Name(_, ref sym) => { SymbolKind::Choose(ref sym) |
SymbolKind::Name(_, ref sym) => {
try!(self.validate_symbol(sym)); try!(self.validate_symbol(sym));
} }
SymbolKind::Lookahead | SymbolKind::Lookbehind | SymbolKind::Error => { SymbolKind::Lookahead | SymbolKind::Lookbehind | SymbolKind::Error => {}
}
SymbolKind::AmbiguousId(id) => { SymbolKind::AmbiguousId(id) => {
panic!("ambiguous id `{}` encountered after name resolution", id) panic!("ambiguous id `{}` encountered after name resolution", id)
} }
@ -163,49 +239,29 @@ impl<'grammar> Validator<'grammar> {
// this terminal has a defined conversion. // this terminal has a defined conversion.
Some(ref c) => { Some(ref c) => {
if !c.contains(&term) { if !c.contains(&term) {
return_err!(span, "terminal `{}` does not have a pattern defined for it", return_err!(span,
"terminal `{}` does not have a pattern defined for it",
term); term);
} }
} }
// If there is no extern token definition, then collect // If there is no extern token definition, then collect
// the terminal literals ("class", r"[a-z]+") into a set. // the terminal literals ("class", r"[a-z]+") into a set.
None => match term { None => {
// FIMXE: Should not allow undefined literals if no CatchAll match term {
TerminalString::Bare(c) => match self.user_name_to_match_map.get(&term) { TerminalString::Bare(_) => {
Some(&vl) => { assert!(self.match_block.match_user_names.contains(&term),
// FIXME: I don't think this span here is correct "bare terminal without match entry: {}",
self.all_literals.entry(vl).or_insert(span); term)
} }
None => { TerminalString::Literal(l) => {
// Bare identifiers like `x` can never be resolved self.match_block.add_literal_from_grammar(l, span)?
// as terminals unless there is a conversion or mapping
// defined for them that indicates they are a
// terminal; otherwise it's just an unresolved
// identifier.
panic!("bare literal `{}` without extern token definition", c);
}
},
TerminalString::Literal(l) => match self.user_name_to_match_map.get(&term) {
Some(&vl) => {
// FIXME: I don't think this span here is correct
self.all_literals.entry(vl).or_insert(span);
} }
None => { // Error is a builtin terminal that always exists
if self.match_catch_all { TerminalString::Error => (),
self.all_literals.entry(l).or_insert(span); }
} else {
return_err!(span, "terminal `{}` does not have a match mapping defined for it",
term);
}
}
},
// Error is a builtin terminal that always exists
TerminalString::Error => (),
} }
} }
@ -217,38 +273,36 @@ impl<'grammar> Validator<'grammar> {
// Construction phase -- if we are constructing a tokenizer, this // Construction phase -- if we are constructing a tokenizer, this
// phase builds up an internal token DFA. // phase builds up an internal token DFA.
pub fn construct(grammar: &mut Grammar, literals_map: Map<TerminalLiteral, Span>, match_to_user_name_map: Map<TerminalLiteral, TerminalString>) -> NormResult<()> { fn construct(grammar: &mut Grammar, match_block: MatchBlock) -> NormResult<()> {
let mut literals: Vec<TerminalLiteral> = let MatchBlock {
literals_map.keys() mut match_entries,
.cloned() spans,
.collect(); ..
} = match_block;
// Sort literals by order of increasing precedence. // Sort match entries by order of increasing precedence.
literals.sort_by_key(|literal| literal.precedence()); match_entries.sort();
// Build up two vectors, one of parsed regular expressions and // Build up two vectors, one of parsed regular expressions and
// one of precedences, that are parallel with `literals`. // one of precedences, that are parallel with `literals`.
let mut regexs = Vec::with_capacity(literals.len()); let mut regexs = Vec::with_capacity(match_entries.len());
let mut precedences = Vec::with_capacity(literals.len()); let mut precedences = Vec::with_capacity(match_entries.len());
try!(intern::read(|interner| { try!(intern::read(|interner| {
for &literal in &literals { for match_entry in &match_entries {
precedences.push(Precedence(literal.precedence())); precedences.push(Precedence(match_entry.precedence));
match literal { match match_entry.match_literal {
TerminalLiteral::Quoted(s, _) => { TerminalLiteral::Quoted(s) => {
regexs.push(re::parse_literal(interner.data(s))); regexs.push(re::parse_literal(interner.data(s)));
} }
TerminalLiteral::Regex(s, _) => { TerminalLiteral::Regex(s) => {
match re::parse_regex(interner.data(s)) { match re::parse_regex(interner.data(s)) {
Ok(regex) => regexs.push(regex), Ok(regex) => regexs.push(regex),
Err(error) => { Err(error) => {
let literal_span = literals_map[&literal]; let literal_span = spans[&match_entry.match_literal];
// FIXME -- take offset into account for // FIXME -- take offset into account for
// span; this requires knowing how many # // span; this requires knowing how many #
// the user used, which we do not track // the user used, which we do not track
return_err!( return_err!(literal_span, "invalid regular expression: {}", error);
literal_span,
"invalid regular expression: {}",
error);
} }
} }
} }
@ -267,31 +321,28 @@ pub fn construct(grammar: &mut Grammar, literals_map: Map<TerminalLiteral, Span>
LineBoundary => r#"line boundaries (`^` or `$`)"#, LineBoundary => r#"line boundaries (`^` or `$`)"#,
TextBoundary => r#"text boundaries (`^` or `$`)"#, TextBoundary => r#"text boundaries (`^` or `$`)"#,
}; };
let literal = literals[index.index()]; let literal = match_entries[index.index()].match_literal;
let span = literals_map[&literal]; return_err!(spans[&literal],
return_err!( "{} are not supported in regular expressions",
span, feature)
"{} are not supported in regular expressions",
feature)
} }
Err(DFAConstructionError::Ambiguity { match0, match1 }) => { Err(DFAConstructionError::Ambiguity { match0, match1 }) => {
let literal0 = literals[match0.index()]; let literal0 = match_entries[match0.index()].match_literal;
let literal1 = literals[match1.index()]; let literal1 = match_entries[match1.index()].match_literal;
let span0 = literals_map[&literal0];
let _span1 = literals_map[&literal1];
// FIXME(#88) -- it'd be nice to give an example here // FIXME(#88) -- it'd be nice to give an example here
return_err!( return_err!(spans[&literal0],
span0, "ambiguity detected between the terminal `{}` and the terminal `{}`",
"ambiguity detected between the terminal `{}` and the terminal `{}`", literal0,
literal0, literal1); literal1)
} }
}; };
grammar.items.push(GrammarItem::InternToken(InternToken { grammar
literals: literals, .items
match_to_user_name_map: match_to_user_name_map, .push(GrammarItem::InternToken(InternToken {
dfa: dfa match_entries: match_entries,
})); dfa: dfa,
}));
// we need to inject a `'input` lifetime and `input: &'input str` parameter as well: // we need to inject a `'input` lifetime and `input: &'input str` parameter as well:
@ -299,38 +350,36 @@ pub fn construct(grammar: &mut Grammar, literals_map: Map<TerminalLiteral, Span>
for parameter in &grammar.type_parameters { for parameter in &grammar.type_parameters {
match *parameter { match *parameter {
TypeParameter::Lifetime(i) if i == input_lifetime => { TypeParameter::Lifetime(i) if i == input_lifetime => {
return_err!( return_err!(grammar.span,
grammar.span, "since there is no external token enum specified, \
"since there is no external token enum specified, \
the `'input` lifetime is implicit and cannot be declared"); the `'input` lifetime is implicit and cannot be declared");
} }
_ => { } _ => {}
} }
} }
let input_parameter = intern(INPUT_PARAMETER); let input_parameter = intern(INPUT_PARAMETER);
for parameter in &grammar.parameters { for parameter in &grammar.parameters {
if parameter.name == input_parameter { if parameter.name == input_parameter {
return_err!( return_err!(grammar.span,
grammar.span, "since there is no external token enum specified, \
"since there is no external token enum specified, \
the `input` parameter is implicit and cannot be declared"); the `input` parameter is implicit and cannot be declared");
} }
} }
grammar.type_parameters.insert(0, TypeParameter::Lifetime(input_lifetime)); grammar
.type_parameters
.insert(0, TypeParameter::Lifetime(input_lifetime));
let parameter = Parameter { let parameter = Parameter {
name: input_parameter, name: input_parameter,
ty: TypeRef::Ref { ty: TypeRef::Ref {
lifetime: Some(input_lifetime), lifetime: Some(input_lifetime),
mutable: false, mutable: false,
referent: Box::new(TypeRef::Id(intern("str"))) referent: Box::new(TypeRef::Id(intern("str"))),
} },
}; };
grammar.parameters.push(parameter); grammar.parameters.push(parameter);
Ok(()) Ok(())
} }

View File

@ -24,19 +24,19 @@ fn check_intern_token(grammar: &str,
let parsed_grammar = validate_grammar(&grammar).expect("validate"); let parsed_grammar = validate_grammar(&grammar).expect("validate");
let intern_token = parsed_grammar.intern_token().expect("intern_token"); let intern_token = parsed_grammar.intern_token().expect("intern_token");
println!("intern_token: {:?}", intern_token); println!("intern_token: {:?}", intern_token);
for (input, expected_literal) in expected_tokens { for (input, expected_user_name) in expected_tokens {
let actual_literal = let actual_user_name =
interpret::interpret(&intern_token.dfa, input) interpret::interpret(&intern_token.dfa, input)
.map(|(index, text)| { .map(|(index, text)| {
let literal = intern_token.literals[index.index()]; let user_name = intern_token.match_entries[index.index()].user_name;
(literal, text) (user_name, text)
}); });
let actual_literal = format!("{:?}", actual_literal); let actual_user_name = format!("{:?}", actual_user_name);
if expected_literal != actual_literal { if expected_user_name != actual_user_name {
panic!("input `{}` matched `{}` but we expected `{}`", panic!("input `{}` matched `{}` but we expected `{}`",
input, input,
actual_literal, actual_user_name,
expected_literal); expected_user_name);
} }
} }
} }
@ -85,11 +85,11 @@ fn invalid_regular_expression_unterminated_group() {
fn quoted_literals() { fn quoted_literals() {
check_intern_token( check_intern_token(
r#"grammar; X = X "+" "-" "foo" "(" ")";"#, r#"grammar; X = X "+" "-" "foo" "(" ")";"#,
vec![("+", r#"Some(("+"+1, "+"))"#), vec![("+", r#"Some(("+", "+"))"#),
("-", r#"Some(("-"+1, "-"))"#), ("-", r#"Some(("-", "-"))"#),
("(", r#"Some(("("+1, "("))"#), ("(", r#"Some(("(", "("))"#),
(")", r#"Some((")"+1, ")"))"#), (")", r#"Some((")", ")"))"#),
("foo", r#"Some(("foo"+1, "foo"))"#), ("foo", r#"Some(("foo", "foo"))"#),
("<", r#"None"#)]); ("<", r#"None"#)]);
} }
@ -98,10 +98,10 @@ fn regex_literals() {
check_intern_token( check_intern_token(
r#"grammar; X = X r"[a-z]+" r"[0-9]+";"#, r#"grammar; X = X r"[a-z]+" r"[0-9]+";"#,
vec![ vec![
("a", r##"Some((r#"[a-z]+"#+0, "a"))"##), ("a", r##"Some((r#"[a-z]+"#, "a"))"##),
("def", r##"Some((r#"[a-z]+"#+0, "def"))"##), ("def", r##"Some((r#"[a-z]+"#, "def"))"##),
("1", r##"Some((r#"[0-9]+"#+0, "1"))"##), ("1", r##"Some((r#"[0-9]+"#, "1"))"##),
("9123456", r##"Some((r#"[0-9]+"#+0, "9123456"))"##), ("9123456", r##"Some((r#"[0-9]+"#, "9123456"))"##),
]); ]);
} }
@ -110,9 +110,9 @@ fn match_mappings() {
check_intern_token( check_intern_token(
r#"grammar; match { r"(?i)begin" => "BEGIN" } else { "abc" => ALPHA } X = "BEGIN" ALPHA;"#, r#"grammar; match { r"(?i)begin" => "BEGIN" } else { "abc" => ALPHA } X = "BEGIN" ALPHA;"#,
vec![ vec![
("BEGIN", r##"Some((r#"(?i)begin"#+4, "BEGIN"))"##), ("BEGIN", r##"Some(("BEGIN", "BEGIN"))"##),
("begin", r##"Some((r#"(?i)begin"#+4, "begin"))"##), ("begin", r##"Some(("BEGIN", "begin"))"##),
("abc", r#"Some(("abc"+3, "abc"))"#), // ALPHA ("abc", r#"Some((ALPHA, "abc"))"#),
]); ]);
} }

View File

@ -9,7 +9,7 @@ use grammar::parse_tree::{ActionKind, Alternative,
Path, Path,
Span, Span,
SymbolKind, SymbolKind,
TerminalString, TypeRef}; TypeRef};
use grammar::repr::{NominalTypeRepr, Types, TypeRepr}; use grammar::repr::{NominalTypeRepr, Types, TypeRepr};
use intern::intern; use intern::intern;
@ -79,12 +79,8 @@ impl<'grammar> TypeInferencer<'grammar> {
let mut types = Types::new(&grammar.prefix, Some(loc_type), error_type, enum_type); let mut types = Types::new(&grammar.prefix, Some(loc_type), error_type, enum_type);
for &literal in &intern_token.literals { for match_entry in &intern_token.match_entries {
let user_name = intern_token.match_to_user_name_map types.add_term_type(match_entry.user_name, input_str.clone());
.get(&literal)
.cloned()
.unwrap_or(TerminalString::Literal(literal));
types.add_term_type(user_name, input_str.clone());
} }
types types

View File

@ -344,8 +344,8 @@ QuotedTerminal: TerminalString = {
}; };
QuotedLiteral: TerminalLiteral = { QuotedLiteral: TerminalLiteral = {
<s:StringLiteral> => TerminalLiteral::Quoted(s, 1), <s:StringLiteral> => TerminalLiteral::Quoted(s),
<s:RegexLiteral> => TerminalLiteral::Regex(s, 0), <s:RegexLiteral> => TerminalLiteral::Regex(s),
}; };
StringLiteral: InternedString = StringLiteral: InternedString =

View File

@ -49,7 +49,7 @@ fn match_complex() {
let item00 = contents0.items.get(0).unwrap(); let item00 = contents0.items.get(0).unwrap();
match *item00 { match *item00 {
MatchItem::Mapped(ref sym, ref mapping, _) => { MatchItem::Mapped(ref sym, ref mapping, _) => {
assert_eq!(format!("{:?}", sym), "r#\"(?i)begin\"#+0"); assert_eq!(format!("{:?}", sym), "r#\"(?i)begin\"#");
assert_eq!(format!("{}", mapping), "\"BEGIN\""); assert_eq!(format!("{}", mapping), "\"BEGIN\"");
}, },
_ => panic!("expected MatchItem::Mapped, but was: {:?}", item00) _ => panic!("expected MatchItem::Mapped, but was: {:?}", item00)
@ -58,7 +58,7 @@ fn match_complex() {
let item01 = contents0.items.get(1).unwrap(); let item01 = contents0.items.get(1).unwrap();
match *item01 { match *item01 {
MatchItem::Mapped(ref sym, ref mapping, _) => { MatchItem::Mapped(ref sym, ref mapping, _) => {
assert_eq!(format!("{:?}", sym), "r#\"(?i)end\"#+0"); assert_eq!(format!("{:?}", sym), "r#\"(?i)end\"#");
assert_eq!(format!("{}", mapping), "\"END\""); assert_eq!(format!("{}", mapping), "\"END\"");
}, },
_ => panic!("expected MatchItem::Mapped, but was: {:?}", item00) _ => panic!("expected MatchItem::Mapped, but was: {:?}", item00)
@ -69,7 +69,7 @@ fn match_complex() {
let item10 = contents1.items.get(0).unwrap(); let item10 = contents1.items.get(0).unwrap();
match *item10 { match *item10 {
MatchItem::Mapped(ref sym, ref mapping, _) => { MatchItem::Mapped(ref sym, ref mapping, _) => {
assert_eq!(format!("{:?}", sym), "r#\"[a-zA-Z_][a-zA-Z0-9_]*\"#+0"); assert_eq!(format!("{:?}", sym), "r#\"[a-zA-Z_][a-zA-Z0-9_]*\"#");
assert_eq!(format!("{}", mapping), "IDENTIFIER"); assert_eq!(format!("{}", mapping), "IDENTIFIER");
}, },
_ => panic!("expected MatchItem::Mapped, but was: {:?}", item10) _ => panic!("expected MatchItem::Mapped, but was: {:?}", item10)
@ -80,7 +80,7 @@ fn match_complex() {
let item20 = contents2.items.get(0).unwrap(); let item20 = contents2.items.get(0).unwrap();
match *item20 { match *item20 {
MatchItem::Unmapped(ref sym, _) => { MatchItem::Unmapped(ref sym, _) => {
assert_eq!(format!("{:?}", sym), "\"other\"+1"); assert_eq!(format!("{:?}", sym), "\"other\"");
}, },
_ => panic!("expected MatchItem::Unmapped, but was: {:?}", item20) _ => panic!("expected MatchItem::Unmapped, but was: {:?}", item20)
}; };