revamp how we token-check and store the InternToken

In `InternToken`, we now coalesce everything into one `Vec<MatchEntry>`, rather than using a vector and a map. In the token-check code, the various fields associated with a match are moved into a struct.
2025-03-16 17:00:53 +00:00 · 2017-03-29 22:59:38 -06:00 · 2017-03-29 22:59:38 -06:00 · 6fe7377c22
commit 6fe7377c22
parent b75669c8d6
9 changed files with 281 additions and 220 deletions
--- a/lalrpop/src/grammar/parse_tree.rs
+++ b/lalrpop/src/grammar/parse_tree.rs
@ -15,7 +15,6 @@ use message::builder::InlineBuilder;
 use std::fmt::{Debug, Display, Formatter, Error};
 use tls::Tls;
 use util::Sep;
-use collections::Map;

 #[derive(Clone, Debug, PartialEq, Eq)]
 pub struct Grammar {
@ -118,15 +117,49 @@ pub type MatchMapping = TerminalString;
 pub struct InternToken {
    /// Set of `r"foo"` and `"foo"` literals extracted from the
    /// grammar. Sorted by order of increasing precedence.
-    pub literals: Vec<TerminalLiteral>,
-
-    /// For each item remapped in a `match` block, map from the
-    /// regex we match to the name the user wants to use.
-    pub match_to_user_name_map: Map<TerminalLiteral, TerminalString>,
-
+    pub match_entries: Vec<MatchEntry>,
    pub dfa: DFA
 }

+/// In `token_check`, as we prepare to generate a tokenizer, we
+/// combine any `match` declaration the user may have given with the
+/// set of literals (e.g. `"foo"` or `r"[a-z]"`) that appear elsewhere
+/// in the grammar to produce a series of `MatchEntry`. Each
+/// `MatchEntry` roughly corresponds to one line in a `match` declaration.
+///
+/// So e.g. if you had
+///
+/// ```
+/// match {
+///    r"(?i)BEGIN" => "BEGIN",
+///    "+" => "+",
+/// } else {
+///    _
+/// }
+///
+/// ID = r"[a-zA-Z]+"
+/// ```
+///
+/// This would correspond to three match entries:
+/// - `MatchEntry { match_literal: r"(?i)BEGIN", user_name: "BEGIN", precedence: 2 }`
+/// - `MatchEntry { match_literal: "+", user_name: "+", precedence: 3 }`
+/// - `MatchEntry { match_literal: r"[a-zA-Z]+", user_name: r"[a-zA-Z]+", precedence: 0 }`
+///
+/// A couple of things to note:
+///
+/// - Literals appearing in the grammar are converted into an "identity" mapping
+/// - Each match group G is combined with the implicit priority IP of 1 for literals and 0 for
+///   regex to yield the final precedence; the formula is `G*2 + IP`.
+#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub struct MatchEntry {
+    /// The precedence of this match entry.
+    ///
+    /// NB: This field must go first, so that `PartialOrd` sorts by precedence first!
+    pub precedence: usize,
+    pub match_literal: TerminalLiteral,
+    pub user_name: TerminalString,
+}
+
 #[derive(Clone, Debug, PartialEq, Eq)]
 pub struct ExternToken {
    pub span: Span,
@ -330,28 +363,18 @@ impl TerminalString {

 #[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
 pub enum TerminalLiteral {
-    Quoted(InternedString, usize),
-    Regex(InternedString, usize),
+    Quoted(InternedString),
+    Regex(InternedString),
 }

 impl TerminalLiteral {
-    /// Currently, at least, quoted literals ("foo") always have
-    /// higher precedence than regex literals (r"foo"). This only
-    /// applies when we are creating the tokenizer anyhow.
-    pub fn precedence(&self) -> usize {
+    /// The *base precedence* is the precedence within a `match { }`
+    /// block level. It indicates that quoted things like `"foo"` get
+    /// precedence over regex matches.
+    pub fn base_precedence(&self) -> usize {
        match *self {
-            TerminalLiteral::Quoted(_, p) => p,
-            TerminalLiteral::Regex(_, p) => p,
-        }
-    }
-
-    pub fn with_match_precedence(self, p: usize) -> TerminalLiteral {
-        // Multiply times two since we still want to distinguish
-        // between quoted and regex precedence
-        let base_precedence = p * 2;
-        match self {
-            TerminalLiteral::Quoted(i, _) => TerminalLiteral::Quoted(i, base_precedence+1),
-            TerminalLiteral::Regex(i, _) => TerminalLiteral::Regex(i, base_precedence+0),
+            TerminalLiteral::Quoted(_) => 1,
+            TerminalLiteral::Regex(_) => 0,
        }
    }
 }
@ -391,11 +414,11 @@ pub struct MacroSymbol {

 impl TerminalString {
    pub fn quoted(i: InternedString) -> TerminalString {
-        TerminalString::Literal(TerminalLiteral::Quoted(i, 1))
+        TerminalString::Literal(TerminalLiteral::Quoted(i))
    }

    pub fn regex(i: InternedString) -> TerminalString {
-        TerminalString::Literal(TerminalLiteral::Regex(i, 0))
+        TerminalString::Literal(TerminalLiteral::Regex(i))
    }
 }

@ -523,9 +546,9 @@ impl Debug for TerminalString {
 impl Display for TerminalLiteral {
    fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
        match *self {
-            TerminalLiteral::Quoted(s, _) =>
+            TerminalLiteral::Quoted(s) =>
                write!(fmt, "{:?}", s), // the Debug impl adds the `"` and escaping
-            TerminalLiteral::Regex(s, _) =>
+            TerminalLiteral::Regex(s) =>
                write!(fmt, "r#{:?}#", s), // FIXME -- need to determine proper number of #
        }
    }
@ -533,10 +556,7 @@ impl Display for TerminalLiteral {

 impl Debug for TerminalLiteral {
    fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
-        match *self {
-            TerminalLiteral::Quoted(_, p) | TerminalLiteral::Regex(_, p) =>
-                write!(fmt, "{}+{}", self, p)
-        }
+        write!(fmt, "{}", self)
    }
 }

--- a/lalrpop/src/lexer/intern_token/mod.rs
+++ b/lalrpop/src/lexer/intern_token/mod.rs
@ -59,11 +59,11 @@ pub fn compile<W: Write>(
    // create a vector of rust string literals with the text of each
    // regular expression
    let regex_strings: Vec<String> = intern::read(|interner| {
-        intern_token.literals
+        intern_token.match_entries
                    .iter()
-                    .map(|&literal| match literal {
-                        TerminalLiteral::Quoted(s, _) => re::parse_literal(interner.data(s)),
-                        TerminalLiteral::Regex(s, _) => re::parse_regex(interner.data(s)).unwrap(),
+                    .map(|match_entry| match match_entry.match_literal {
+                        TerminalLiteral::Quoted(s) => re::parse_literal(interner.data(s)),
+                        TerminalLiteral::Regex(s) => re::parse_regex(interner.data(s)).unwrap(),
                    })
                    .map(|regex| {
                        // make sure all regex are anchored at the beginning of the input
@ -134,7 +134,7 @@ pub fn compile<W: Write>(
    // checking if each one matches, and remembering the longest one.
    rust!(out, "let mut {}longest_match = 0;", prefix); // length of longest match
    rust!(out, "let mut {}index = 0;", prefix); // index of longest match
-    rust!(out, "for {}i in 0 .. {} {{", prefix, intern_token.literals.len());
+    rust!(out, "for {}i in 0 .. {} {{", prefix, intern_token.match_entries.len());
    rust!(out, "if {}matches.matched({}i) {{", prefix, prefix);

    // re-run the regex to find out how long this particular match
--- a/lalrpop/src/normalize/lower/mod.rs
+++ b/lalrpop/src/normalize/lower/mod.rs
@ -71,31 +71,27 @@ impl<'s> LowerState<'s> {
                            types: vec![],
                        })),
                    };
-                    self.conversions.extend(data.literals
-                                                .iter()
-                                                .enumerate()
-                                                .map(|(index, &literal)| {
-                                                    let pattern = Pattern {
-                                                        span: span,
-                                                        kind: PatternKind::Tuple(vec![
-                                                                    Pattern {
-                                                                        span: span,
-                                                                        kind: PatternKind::Usize(index),
-                                                                    },
-                                                                    Pattern {
-                                                                        span: span,
-                                                                        kind: PatternKind::Choose(input_str.clone())
-                                                                    }
-                                                                    ]),
-                                                    };
+                    self.conversions.extend(
+                        data.match_entries
+                            .iter()
+                            .enumerate()
+                            .map(|(index, match_entry)| {
+                                let pattern = Pattern {
+                                    span: span,
+                                    kind: PatternKind::Tuple(vec![
+                                        Pattern {
+                                            span: span,
+                                            kind: PatternKind::Usize(index),
+                                        },
+                                        Pattern {
+                                            span: span,
+                                            kind: PatternKind::Choose(input_str.clone())
+                                        }
+                                    ]),
+                                };

-                                                    // FIXME: This should be cleaner
-                                                    if let Some(&m) = data.match_to_user_name_map.get(&literal) {
-                                                        return (m, pattern);
-                                                    }
-
-                                                    (TerminalString::Literal(literal), pattern)
-                                                }));
+                                (match_entry.user_name, pattern)
+                            }));
                    self.intern_token = Some(data);
                }

--- a/lalrpop/src/normalize/macro_expand/mod.rs
+++ b/lalrpop/src/normalize/macro_expand/mod.rs
@ -252,7 +252,7 @@ impl MacroExpander {
    {
        if let Some(ref c) = *opt_cond {
            match args[&c.lhs] {
-                SymbolKind::Terminal(TerminalString::Literal(TerminalLiteral::Quoted(lhs, _))) => {
+                SymbolKind::Terminal(TerminalString::Literal(TerminalLiteral::Quoted(lhs))) => {
                    match c.op {
                        ConditionOp::Equals => Ok(lhs == c.rhs),
                        ConditionOp::NotEquals => Ok(lhs != c.rhs),
--- a/lalrpop/src/normalize/token_check/mod.rs
+++ b/lalrpop/src/normalize/token_check/mod.rs
@ -13,19 +13,16 @@ use lexer::dfa::{self, DFAConstructionError, Precedence};
 use lexer::nfa::NFAConstructionError::*;
 use grammar::consts::*;
 use grammar::parse_tree::*;
-use collections::Set;
-use collections::{map, Map};
+use collections::{Map, Set};

 #[cfg(test)]
 mod test;

 pub fn validate(mut grammar: Grammar) -> NormResult<Grammar> {
-    let (has_enum_token, all_literals, match_to_user_name_map) = {
+    let (has_enum_token, match_block) = {
        let opt_match_token = grammar.match_token();

-        let mut match_to_user_name_map = map();
-        let mut user_name_to_match_map = map();
-        let mut match_catch_all = false;
+        let mut match_block = MatchBlock::default();

        if let Some(mt) = opt_match_token {
            // FIXME: This should probably move _inside_ the Validator
@ -34,50 +31,50 @@ pub fn validate(mut grammar: Grammar) -> NormResult<Grammar> {
                for item in &mc.items {
                    // TODO: Maybe move this into MatchItem methods
                    match *item {
-                        MatchItem::Unmapped(sym, _) => {
-                            let precedence_sym = sym.with_match_precedence(precedence);
-                            match_to_user_name_map.insert(precedence_sym, TerminalString::Literal(sym));
-                            user_name_to_match_map.insert(TerminalString::Literal(sym), precedence_sym);
-                        },
-                        MatchItem::Mapped(sym, mapping, _) => {
-                            let precedence_sym = sym.with_match_precedence(precedence);
-                            match_to_user_name_map.insert(precedence_sym, mapping);
-                            user_name_to_match_map.insert(mapping, precedence_sym);
-                        },
-                        MatchItem::CatchAll(_) => { match_catch_all = true; }
-                    };
+                        MatchItem::Unmapped(sym, span) => {
+                            match_block.add_match_entry(precedence,
+                                                 sym,
+                                                 TerminalString::Literal(sym),
+                                                 span)?;
+                        }
+                        MatchItem::Mapped(sym, user, span) => {
+                            match_block.add_match_entry(precedence, sym, user, span)?;
+                        }
+                        MatchItem::CatchAll(_) => {
+                            match_block.catch_all = true;
+                        }
+                    }
                }
            }
        } else {
            // no match block is equivalent to `match { _ }`
-            match_catch_all = true;
+            match_block.catch_all = true;
        }

        let opt_enum_token = grammar.enum_token();
        let conversions = opt_enum_token.map(|et| {
-            et.conversions.iter()
-                          .map(|conversion| conversion.from)
-                          .collect()
-        });
+                                                 et.conversions
+                                                     .iter()
+                                                     .map(|conversion| conversion.from)
+                                                     .collect()
+                                             });

        let mut validator = Validator {
            grammar: &grammar,
-            all_literals: map(),
            conversions: conversions,
-            user_name_to_match_map: user_name_to_match_map,
-            match_catch_all: match_catch_all
+            match_block: match_block,
        };

        assert!(!opt_match_token.is_some() || !opt_enum_token.is_some(),
-                    "expected to not have both match and extern");
+                "expected to not have both match and extern");

        try!(validator.validate());

-        (opt_enum_token.is_some(), validator.all_literals, match_to_user_name_map)
+        (opt_enum_token.is_some(), validator.match_block)
    };

    if !has_enum_token {
-        try!(construct(&mut grammar, all_literals, match_to_user_name_map));
+        try!(construct(&mut grammar, match_block));
    }

    Ok(grammar)
@ -91,20 +88,100 @@ pub fn validate(mut grammar: Grammar) -> NormResult<Grammar> {

 struct Validator<'grammar> {
    grammar: &'grammar Grammar,
-    all_literals: Map<TerminalLiteral, Span>,
+
+    /// If an external tokenizer is in use, then this will be
+    /// `Some(_)` and will point to all the defined conversions. In
+    /// that case, the other fields below are irrelevant.
    conversions: Option<Set<TerminalString>>,
-    user_name_to_match_map: Map<TerminalString, TerminalLiteral>,
-    match_catch_all: bool,
+
+    match_block: MatchBlock,
+}
+
+/// Data summarizing the `match { }` block, along with any literals we
+/// scraped up.
+#[derive(Default)]
+struct MatchBlock {
+    /// This vector stores the `match { }` entries. If `catch_all`
+    /// is true, then we will grow this list with "identity mappings"
+    /// for new literals that we find.
+    match_entries: Vec<MatchEntry>,
+
+    /// The names of all terminals the user can legally type. If
+    /// `catch_all` is true, then if we encounter additional
+    /// terminal literals in the grammar, we will add them to this
+    /// set.
+    match_user_names: Set<TerminalString>,
+
+    /// For each terminal literal that we have to match, the span
+    /// where it appeared in user's source.  This can either be in the
+    /// `match { }` section or else in the grammar somewhere (if added
+    /// due to a catch-all, or there is no match section).
+    spans: Map<TerminalLiteral, Span>,
+
+    /// True if we should permit unrecognized literals to be used.
+    catch_all: bool,
+}
+
+impl MatchBlock {
+    fn add_match_entry(&mut self,
+                       match_group_precedence: usize,
+                       sym: TerminalLiteral,
+                       user_name: TerminalString,
+                       span: Span)
+                       -> NormResult<()> {
+        if let Some(_old_span) = self.spans.insert(sym, span) {
+            return_err!(span, "multiple match entries for `{}`", sym);
+        }
+
+        // NB: It's legal for multiple regex to produce same terminal.
+        self.match_user_names.insert(user_name);
+
+        self.match_entries
+            .push(MatchEntry {
+                      precedence: match_group_precedence * 2 + sym.base_precedence(),
+                      match_literal: sym,
+                      user_name: user_name,
+                  });
+        Ok(())
+    }
+
+    fn add_literal_from_grammar(&mut self, sym: TerminalLiteral, span: Span) -> NormResult<()> {
+        // Already saw this literal, maybe in a match entry, maybe in the grammar.
+        if self.match_user_names
+               .contains(&TerminalString::Literal(sym)) {
+            return Ok(());
+        }
+
+        if !self.catch_all {
+            return_err!(span,
+                        "terminal `{}` does not have a match mapping defined for it",
+                        sym);
+        }
+
+        self.match_user_names
+            .insert(TerminalString::Literal(sym));
+
+        self.match_entries
+            .push(MatchEntry {
+                      precedence: sym.base_precedence(),
+                      match_literal: sym,
+                      user_name: TerminalString::Literal(sym),
+                  });
+
+        self.spans.insert(sym, span);
+
+        Ok(())
+    }
 }

 impl<'grammar> Validator<'grammar> {
    fn validate(&mut self) -> NormResult<()> {
        for item in &self.grammar.items {
            match *item {
-                GrammarItem::Use(..) => { }
-                GrammarItem::MatchToken(..) => { }
-                GrammarItem::ExternToken(_) => { }
-                GrammarItem::InternToken(_) => { }
+                GrammarItem::Use(..) => {}
+                GrammarItem::MatchToken(..) => {}
+                GrammarItem::ExternToken(_) => {}
+                GrammarItem::InternToken(_) => {}
                GrammarItem::Nonterminal(ref data) => {
                    for alternative in &data.alternatives {
                        try!(self.validate_alternative(alternative));
@ -136,16 +213,15 @@ impl<'grammar> Validator<'grammar> {
            SymbolKind::Terminal(term) => {
                try!(self.validate_terminal(symbol.span, term));
            }
-            SymbolKind::Nonterminal(_) => {
-            }
+            SymbolKind::Nonterminal(_) => {}
            SymbolKind::Repeat(ref repeat) => {
                try!(self.validate_symbol(&repeat.symbol));
            }
-            SymbolKind::Choose(ref sym) | SymbolKind::Name(_, ref sym) => {
+            SymbolKind::Choose(ref sym) |
+            SymbolKind::Name(_, ref sym) => {
                try!(self.validate_symbol(sym));
            }
-            SymbolKind::Lookahead | SymbolKind::Lookbehind | SymbolKind::Error => {
-            }
+            SymbolKind::Lookahead | SymbolKind::Lookbehind | SymbolKind::Error => {}
            SymbolKind::AmbiguousId(id) => {
                panic!("ambiguous id `{}` encountered after name resolution", id)
            }
@ -163,49 +239,29 @@ impl<'grammar> Validator<'grammar> {
            // this terminal has a defined conversion.
            Some(ref c) => {
                if !c.contains(&term) {
-                    return_err!(span, "terminal `{}` does not have a pattern defined for it",
+                    return_err!(span,
+                                "terminal `{}` does not have a pattern defined for it",
                                term);
                }
            }

            // If there is no extern token definition, then collect
            // the terminal literals ("class", r"[a-z]+") into a set.
-            None => match term {
-                // FIMXE: Should not allow undefined literals if no CatchAll
-                TerminalString::Bare(c) => match self.user_name_to_match_map.get(&term) {
-                    Some(&vl) => {
-                        // FIXME: I don't think this span here is correct
-                        self.all_literals.entry(vl).or_insert(span);
+            None => {
+                match term {
+                    TerminalString::Bare(_) => {
+                        assert!(self.match_block.match_user_names.contains(&term),
+                                "bare terminal without match entry: {}",
+                                term)
                    }

-                    None => {
-                        // Bare identifiers like `x` can never be resolved
-                        // as terminals unless there is a conversion or mapping
-                        // defined for them that indicates they are a
-                        // terminal; otherwise it's just an unresolved
-                        // identifier.
-                        panic!("bare literal `{}` without extern token definition", c);
-                    }
-                },
-
-                TerminalString::Literal(l) => match self.user_name_to_match_map.get(&term) {
-                    Some(&vl) => {
-                        // FIXME: I don't think this span here is correct
-                        self.all_literals.entry(vl).or_insert(span);
+                    TerminalString::Literal(l) => {
+                        self.match_block.add_literal_from_grammar(l, span)?
                    }

-                    None => {
-                        if self.match_catch_all {
-                            self.all_literals.entry(l).or_insert(span);
-                        } else {
-                            return_err!(span, "terminal `{}` does not have a match mapping defined for it",
-                                        term);
-                        }
-                    }
-                },
-
-                // Error is a builtin terminal that always exists
-                TerminalString::Error => (),
+                    // Error is a builtin terminal that always exists
+                    TerminalString::Error => (),
+                }
            }
        }

@ -217,38 +273,36 @@ impl<'grammar> Validator<'grammar> {
 // Construction phase -- if we are constructing a tokenizer, this
 // phase builds up an internal token DFA.

-pub fn construct(grammar: &mut Grammar, literals_map: Map<TerminalLiteral, Span>, match_to_user_name_map: Map<TerminalLiteral, TerminalString>) -> NormResult<()> {
-    let mut literals: Vec<TerminalLiteral> =
-        literals_map.keys()
-                    .cloned()
-                    .collect();
+fn construct(grammar: &mut Grammar, match_block: MatchBlock) -> NormResult<()> {
+    let MatchBlock {
+        mut match_entries,
+        spans,
+        ..
+    } = match_block;

-    // Sort literals by order of increasing precedence.
-    literals.sort_by_key(|literal| literal.precedence());
+    // Sort match entries by order of increasing precedence.
+    match_entries.sort();

    // Build up two vectors, one of parsed regular expressions and
    // one of precedences, that are parallel with `literals`.
-    let mut regexs = Vec::with_capacity(literals.len());
-    let mut precedences = Vec::with_capacity(literals.len());
+    let mut regexs = Vec::with_capacity(match_entries.len());
+    let mut precedences = Vec::with_capacity(match_entries.len());
    try!(intern::read(|interner| {
-        for &literal in &literals {
-            precedences.push(Precedence(literal.precedence()));
-            match literal {
-                TerminalLiteral::Quoted(s, _) => {
+        for match_entry in &match_entries {
+            precedences.push(Precedence(match_entry.precedence));
+            match match_entry.match_literal {
+                TerminalLiteral::Quoted(s) => {
                    regexs.push(re::parse_literal(interner.data(s)));
                }
-                TerminalLiteral::Regex(s, _) => {
+                TerminalLiteral::Regex(s) => {
                    match re::parse_regex(interner.data(s)) {
                        Ok(regex) => regexs.push(regex),
                        Err(error) => {
-                            let literal_span = literals_map[&literal];
+                            let literal_span = spans[&match_entry.match_literal];
                            // FIXME -- take offset into account for
                            // span; this requires knowing how many #
                            // the user used, which we do not track
-                            return_err!(
-                                literal_span,
-                                "invalid regular expression: {}",
-                                error);
+                            return_err!(literal_span, "invalid regular expression: {}", error);
                        }
                    }
                }
@ -267,31 +321,28 @@ pub fn construct(grammar: &mut Grammar, literals_map: Map<TerminalLiteral, Span>
                LineBoundary => r#"line boundaries (`^` or `$`)"#,
                TextBoundary => r#"text boundaries (`^` or `$`)"#,
            };
-            let literal = literals[index.index()];
-            let span = literals_map[&literal];
-            return_err!(
-                span,
-                "{} are not supported in regular expressions",
-                feature)
+            let literal = match_entries[index.index()].match_literal;
+            return_err!(spans[&literal],
+                        "{} are not supported in regular expressions",
+                        feature)
        }
        Err(DFAConstructionError::Ambiguity { match0, match1 }) => {
-            let literal0 = literals[match0.index()];
-            let literal1 = literals[match1.index()];
-            let span0 = literals_map[&literal0];
-            let _span1 = literals_map[&literal1];
+            let literal0 = match_entries[match0.index()].match_literal;
+            let literal1 = match_entries[match1.index()].match_literal;
            // FIXME(#88) -- it'd be nice to give an example here
-            return_err!(
-                span0,
-                "ambiguity detected between the terminal `{}` and the terminal `{}`",
-                literal0, literal1);
+            return_err!(spans[&literal0],
+                        "ambiguity detected between the terminal `{}` and the terminal `{}`",
+                        literal0,
+                        literal1)
        }
    };

-    grammar.items.push(GrammarItem::InternToken(InternToken {
-        literals: literals,
-        match_to_user_name_map: match_to_user_name_map,
-        dfa: dfa
-    }));
+    grammar
+        .items
+        .push(GrammarItem::InternToken(InternToken {
+                                           match_entries: match_entries,
+                                           dfa: dfa,
+                                       }));

    // we need to inject a `'input` lifetime and `input: &'input str` parameter as well:

@ -299,38 +350,36 @@ pub fn construct(grammar: &mut Grammar, literals_map: Map<TerminalLiteral, Span>
    for parameter in &grammar.type_parameters {
        match *parameter {
            TypeParameter::Lifetime(i) if i == input_lifetime => {
-                return_err!(
-                    grammar.span,
-                    "since there is no external token enum specified, \
+                return_err!(grammar.span,
+                            "since there is no external token enum specified, \
                     the `'input` lifetime is implicit and cannot be declared");
            }
-            _ => { }
+            _ => {}
        }
    }

    let input_parameter = intern(INPUT_PARAMETER);
    for parameter in &grammar.parameters {
        if parameter.name == input_parameter {
-            return_err!(
-                grammar.span,
-                "since there is no external token enum specified, \
+            return_err!(grammar.span,
+                        "since there is no external token enum specified, \
                 the `input` parameter is implicit and cannot be declared");
        }
    }

-    grammar.type_parameters.insert(0, TypeParameter::Lifetime(input_lifetime));
+    grammar
+        .type_parameters
+        .insert(0, TypeParameter::Lifetime(input_lifetime));

    let parameter = Parameter {
        name: input_parameter,
        ty: TypeRef::Ref {
            lifetime: Some(input_lifetime),
            mutable: false,
-            referent: Box::new(TypeRef::Id(intern("str")))
-        }
+            referent: Box::new(TypeRef::Id(intern("str"))),
+        },
    };
    grammar.parameters.push(parameter);

    Ok(())
 }
-
-
--- a/lalrpop/src/normalize/token_check/test.rs
+++ b/lalrpop/src/normalize/token_check/test.rs
@ -24,19 +24,19 @@ fn check_intern_token(grammar: &str,
    let parsed_grammar = validate_grammar(&grammar).expect("validate");
    let intern_token = parsed_grammar.intern_token().expect("intern_token");
    println!("intern_token: {:?}", intern_token);
-    for (input, expected_literal) in expected_tokens {
-        let actual_literal =
+    for (input, expected_user_name) in expected_tokens {
+        let actual_user_name =
            interpret::interpret(&intern_token.dfa, input)
            .map(|(index, text)| {
-                let literal = intern_token.literals[index.index()];
-                (literal, text)
+                let user_name = intern_token.match_entries[index.index()].user_name;
+                (user_name, text)
            });
-        let actual_literal = format!("{:?}", actual_literal);
-        if expected_literal != actual_literal {
+        let actual_user_name = format!("{:?}", actual_user_name);
+        if expected_user_name != actual_user_name {
            panic!("input `{}` matched `{}` but we expected `{}`",
                   input,
-                   actual_literal,
-                   expected_literal);
+                   actual_user_name,
+                   expected_user_name);
        }
    }
 }
@ -85,11 +85,11 @@ fn invalid_regular_expression_unterminated_group() {
 fn quoted_literals() {
    check_intern_token(
        r#"grammar; X = X "+" "-" "foo" "(" ")";"#,
-        vec![("+", r#"Some(("+"+1, "+"))"#),
-             ("-", r#"Some(("-"+1, "-"))"#),
-             ("(", r#"Some(("("+1, "("))"#),
-             (")", r#"Some((")"+1, ")"))"#),
-             ("foo", r#"Some(("foo"+1, "foo"))"#),
+        vec![("+", r#"Some(("+", "+"))"#),
+             ("-", r#"Some(("-", "-"))"#),
+             ("(", r#"Some(("(", "("))"#),
+             (")", r#"Some((")", ")"))"#),
+             ("foo", r#"Some(("foo", "foo"))"#),
             ("<", r#"None"#)]);
 }

@ -98,10 +98,10 @@ fn regex_literals() {
    check_intern_token(
        r#"grammar; X = X r"[a-z]+" r"[0-9]+";"#,
        vec![
-            ("a", r##"Some((r#"[a-z]+"#+0, "a"))"##),
-            ("def", r##"Some((r#"[a-z]+"#+0, "def"))"##),
-            ("1", r##"Some((r#"[0-9]+"#+0, "1"))"##),
-            ("9123456", r##"Some((r#"[0-9]+"#+0, "9123456"))"##),
+            ("a", r##"Some((r#"[a-z]+"#, "a"))"##),
+            ("def", r##"Some((r#"[a-z]+"#, "def"))"##),
+            ("1", r##"Some((r#"[0-9]+"#, "1"))"##),
+            ("9123456", r##"Some((r#"[0-9]+"#, "9123456"))"##),
                ]);
 }

@ -110,9 +110,9 @@ fn match_mappings() {
    check_intern_token(
        r#"grammar; match { r"(?i)begin" => "BEGIN" } else { "abc" => ALPHA } X = "BEGIN" ALPHA;"#,
        vec![
-            ("BEGIN", r##"Some((r#"(?i)begin"#+4, "BEGIN"))"##),
-            ("begin", r##"Some((r#"(?i)begin"#+4, "begin"))"##),
-            ("abc", r#"Some(("abc"+3, "abc"))"#), // ALPHA
+            ("BEGIN", r##"Some(("BEGIN", "BEGIN"))"##),
+            ("begin", r##"Some(("BEGIN", "begin"))"##),
+            ("abc", r#"Some((ALPHA, "abc"))"#),
                ]);
 }

--- a/lalrpop/src/normalize/tyinfer/mod.rs
+++ b/lalrpop/src/normalize/tyinfer/mod.rs
@ -9,7 +9,7 @@ use grammar::parse_tree::{ActionKind, Alternative,
                          Path,
                          Span,
                          SymbolKind,
-                          TerminalString, TypeRef};
+                          TypeRef};
 use grammar::repr::{NominalTypeRepr, Types, TypeRepr};
 use intern::intern;

@ -79,12 +79,8 @@ impl<'grammar> TypeInferencer<'grammar> {

            let mut types = Types::new(&grammar.prefix, Some(loc_type), error_type, enum_type);

-            for &literal in &intern_token.literals {
-                let user_name = intern_token.match_to_user_name_map
-                                            .get(&literal)
-                                            .cloned()
-                                            .unwrap_or(TerminalString::Literal(literal));
-                types.add_term_type(user_name, input_str.clone());
+            for match_entry in &intern_token.match_entries {
+                types.add_term_type(match_entry.user_name, input_str.clone());
            }

            types
--- a/lalrpop/src/parser/lrgrammar.lalrpop
+++ b/lalrpop/src/parser/lrgrammar.lalrpop
@ -344,8 +344,8 @@ QuotedTerminal: TerminalString = {
 };

 QuotedLiteral: TerminalLiteral = {
-    <s:StringLiteral> => TerminalLiteral::Quoted(s, 1),
-    <s:RegexLiteral> => TerminalLiteral::Regex(s, 0),
+    <s:StringLiteral> => TerminalLiteral::Quoted(s),
+    <s:RegexLiteral> => TerminalLiteral::Regex(s),
 };

 StringLiteral: InternedString =
--- a/lalrpop/src/parser/test.rs
+++ b/lalrpop/src/parser/test.rs
@ -49,7 +49,7 @@ fn match_complex() {
            let item00 = contents0.items.get(0).unwrap();
            match *item00 {
                MatchItem::Mapped(ref sym, ref mapping, _) => {
-                    assert_eq!(format!("{:?}", sym), "r#\"(?i)begin\"#+0");
+                    assert_eq!(format!("{:?}", sym), "r#\"(?i)begin\"#");
                    assert_eq!(format!("{}", mapping), "\"BEGIN\"");
                },
                _ => panic!("expected MatchItem::Mapped, but was: {:?}", item00)
@ -58,7 +58,7 @@ fn match_complex() {
            let item01 = contents0.items.get(1).unwrap();
            match *item01 {
                MatchItem::Mapped(ref sym, ref mapping, _) => {
-                    assert_eq!(format!("{:?}", sym), "r#\"(?i)end\"#+0");
+                    assert_eq!(format!("{:?}", sym), "r#\"(?i)end\"#");
                    assert_eq!(format!("{}", mapping), "\"END\"");
                },
                _ => panic!("expected MatchItem::Mapped, but was: {:?}", item00)
@ -69,7 +69,7 @@ fn match_complex() {
            let item10 = contents1.items.get(0).unwrap();
            match *item10 {
                MatchItem::Mapped(ref sym, ref mapping, _) => {
-                    assert_eq!(format!("{:?}", sym), "r#\"[a-zA-Z_][a-zA-Z0-9_]*\"#+0");
+                    assert_eq!(format!("{:?}", sym), "r#\"[a-zA-Z_][a-zA-Z0-9_]*\"#");
                    assert_eq!(format!("{}", mapping), "IDENTIFIER");
                },
                _ => panic!("expected MatchItem::Mapped, but was: {:?}", item10)
@ -80,7 +80,7 @@ fn match_complex() {
            let item20 = contents2.items.get(0).unwrap();
            match *item20 {
                MatchItem::Unmapped(ref sym, _) => {
-                    assert_eq!(format!("{:?}", sym), "\"other\"+1");
+                    assert_eq!(format!("{:?}", sym), "\"other\"");
                },
                _ => panic!("expected MatchItem::Unmapped, but was: {:?}", item20)
            };