revamp how we token-check and store the InternToken

In `InternToken`, we now coalesce everything into one
`Vec<MatchEntry>`, rather than using a vector and a map.  In the
token-check code, the various fields associated with a match are moved
into a struct.
This commit is contained in:
Niko Matsakis 2017-03-29 22:59:38 -06:00
parent b75669c8d6
commit 6fe7377c22
9 changed files with 281 additions and 220 deletions

View File

@ -15,7 +15,6 @@ use message::builder::InlineBuilder;
use std::fmt::{Debug, Display, Formatter, Error}; use std::fmt::{Debug, Display, Formatter, Error};
use tls::Tls; use tls::Tls;
use util::Sep; use util::Sep;
use collections::Map;
#[derive(Clone, Debug, PartialEq, Eq)] #[derive(Clone, Debug, PartialEq, Eq)]
pub struct Grammar { pub struct Grammar {
@ -118,15 +117,49 @@ pub type MatchMapping = TerminalString;
pub struct InternToken { pub struct InternToken {
/// Set of `r"foo"` and `"foo"` literals extracted from the /// Set of `r"foo"` and `"foo"` literals extracted from the
/// grammar. Sorted by order of increasing precedence. /// grammar. Sorted by order of increasing precedence.
pub literals: Vec<TerminalLiteral>, pub match_entries: Vec<MatchEntry>,
/// For each item remapped in a `match` block, map from the
/// regex we match to the name the user wants to use.
pub match_to_user_name_map: Map<TerminalLiteral, TerminalString>,
pub dfa: DFA pub dfa: DFA
} }
/// In `token_check`, as we prepare to generate a tokenizer, we
/// combine any `match` declaration the user may have given with the
/// set of literals (e.g. `"foo"` or `r"[a-z]"`) that appear elsewhere
/// in their grammar to produce a series of `MatchEntry`. Each
/// `MatchEntry` roughly corresponds to one line in a `match` declaration.
///
/// So e.g. if you had
///
/// ```
/// match {
/// r"(?i)BEGIN" => "BEGIN",
/// "+" => "+",
/// } else {
/// _
/// }
///
/// ID = r"[a-zA-Z]+"
/// ```
///
/// This would correspond to three match entries:
/// - `MatchEntry { match_literal: r"(?i)BEGIN", user_name: "BEGIN", precedence: 2 }`
/// - `MatchEntry { match_literal: "+", user_name: "+", precedence: 3 }`
/// - `MatchEntry { match_literal: r"[a-zA-Z]+", user_name: r"[a-zA-Z]+", precedence: 0 }`
///
/// A couple of things to note:
///
/// - Literals appearing in the grammar are converted into an "identity" mapping
/// - Each match group G is combined with the implicit priority IP of 1 for literals and 0 for
/// regex to yield the final precedence; the formula is `G*2 + IP`.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct MatchEntry {
/// The precedence of this match entry.
///
/// NB: This field must go first, so that `PartialOrd` sorts by precedence first!
pub precedence: usize,
pub match_literal: TerminalLiteral,
pub user_name: TerminalString,
}
#[derive(Clone, Debug, PartialEq, Eq)] #[derive(Clone, Debug, PartialEq, Eq)]
pub struct ExternToken { pub struct ExternToken {
pub span: Span, pub span: Span,
@ -330,28 +363,18 @@ impl TerminalString {
#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] #[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum TerminalLiteral { pub enum TerminalLiteral {
Quoted(InternedString, usize), Quoted(InternedString),
Regex(InternedString, usize), Regex(InternedString),
} }
impl TerminalLiteral { impl TerminalLiteral {
/// Currently, at least, quoted literals ("foo") always have /// The *base precedence* is the precedence within a `match { }`
/// higher precedence than regex literals (r"foo"). This only /// block level. It indicates that quoted things like `"foo"` get
/// applies when we are creating the tokenizer anyhow. /// precedence over regex matches.
pub fn precedence(&self) -> usize { pub fn base_precedence(&self) -> usize {
match *self { match *self {
TerminalLiteral::Quoted(_, p) => p, TerminalLiteral::Quoted(_) => 1,
TerminalLiteral::Regex(_, p) => p, TerminalLiteral::Regex(_) => 0,
}
}
pub fn with_match_precedence(self, p: usize) -> TerminalLiteral {
// Multiply times two since we still want to distinguish
// between quoted and regex precedence
let base_precedence = p * 2;
match self {
TerminalLiteral::Quoted(i, _) => TerminalLiteral::Quoted(i, base_precedence+1),
TerminalLiteral::Regex(i, _) => TerminalLiteral::Regex(i, base_precedence+0),
} }
} }
} }
@ -391,11 +414,11 @@ pub struct MacroSymbol {
impl TerminalString { impl TerminalString {
pub fn quoted(i: InternedString) -> TerminalString { pub fn quoted(i: InternedString) -> TerminalString {
TerminalString::Literal(TerminalLiteral::Quoted(i, 1)) TerminalString::Literal(TerminalLiteral::Quoted(i))
} }
pub fn regex(i: InternedString) -> TerminalString { pub fn regex(i: InternedString) -> TerminalString {
TerminalString::Literal(TerminalLiteral::Regex(i, 0)) TerminalString::Literal(TerminalLiteral::Regex(i))
} }
} }
@ -523,9 +546,9 @@ impl Debug for TerminalString {
impl Display for TerminalLiteral { impl Display for TerminalLiteral {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> { fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
match *self { match *self {
TerminalLiteral::Quoted(s, _) => TerminalLiteral::Quoted(s) =>
write!(fmt, "{:?}", s), // the Debug impl adds the `"` and escaping write!(fmt, "{:?}", s), // the Debug impl adds the `"` and escaping
TerminalLiteral::Regex(s, _) => TerminalLiteral::Regex(s) =>
write!(fmt, "r#{:?}#", s), // FIXME -- need to determine proper number of # write!(fmt, "r#{:?}#", s), // FIXME -- need to determine proper number of #
} }
} }
@ -533,10 +556,7 @@ impl Display for TerminalLiteral {
impl Debug for TerminalLiteral { impl Debug for TerminalLiteral {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> { fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
match *self { write!(fmt, "{}", self)
TerminalLiteral::Quoted(_, p) | TerminalLiteral::Regex(_, p) =>
write!(fmt, "{}+{}", self, p)
}
} }
} }

View File

@ -59,11 +59,11 @@ pub fn compile<W: Write>(
// create a vector of rust string literals with the text of each // create a vector of rust string literals with the text of each
// regular expression // regular expression
let regex_strings: Vec<String> = intern::read(|interner| { let regex_strings: Vec<String> = intern::read(|interner| {
intern_token.literals intern_token.match_entries
.iter() .iter()
.map(|&literal| match literal { .map(|match_entry| match match_entry.match_literal {
TerminalLiteral::Quoted(s, _) => re::parse_literal(interner.data(s)), TerminalLiteral::Quoted(s) => re::parse_literal(interner.data(s)),
TerminalLiteral::Regex(s, _) => re::parse_regex(interner.data(s)).unwrap(), TerminalLiteral::Regex(s) => re::parse_regex(interner.data(s)).unwrap(),
}) })
.map(|regex| { .map(|regex| {
// make sure all regex are anchored at the beginning of the input // make sure all regex are anchored at the beginning of the input
@ -134,7 +134,7 @@ pub fn compile<W: Write>(
// checking if each one matches, and remembering the longest one. // checking if each one matches, and remembering the longest one.
rust!(out, "let mut {}longest_match = 0;", prefix); // length of longest match rust!(out, "let mut {}longest_match = 0;", prefix); // length of longest match
rust!(out, "let mut {}index = 0;", prefix); // index of longest match rust!(out, "let mut {}index = 0;", prefix); // index of longest match
rust!(out, "for {}i in 0 .. {} {{", prefix, intern_token.literals.len()); rust!(out, "for {}i in 0 .. {} {{", prefix, intern_token.match_entries.len());
rust!(out, "if {}matches.matched({}i) {{", prefix, prefix); rust!(out, "if {}matches.matched({}i) {{", prefix, prefix);
// re-run the regex to find out how long this particular match // re-run the regex to find out how long this particular match

View File

@ -71,31 +71,27 @@ impl<'s> LowerState<'s> {
types: vec![], types: vec![],
})), })),
}; };
self.conversions.extend(data.literals self.conversions.extend(
.iter() data.match_entries
.enumerate() .iter()
.map(|(index, &literal)| { .enumerate()
let pattern = Pattern { .map(|(index, match_entry)| {
span: span, let pattern = Pattern {
kind: PatternKind::Tuple(vec![ span: span,
Pattern { kind: PatternKind::Tuple(vec![
span: span, Pattern {
kind: PatternKind::Usize(index), span: span,
}, kind: PatternKind::Usize(index),
Pattern { },
span: span, Pattern {
kind: PatternKind::Choose(input_str.clone()) span: span,
} kind: PatternKind::Choose(input_str.clone())
]), }
}; ]),
};
// FIXME: This should be cleaner (match_entry.user_name, pattern)
if let Some(&m) = data.match_to_user_name_map.get(&literal) { }));
return (m, pattern);
}
(TerminalString::Literal(literal), pattern)
}));
self.intern_token = Some(data); self.intern_token = Some(data);
} }

View File

@ -252,7 +252,7 @@ impl MacroExpander {
{ {
if let Some(ref c) = *opt_cond { if let Some(ref c) = *opt_cond {
match args[&c.lhs] { match args[&c.lhs] {
SymbolKind::Terminal(TerminalString::Literal(TerminalLiteral::Quoted(lhs, _))) => { SymbolKind::Terminal(TerminalString::Literal(TerminalLiteral::Quoted(lhs))) => {
match c.op { match c.op {
ConditionOp::Equals => Ok(lhs == c.rhs), ConditionOp::Equals => Ok(lhs == c.rhs),
ConditionOp::NotEquals => Ok(lhs != c.rhs), ConditionOp::NotEquals => Ok(lhs != c.rhs),

View File

@ -13,19 +13,16 @@ use lexer::dfa::{self, DFAConstructionError, Precedence};
use lexer::nfa::NFAConstructionError::*; use lexer::nfa::NFAConstructionError::*;
use grammar::consts::*; use grammar::consts::*;
use grammar::parse_tree::*; use grammar::parse_tree::*;
use collections::Set; use collections::{Map, Set};
use collections::{map, Map};
#[cfg(test)] #[cfg(test)]
mod test; mod test;
pub fn validate(mut grammar: Grammar) -> NormResult<Grammar> { pub fn validate(mut grammar: Grammar) -> NormResult<Grammar> {
let (has_enum_token, all_literals, match_to_user_name_map) = { let (has_enum_token, match_block) = {
let opt_match_token = grammar.match_token(); let opt_match_token = grammar.match_token();
let mut match_to_user_name_map = map(); let mut match_block = MatchBlock::default();
let mut user_name_to_match_map = map();
let mut match_catch_all = false;
if let Some(mt) = opt_match_token { if let Some(mt) = opt_match_token {
// FIXME: This should probably move _inside_ the Validator // FIXME: This should probably move _inside_ the Validator
@ -34,50 +31,50 @@ pub fn validate(mut grammar: Grammar) -> NormResult<Grammar> {
for item in &mc.items { for item in &mc.items {
// TODO: Maybe move this into MatchItem methods // TODO: Maybe move this into MatchItem methods
match *item { match *item {
MatchItem::Unmapped(sym, _) => { MatchItem::Unmapped(sym, span) => {
let precedence_sym = sym.with_match_precedence(precedence); match_block.add_match_entry(precedence,
match_to_user_name_map.insert(precedence_sym, TerminalString::Literal(sym)); sym,
user_name_to_match_map.insert(TerminalString::Literal(sym), precedence_sym); TerminalString::Literal(sym),
}, span)?;
MatchItem::Mapped(sym, mapping, _) => { }
let precedence_sym = sym.with_match_precedence(precedence); MatchItem::Mapped(sym, user, span) => {
match_to_user_name_map.insert(precedence_sym, mapping); match_block.add_match_entry(precedence, sym, user, span)?;
user_name_to_match_map.insert(mapping, precedence_sym); }
}, MatchItem::CatchAll(_) => {
MatchItem::CatchAll(_) => { match_catch_all = true; } match_block.catch_all = true;
}; }
}
} }
} }
} else { } else {
// no match block is equivalent to `match { _ }` // no match block is equivalent to `match { _ }`
match_catch_all = true; match_block.catch_all = true;
} }
let opt_enum_token = grammar.enum_token(); let opt_enum_token = grammar.enum_token();
let conversions = opt_enum_token.map(|et| { let conversions = opt_enum_token.map(|et| {
et.conversions.iter() et.conversions
.map(|conversion| conversion.from) .iter()
.collect() .map(|conversion| conversion.from)
}); .collect()
});
let mut validator = Validator { let mut validator = Validator {
grammar: &grammar, grammar: &grammar,
all_literals: map(),
conversions: conversions, conversions: conversions,
user_name_to_match_map: user_name_to_match_map, match_block: match_block,
match_catch_all: match_catch_all
}; };
assert!(!opt_match_token.is_some() || !opt_enum_token.is_some(), assert!(!opt_match_token.is_some() || !opt_enum_token.is_some(),
"expected to not have both match and extern"); "expected to not have both match and extern");
try!(validator.validate()); try!(validator.validate());
(opt_enum_token.is_some(), validator.all_literals, match_to_user_name_map) (opt_enum_token.is_some(), validator.match_block)
}; };
if !has_enum_token { if !has_enum_token {
try!(construct(&mut grammar, all_literals, match_to_user_name_map)); try!(construct(&mut grammar, match_block));
} }
Ok(grammar) Ok(grammar)
@ -91,20 +88,100 @@ pub fn validate(mut grammar: Grammar) -> NormResult<Grammar> {
struct Validator<'grammar> { struct Validator<'grammar> {
grammar: &'grammar Grammar, grammar: &'grammar Grammar,
all_literals: Map<TerminalLiteral, Span>,
/// If an external tokenizer is in use, then this will be
/// `Some(_)` and will point to all the defined conversions. In
/// that case, the other fields below are irrelevant.
conversions: Option<Set<TerminalString>>, conversions: Option<Set<TerminalString>>,
user_name_to_match_map: Map<TerminalString, TerminalLiteral>,
match_catch_all: bool, match_block: MatchBlock,
}
/// Data summarizing the `match { }` block, along with any literals we
/// scraped up.
#[derive(Default)]
struct MatchBlock {
/// This map stores the `match { }` entries. If `match_catch_all`
/// is true, then we will grow this set with "identity mappings"
/// for new literals that we find.
match_entries: Vec<MatchEntry>,
/// The names of all terminals the user can legally type. If
/// `match_catch_all` is true, then if we encounter additional
/// terminal literals in the grammar, we will add them to this
/// set.
match_user_names: Set<TerminalString>,
/// For each terminal literal that we have to match, the span
/// where it appeared in user's source. This can either be in the
/// `match { }` section or else in the grammar somewhere (if added
/// due to a catch-all, or there is no match section).
spans: Map<TerminalLiteral, Span>,
/// True if we should permit unrecognized literals to be used.
catch_all: bool,
}
impl MatchBlock {
fn add_match_entry(&mut self,
match_group_precedence: usize,
sym: TerminalLiteral,
user_name: TerminalString,
span: Span)
-> NormResult<()> {
if let Some(_old_span) = self.spans.insert(sym, span) {
return_err!(span, "multiple match entries for `{}`", sym);
}
// NB: It's legal for multiple regex to produce same terminal.
self.match_user_names.insert(user_name);
self.match_entries
.push(MatchEntry {
precedence: match_group_precedence * 2 + sym.base_precedence(),
match_literal: sym,
user_name: user_name,
});
Ok(())
}
fn add_literal_from_grammar(&mut self, sym: TerminalLiteral, span: Span) -> NormResult<()> {
// Already saw this literal, maybe in a match entry, maybe in the grammar.
if self.match_user_names
.contains(&TerminalString::Literal(sym)) {
return Ok(());
}
if !self.catch_all {
return_err!(span,
"terminal `{}` does not have a match mapping defined for it",
sym);
}
self.match_user_names
.insert(TerminalString::Literal(sym));
self.match_entries
.push(MatchEntry {
precedence: sym.base_precedence(),
match_literal: sym,
user_name: TerminalString::Literal(sym),
});
self.spans.insert(sym, span);
Ok(())
}
} }
impl<'grammar> Validator<'grammar> { impl<'grammar> Validator<'grammar> {
fn validate(&mut self) -> NormResult<()> { fn validate(&mut self) -> NormResult<()> {
for item in &self.grammar.items { for item in &self.grammar.items {
match *item { match *item {
GrammarItem::Use(..) => { } GrammarItem::Use(..) => {}
GrammarItem::MatchToken(..) => { } GrammarItem::MatchToken(..) => {}
GrammarItem::ExternToken(_) => { } GrammarItem::ExternToken(_) => {}
GrammarItem::InternToken(_) => { } GrammarItem::InternToken(_) => {}
GrammarItem::Nonterminal(ref data) => { GrammarItem::Nonterminal(ref data) => {
for alternative in &data.alternatives { for alternative in &data.alternatives {
try!(self.validate_alternative(alternative)); try!(self.validate_alternative(alternative));
@ -136,16 +213,15 @@ impl<'grammar> Validator<'grammar> {
SymbolKind::Terminal(term) => { SymbolKind::Terminal(term) => {
try!(self.validate_terminal(symbol.span, term)); try!(self.validate_terminal(symbol.span, term));
} }
SymbolKind::Nonterminal(_) => { SymbolKind::Nonterminal(_) => {}
}
SymbolKind::Repeat(ref repeat) => { SymbolKind::Repeat(ref repeat) => {
try!(self.validate_symbol(&repeat.symbol)); try!(self.validate_symbol(&repeat.symbol));
} }
SymbolKind::Choose(ref sym) | SymbolKind::Name(_, ref sym) => { SymbolKind::Choose(ref sym) |
SymbolKind::Name(_, ref sym) => {
try!(self.validate_symbol(sym)); try!(self.validate_symbol(sym));
} }
SymbolKind::Lookahead | SymbolKind::Lookbehind | SymbolKind::Error => { SymbolKind::Lookahead | SymbolKind::Lookbehind | SymbolKind::Error => {}
}
SymbolKind::AmbiguousId(id) => { SymbolKind::AmbiguousId(id) => {
panic!("ambiguous id `{}` encountered after name resolution", id) panic!("ambiguous id `{}` encountered after name resolution", id)
} }
@ -163,49 +239,29 @@ impl<'grammar> Validator<'grammar> {
// this terminal has a defined conversion. // this terminal has a defined conversion.
Some(ref c) => { Some(ref c) => {
if !c.contains(&term) { if !c.contains(&term) {
return_err!(span, "terminal `{}` does not have a pattern defined for it", return_err!(span,
"terminal `{}` does not have a pattern defined for it",
term); term);
} }
} }
// If there is no extern token definition, then collect // If there is no extern token definition, then collect
// the terminal literals ("class", r"[a-z]+") into a set. // the terminal literals ("class", r"[a-z]+") into a set.
None => match term { None => {
// FIMXE: Should not allow undefined literals if no CatchAll match term {
TerminalString::Bare(c) => match self.user_name_to_match_map.get(&term) { TerminalString::Bare(_) => {
Some(&vl) => { assert!(self.match_block.match_user_names.contains(&term),
// FIXME: I don't think this span here is correct "bare terminal without match entry: {}",
self.all_literals.entry(vl).or_insert(span); term)
} }
None => { TerminalString::Literal(l) => {
// Bare identifiers like `x` can never be resolved self.match_block.add_literal_from_grammar(l, span)?
// as terminals unless there is a conversion or mapping
// defined for them that indicates they are a
// terminal; otherwise it's just an unresolved
// identifier.
panic!("bare literal `{}` without extern token definition", c);
}
},
TerminalString::Literal(l) => match self.user_name_to_match_map.get(&term) {
Some(&vl) => {
// FIXME: I don't think this span here is correct
self.all_literals.entry(vl).or_insert(span);
} }
None => { // Error is a builtin terminal that always exists
if self.match_catch_all { TerminalString::Error => (),
self.all_literals.entry(l).or_insert(span); }
} else {
return_err!(span, "terminal `{}` does not have a match mapping defined for it",
term);
}
}
},
// Error is a builtin terminal that always exists
TerminalString::Error => (),
} }
} }
@ -217,38 +273,36 @@ impl<'grammar> Validator<'grammar> {
// Construction phase -- if we are constructing a tokenizer, this // Construction phase -- if we are constructing a tokenizer, this
// phase builds up an internal token DFA. // phase builds up an internal token DFA.
pub fn construct(grammar: &mut Grammar, literals_map: Map<TerminalLiteral, Span>, match_to_user_name_map: Map<TerminalLiteral, TerminalString>) -> NormResult<()> { fn construct(grammar: &mut Grammar, match_block: MatchBlock) -> NormResult<()> {
let mut literals: Vec<TerminalLiteral> = let MatchBlock {
literals_map.keys() mut match_entries,
.cloned() spans,
.collect(); ..
} = match_block;
// Sort literals by order of increasing precedence. // Sort match entries by order of increasing precedence.
literals.sort_by_key(|literal| literal.precedence()); match_entries.sort();
// Build up two vectors, one of parsed regular expressions and // Build up two vectors, one of parsed regular expressions and
// one of precedences, that are parallel with `literals`. // one of precedences, that are parallel with `literals`.
let mut regexs = Vec::with_capacity(literals.len()); let mut regexs = Vec::with_capacity(match_entries.len());
let mut precedences = Vec::with_capacity(literals.len()); let mut precedences = Vec::with_capacity(match_entries.len());
try!(intern::read(|interner| { try!(intern::read(|interner| {
for &literal in &literals { for match_entry in &match_entries {
precedences.push(Precedence(literal.precedence())); precedences.push(Precedence(match_entry.precedence));
match literal { match match_entry.match_literal {
TerminalLiteral::Quoted(s, _) => { TerminalLiteral::Quoted(s) => {
regexs.push(re::parse_literal(interner.data(s))); regexs.push(re::parse_literal(interner.data(s)));
} }
TerminalLiteral::Regex(s, _) => { TerminalLiteral::Regex(s) => {
match re::parse_regex(interner.data(s)) { match re::parse_regex(interner.data(s)) {
Ok(regex) => regexs.push(regex), Ok(regex) => regexs.push(regex),
Err(error) => { Err(error) => {
let literal_span = literals_map[&literal]; let literal_span = spans[&match_entry.match_literal];
// FIXME -- take offset into account for // FIXME -- take offset into account for
// span; this requires knowing how many # // span; this requires knowing how many #
// the user used, which we do not track // the user used, which we do not track
return_err!( return_err!(literal_span, "invalid regular expression: {}", error);
literal_span,
"invalid regular expression: {}",
error);
} }
} }
} }
@ -267,31 +321,28 @@ pub fn construct(grammar: &mut Grammar, literals_map: Map<TerminalLiteral, Span>
LineBoundary => r#"line boundaries (`^` or `$`)"#, LineBoundary => r#"line boundaries (`^` or `$`)"#,
TextBoundary => r#"text boundaries (`^` or `$`)"#, TextBoundary => r#"text boundaries (`^` or `$`)"#,
}; };
let literal = literals[index.index()]; let literal = match_entries[index.index()].match_literal;
let span = literals_map[&literal]; return_err!(spans[&literal],
return_err!( "{} are not supported in regular expressions",
span, feature)
"{} are not supported in regular expressions",
feature)
} }
Err(DFAConstructionError::Ambiguity { match0, match1 }) => { Err(DFAConstructionError::Ambiguity { match0, match1 }) => {
let literal0 = literals[match0.index()]; let literal0 = match_entries[match0.index()].match_literal;
let literal1 = literals[match1.index()]; let literal1 = match_entries[match1.index()].match_literal;
let span0 = literals_map[&literal0];
let _span1 = literals_map[&literal1];
// FIXME(#88) -- it'd be nice to give an example here // FIXME(#88) -- it'd be nice to give an example here
return_err!( return_err!(spans[&literal0],
span0, "ambiguity detected between the terminal `{}` and the terminal `{}`",
"ambiguity detected between the terminal `{}` and the terminal `{}`", literal0,
literal0, literal1); literal1)
} }
}; };
grammar.items.push(GrammarItem::InternToken(InternToken { grammar
literals: literals, .items
match_to_user_name_map: match_to_user_name_map, .push(GrammarItem::InternToken(InternToken {
dfa: dfa match_entries: match_entries,
})); dfa: dfa,
}));
// we need to inject a `'input` lifetime and `input: &'input str` parameter as well: // we need to inject a `'input` lifetime and `input: &'input str` parameter as well:
@ -299,38 +350,36 @@ pub fn construct(grammar: &mut Grammar, literals_map: Map<TerminalLiteral, Span>
for parameter in &grammar.type_parameters { for parameter in &grammar.type_parameters {
match *parameter { match *parameter {
TypeParameter::Lifetime(i) if i == input_lifetime => { TypeParameter::Lifetime(i) if i == input_lifetime => {
return_err!( return_err!(grammar.span,
grammar.span, "since there is no external token enum specified, \
"since there is no external token enum specified, \
the `'input` lifetime is implicit and cannot be declared"); the `'input` lifetime is implicit and cannot be declared");
} }
_ => { } _ => {}
} }
} }
let input_parameter = intern(INPUT_PARAMETER); let input_parameter = intern(INPUT_PARAMETER);
for parameter in &grammar.parameters { for parameter in &grammar.parameters {
if parameter.name == input_parameter { if parameter.name == input_parameter {
return_err!( return_err!(grammar.span,
grammar.span, "since there is no external token enum specified, \
"since there is no external token enum specified, \
the `input` parameter is implicit and cannot be declared"); the `input` parameter is implicit and cannot be declared");
} }
} }
grammar.type_parameters.insert(0, TypeParameter::Lifetime(input_lifetime)); grammar
.type_parameters
.insert(0, TypeParameter::Lifetime(input_lifetime));
let parameter = Parameter { let parameter = Parameter {
name: input_parameter, name: input_parameter,
ty: TypeRef::Ref { ty: TypeRef::Ref {
lifetime: Some(input_lifetime), lifetime: Some(input_lifetime),
mutable: false, mutable: false,
referent: Box::new(TypeRef::Id(intern("str"))) referent: Box::new(TypeRef::Id(intern("str"))),
} },
}; };
grammar.parameters.push(parameter); grammar.parameters.push(parameter);
Ok(()) Ok(())
} }

View File

@ -24,19 +24,19 @@ fn check_intern_token(grammar: &str,
let parsed_grammar = validate_grammar(&grammar).expect("validate"); let parsed_grammar = validate_grammar(&grammar).expect("validate");
let intern_token = parsed_grammar.intern_token().expect("intern_token"); let intern_token = parsed_grammar.intern_token().expect("intern_token");
println!("intern_token: {:?}", intern_token); println!("intern_token: {:?}", intern_token);
for (input, expected_literal) in expected_tokens { for (input, expected_user_name) in expected_tokens {
let actual_literal = let actual_user_name =
interpret::interpret(&intern_token.dfa, input) interpret::interpret(&intern_token.dfa, input)
.map(|(index, text)| { .map(|(index, text)| {
let literal = intern_token.literals[index.index()]; let user_name = intern_token.match_entries[index.index()].user_name;
(literal, text) (user_name, text)
}); });
let actual_literal = format!("{:?}", actual_literal); let actual_user_name = format!("{:?}", actual_user_name);
if expected_literal != actual_literal { if expected_user_name != actual_user_name {
panic!("input `{}` matched `{}` but we expected `{}`", panic!("input `{}` matched `{}` but we expected `{}`",
input, input,
actual_literal, actual_user_name,
expected_literal); expected_user_name);
} }
} }
} }
@ -85,11 +85,11 @@ fn invalid_regular_expression_unterminated_group() {
fn quoted_literals() { fn quoted_literals() {
check_intern_token( check_intern_token(
r#"grammar; X = X "+" "-" "foo" "(" ")";"#, r#"grammar; X = X "+" "-" "foo" "(" ")";"#,
vec![("+", r#"Some(("+"+1, "+"))"#), vec![("+", r#"Some(("+", "+"))"#),
("-", r#"Some(("-"+1, "-"))"#), ("-", r#"Some(("-", "-"))"#),
("(", r#"Some(("("+1, "("))"#), ("(", r#"Some(("(", "("))"#),
(")", r#"Some((")"+1, ")"))"#), (")", r#"Some((")", ")"))"#),
("foo", r#"Some(("foo"+1, "foo"))"#), ("foo", r#"Some(("foo", "foo"))"#),
("<", r#"None"#)]); ("<", r#"None"#)]);
} }
@ -98,10 +98,10 @@ fn regex_literals() {
check_intern_token( check_intern_token(
r#"grammar; X = X r"[a-z]+" r"[0-9]+";"#, r#"grammar; X = X r"[a-z]+" r"[0-9]+";"#,
vec![ vec![
("a", r##"Some((r#"[a-z]+"#+0, "a"))"##), ("a", r##"Some((r#"[a-z]+"#, "a"))"##),
("def", r##"Some((r#"[a-z]+"#+0, "def"))"##), ("def", r##"Some((r#"[a-z]+"#, "def"))"##),
("1", r##"Some((r#"[0-9]+"#+0, "1"))"##), ("1", r##"Some((r#"[0-9]+"#, "1"))"##),
("9123456", r##"Some((r#"[0-9]+"#+0, "9123456"))"##), ("9123456", r##"Some((r#"[0-9]+"#, "9123456"))"##),
]); ]);
} }
@ -110,9 +110,9 @@ fn match_mappings() {
check_intern_token( check_intern_token(
r#"grammar; match { r"(?i)begin" => "BEGIN" } else { "abc" => ALPHA } X = "BEGIN" ALPHA;"#, r#"grammar; match { r"(?i)begin" => "BEGIN" } else { "abc" => ALPHA } X = "BEGIN" ALPHA;"#,
vec![ vec![
("BEGIN", r##"Some((r#"(?i)begin"#+4, "BEGIN"))"##), ("BEGIN", r##"Some(("BEGIN", "BEGIN"))"##),
("begin", r##"Some((r#"(?i)begin"#+4, "begin"))"##), ("begin", r##"Some(("BEGIN", "begin"))"##),
("abc", r#"Some(("abc"+3, "abc"))"#), // ALPHA ("abc", r#"Some((ALPHA, "abc"))"#),
]); ]);
} }

View File

@ -9,7 +9,7 @@ use grammar::parse_tree::{ActionKind, Alternative,
Path, Path,
Span, Span,
SymbolKind, SymbolKind,
TerminalString, TypeRef}; TypeRef};
use grammar::repr::{NominalTypeRepr, Types, TypeRepr}; use grammar::repr::{NominalTypeRepr, Types, TypeRepr};
use intern::intern; use intern::intern;
@ -79,12 +79,8 @@ impl<'grammar> TypeInferencer<'grammar> {
let mut types = Types::new(&grammar.prefix, Some(loc_type), error_type, enum_type); let mut types = Types::new(&grammar.prefix, Some(loc_type), error_type, enum_type);
for &literal in &intern_token.literals { for match_entry in &intern_token.match_entries {
let user_name = intern_token.match_to_user_name_map types.add_term_type(match_entry.user_name, input_str.clone());
.get(&literal)
.cloned()
.unwrap_or(TerminalString::Literal(literal));
types.add_term_type(user_name, input_str.clone());
} }
types types

View File

@ -344,8 +344,8 @@ QuotedTerminal: TerminalString = {
}; };
QuotedLiteral: TerminalLiteral = { QuotedLiteral: TerminalLiteral = {
<s:StringLiteral> => TerminalLiteral::Quoted(s, 1), <s:StringLiteral> => TerminalLiteral::Quoted(s),
<s:RegexLiteral> => TerminalLiteral::Regex(s, 0), <s:RegexLiteral> => TerminalLiteral::Regex(s),
}; };
StringLiteral: InternedString = StringLiteral: InternedString =

View File

@ -49,7 +49,7 @@ fn match_complex() {
let item00 = contents0.items.get(0).unwrap(); let item00 = contents0.items.get(0).unwrap();
match *item00 { match *item00 {
MatchItem::Mapped(ref sym, ref mapping, _) => { MatchItem::Mapped(ref sym, ref mapping, _) => {
assert_eq!(format!("{:?}", sym), "r#\"(?i)begin\"#+0"); assert_eq!(format!("{:?}", sym), "r#\"(?i)begin\"#");
assert_eq!(format!("{}", mapping), "\"BEGIN\""); assert_eq!(format!("{}", mapping), "\"BEGIN\"");
}, },
_ => panic!("expected MatchItem::Mapped, but was: {:?}", item00) _ => panic!("expected MatchItem::Mapped, but was: {:?}", item00)
@ -58,7 +58,7 @@ fn match_complex() {
let item01 = contents0.items.get(1).unwrap(); let item01 = contents0.items.get(1).unwrap();
match *item01 { match *item01 {
MatchItem::Mapped(ref sym, ref mapping, _) => { MatchItem::Mapped(ref sym, ref mapping, _) => {
assert_eq!(format!("{:?}", sym), "r#\"(?i)end\"#+0"); assert_eq!(format!("{:?}", sym), "r#\"(?i)end\"#");
assert_eq!(format!("{}", mapping), "\"END\""); assert_eq!(format!("{}", mapping), "\"END\"");
}, },
_ => panic!("expected MatchItem::Mapped, but was: {:?}", item00) _ => panic!("expected MatchItem::Mapped, but was: {:?}", item00)
@ -69,7 +69,7 @@ fn match_complex() {
let item10 = contents1.items.get(0).unwrap(); let item10 = contents1.items.get(0).unwrap();
match *item10 { match *item10 {
MatchItem::Mapped(ref sym, ref mapping, _) => { MatchItem::Mapped(ref sym, ref mapping, _) => {
assert_eq!(format!("{:?}", sym), "r#\"[a-zA-Z_][a-zA-Z0-9_]*\"#+0"); assert_eq!(format!("{:?}", sym), "r#\"[a-zA-Z_][a-zA-Z0-9_]*\"#");
assert_eq!(format!("{}", mapping), "IDENTIFIER"); assert_eq!(format!("{}", mapping), "IDENTIFIER");
}, },
_ => panic!("expected MatchItem::Mapped, but was: {:?}", item10) _ => panic!("expected MatchItem::Mapped, but was: {:?}", item10)
@ -80,7 +80,7 @@ fn match_complex() {
let item20 = contents2.items.get(0).unwrap(); let item20 = contents2.items.get(0).unwrap();
match *item20 { match *item20 {
MatchItem::Unmapped(ref sym, _) => { MatchItem::Unmapped(ref sym, _) => {
assert_eq!(format!("{:?}", sym), "\"other\"+1"); assert_eq!(format!("{:?}", sym), "\"other\"");
}, },
_ => panic!("expected MatchItem::Unmapped, but was: {:?}", item20) _ => panic!("expected MatchItem::Unmapped, but was: {:?}", item20)
}; };