mirror of
https://github.com/fluencelabs/lalrpop
synced 2025-03-16 17:00:53 +00:00
revamp how we tokenck and store the InternToken
In `InternToken`, we now coalesce everything into one `Vec<MatchEntry>`, rather than using a vector and a map. In the token-check code, the various fields associated with a match are moved into a struct.
This commit is contained in:
parent
b75669c8d6
commit
6fe7377c22
@ -15,7 +15,6 @@ use message::builder::InlineBuilder;
|
||||
use std::fmt::{Debug, Display, Formatter, Error};
|
||||
use tls::Tls;
|
||||
use util::Sep;
|
||||
use collections::Map;
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub struct Grammar {
|
||||
@ -118,15 +117,49 @@ pub type MatchMapping = TerminalString;
|
||||
pub struct InternToken {
|
||||
/// Set of `r"foo"` and `"foo"` literals extracted from the
|
||||
/// grammar. Sorted by order of increasing precedence.
|
||||
pub literals: Vec<TerminalLiteral>,
|
||||
|
||||
/// For each item remapped in a `match` block, map from the
|
||||
/// regex we match to the name the user wants to use.
|
||||
pub match_to_user_name_map: Map<TerminalLiteral, TerminalString>,
|
||||
|
||||
pub match_entries: Vec<MatchEntry>,
|
||||
pub dfa: DFA
|
||||
}
|
||||
|
||||
/// In `token_check`, as we prepare to generate a tokenizer, we
|
||||
/// combine any `match` declaration the user may have given with the
|
||||
/// set of literals (e.g. `"foo"` or `r"[a-z]"`) that appear elsewhere
|
||||
/// in the grammar to produce a series of `MatchEntry`. Each
|
||||
/// `MatchEntry` roughly corresponds to one line in a `match` declaration.
|
||||
///
|
||||
/// So e.g. if you had
|
||||
///
|
||||
/// ```
|
||||
/// match {
|
||||
/// r"(?i)BEGIN" => "BEGIN",
|
||||
/// "+" => "+",
|
||||
/// } else {
|
||||
/// _
|
||||
/// }
|
||||
///
|
||||
/// ID = r"[a-zA-Z]+"
|
||||
/// ```
|
||||
///
|
||||
/// This would correspond to three match entries:
|
||||
/// - `MatchEntry { match_literal: r"(?i)BEGIN", user_name: "BEGIN", precedence: 2 }`
|
||||
/// - `MatchEntry { match_literal: "+", user_name: "+", precedence: 3 }`
|
||||
/// - `MatchEntry { match_literal: r"[a-zA-Z]+", user_name: r"[a-zA-Z]+", precedence: 0 }`
|
||||
///
|
||||
/// A couple of things to note:
|
||||
///
|
||||
/// - Literals appearing in the grammar are converted into an "identity" mapping
|
||||
/// - Each match group G is combined with the implicit priority IP of 1 for literals and 0 for
|
||||
/// regex to yield the final precedence; the formula is `G*2 + IP`.
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct MatchEntry {
|
||||
/// The precedence of this match entry.
|
||||
///
|
||||
/// NB: This field must go first, so that `PartialOrd` sorts by precedence first!
|
||||
pub precedence: usize,
|
||||
pub match_literal: TerminalLiteral,
|
||||
pub user_name: TerminalString,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub struct ExternToken {
|
||||
pub span: Span,
|
||||
@ -330,28 +363,18 @@ impl TerminalString {
|
||||
|
||||
#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum TerminalLiteral {
|
||||
Quoted(InternedString, usize),
|
||||
Regex(InternedString, usize),
|
||||
Quoted(InternedString),
|
||||
Regex(InternedString),
|
||||
}
|
||||
|
||||
impl TerminalLiteral {
|
||||
/// Currently, at least, quoted literals ("foo") always have
|
||||
/// higher precedence than regex literals (r"foo"). This only
|
||||
/// applies when we are creating the tokenizer anyhow.
|
||||
pub fn precedence(&self) -> usize {
|
||||
/// The *base precedence* is the precedence within a `match { }`
|
||||
/// block level. It indicates that quoted things like `"foo"` get
|
||||
/// precedence over regex matches.
|
||||
pub fn base_precedence(&self) -> usize {
|
||||
match *self {
|
||||
TerminalLiteral::Quoted(_, p) => p,
|
||||
TerminalLiteral::Regex(_, p) => p,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_match_precedence(self, p: usize) -> TerminalLiteral {
|
||||
// Multiply times two since we still want to distinguish
|
||||
// between quoted and regex precedence
|
||||
let base_precedence = p * 2;
|
||||
match self {
|
||||
TerminalLiteral::Quoted(i, _) => TerminalLiteral::Quoted(i, base_precedence+1),
|
||||
TerminalLiteral::Regex(i, _) => TerminalLiteral::Regex(i, base_precedence+0),
|
||||
TerminalLiteral::Quoted(_) => 1,
|
||||
TerminalLiteral::Regex(_) => 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -391,11 +414,11 @@ pub struct MacroSymbol {
|
||||
|
||||
impl TerminalString {
|
||||
pub fn quoted(i: InternedString) -> TerminalString {
|
||||
TerminalString::Literal(TerminalLiteral::Quoted(i, 1))
|
||||
TerminalString::Literal(TerminalLiteral::Quoted(i))
|
||||
}
|
||||
|
||||
pub fn regex(i: InternedString) -> TerminalString {
|
||||
TerminalString::Literal(TerminalLiteral::Regex(i, 0))
|
||||
TerminalString::Literal(TerminalLiteral::Regex(i))
|
||||
}
|
||||
}
|
||||
|
||||
@ -523,9 +546,9 @@ impl Debug for TerminalString {
|
||||
impl Display for TerminalLiteral {
|
||||
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
|
||||
match *self {
|
||||
TerminalLiteral::Quoted(s, _) =>
|
||||
TerminalLiteral::Quoted(s) =>
|
||||
write!(fmt, "{:?}", s), // the Debug impl adds the `"` and escaping
|
||||
TerminalLiteral::Regex(s, _) =>
|
||||
TerminalLiteral::Regex(s) =>
|
||||
write!(fmt, "r#{:?}#", s), // FIXME -- need to determine proper number of #
|
||||
}
|
||||
}
|
||||
@ -533,10 +556,7 @@ impl Display for TerminalLiteral {
|
||||
|
||||
impl Debug for TerminalLiteral {
|
||||
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
|
||||
match *self {
|
||||
TerminalLiteral::Quoted(_, p) | TerminalLiteral::Regex(_, p) =>
|
||||
write!(fmt, "{}+{}", self, p)
|
||||
}
|
||||
write!(fmt, "{}", self)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -59,11 +59,11 @@ pub fn compile<W: Write>(
|
||||
// create a vector of rust string literals with the text of each
|
||||
// regular expression
|
||||
let regex_strings: Vec<String> = intern::read(|interner| {
|
||||
intern_token.literals
|
||||
intern_token.match_entries
|
||||
.iter()
|
||||
.map(|&literal| match literal {
|
||||
TerminalLiteral::Quoted(s, _) => re::parse_literal(interner.data(s)),
|
||||
TerminalLiteral::Regex(s, _) => re::parse_regex(interner.data(s)).unwrap(),
|
||||
.map(|match_entry| match match_entry.match_literal {
|
||||
TerminalLiteral::Quoted(s) => re::parse_literal(interner.data(s)),
|
||||
TerminalLiteral::Regex(s) => re::parse_regex(interner.data(s)).unwrap(),
|
||||
})
|
||||
.map(|regex| {
|
||||
// make sure all regex are anchored at the beginning of the input
|
||||
@ -134,7 +134,7 @@ pub fn compile<W: Write>(
|
||||
// checking if each one matches, and remembering the longest one.
|
||||
rust!(out, "let mut {}longest_match = 0;", prefix); // length of longest match
|
||||
rust!(out, "let mut {}index = 0;", prefix); // index of longest match
|
||||
rust!(out, "for {}i in 0 .. {} {{", prefix, intern_token.literals.len());
|
||||
rust!(out, "for {}i in 0 .. {} {{", prefix, intern_token.match_entries.len());
|
||||
rust!(out, "if {}matches.matched({}i) {{", prefix, prefix);
|
||||
|
||||
// re-run the regex to find out how long this particular match
|
||||
|
@ -71,31 +71,27 @@ impl<'s> LowerState<'s> {
|
||||
types: vec![],
|
||||
})),
|
||||
};
|
||||
self.conversions.extend(data.literals
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(index, &literal)| {
|
||||
let pattern = Pattern {
|
||||
span: span,
|
||||
kind: PatternKind::Tuple(vec![
|
||||
Pattern {
|
||||
span: span,
|
||||
kind: PatternKind::Usize(index),
|
||||
},
|
||||
Pattern {
|
||||
span: span,
|
||||
kind: PatternKind::Choose(input_str.clone())
|
||||
}
|
||||
]),
|
||||
};
|
||||
self.conversions.extend(
|
||||
data.match_entries
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(index, match_entry)| {
|
||||
let pattern = Pattern {
|
||||
span: span,
|
||||
kind: PatternKind::Tuple(vec![
|
||||
Pattern {
|
||||
span: span,
|
||||
kind: PatternKind::Usize(index),
|
||||
},
|
||||
Pattern {
|
||||
span: span,
|
||||
kind: PatternKind::Choose(input_str.clone())
|
||||
}
|
||||
]),
|
||||
};
|
||||
|
||||
// FIXME: This should be cleaner
|
||||
if let Some(&m) = data.match_to_user_name_map.get(&literal) {
|
||||
return (m, pattern);
|
||||
}
|
||||
|
||||
(TerminalString::Literal(literal), pattern)
|
||||
}));
|
||||
(match_entry.user_name, pattern)
|
||||
}));
|
||||
self.intern_token = Some(data);
|
||||
}
|
||||
|
||||
|
@ -252,7 +252,7 @@ impl MacroExpander {
|
||||
{
|
||||
if let Some(ref c) = *opt_cond {
|
||||
match args[&c.lhs] {
|
||||
SymbolKind::Terminal(TerminalString::Literal(TerminalLiteral::Quoted(lhs, _))) => {
|
||||
SymbolKind::Terminal(TerminalString::Literal(TerminalLiteral::Quoted(lhs))) => {
|
||||
match c.op {
|
||||
ConditionOp::Equals => Ok(lhs == c.rhs),
|
||||
ConditionOp::NotEquals => Ok(lhs != c.rhs),
|
||||
|
@ -13,19 +13,16 @@ use lexer::dfa::{self, DFAConstructionError, Precedence};
|
||||
use lexer::nfa::NFAConstructionError::*;
|
||||
use grammar::consts::*;
|
||||
use grammar::parse_tree::*;
|
||||
use collections::Set;
|
||||
use collections::{map, Map};
|
||||
use collections::{Map, Set};
|
||||
|
||||
#[cfg(test)]
|
||||
mod test;
|
||||
|
||||
pub fn validate(mut grammar: Grammar) -> NormResult<Grammar> {
|
||||
let (has_enum_token, all_literals, match_to_user_name_map) = {
|
||||
let (has_enum_token, match_block) = {
|
||||
let opt_match_token = grammar.match_token();
|
||||
|
||||
let mut match_to_user_name_map = map();
|
||||
let mut user_name_to_match_map = map();
|
||||
let mut match_catch_all = false;
|
||||
let mut match_block = MatchBlock::default();
|
||||
|
||||
if let Some(mt) = opt_match_token {
|
||||
// FIXME: This should probably move _inside_ the Validator
|
||||
@ -34,50 +31,50 @@ pub fn validate(mut grammar: Grammar) -> NormResult<Grammar> {
|
||||
for item in &mc.items {
|
||||
// TODO: Maybe move this into MatchItem methods
|
||||
match *item {
|
||||
MatchItem::Unmapped(sym, _) => {
|
||||
let precedence_sym = sym.with_match_precedence(precedence);
|
||||
match_to_user_name_map.insert(precedence_sym, TerminalString::Literal(sym));
|
||||
user_name_to_match_map.insert(TerminalString::Literal(sym), precedence_sym);
|
||||
},
|
||||
MatchItem::Mapped(sym, mapping, _) => {
|
||||
let precedence_sym = sym.with_match_precedence(precedence);
|
||||
match_to_user_name_map.insert(precedence_sym, mapping);
|
||||
user_name_to_match_map.insert(mapping, precedence_sym);
|
||||
},
|
||||
MatchItem::CatchAll(_) => { match_catch_all = true; }
|
||||
};
|
||||
MatchItem::Unmapped(sym, span) => {
|
||||
match_block.add_match_entry(precedence,
|
||||
sym,
|
||||
TerminalString::Literal(sym),
|
||||
span)?;
|
||||
}
|
||||
MatchItem::Mapped(sym, user, span) => {
|
||||
match_block.add_match_entry(precedence, sym, user, span)?;
|
||||
}
|
||||
MatchItem::CatchAll(_) => {
|
||||
match_block.catch_all = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// no match block is equivalent to `match { _ }`
|
||||
match_catch_all = true;
|
||||
match_block.catch_all = true;
|
||||
}
|
||||
|
||||
let opt_enum_token = grammar.enum_token();
|
||||
let conversions = opt_enum_token.map(|et| {
|
||||
et.conversions.iter()
|
||||
.map(|conversion| conversion.from)
|
||||
.collect()
|
||||
});
|
||||
et.conversions
|
||||
.iter()
|
||||
.map(|conversion| conversion.from)
|
||||
.collect()
|
||||
});
|
||||
|
||||
let mut validator = Validator {
|
||||
grammar: &grammar,
|
||||
all_literals: map(),
|
||||
conversions: conversions,
|
||||
user_name_to_match_map: user_name_to_match_map,
|
||||
match_catch_all: match_catch_all
|
||||
match_block: match_block,
|
||||
};
|
||||
|
||||
assert!(!opt_match_token.is_some() || !opt_enum_token.is_some(),
|
||||
"expected to not have both match and extern");
|
||||
"expected to not have both match and extern");
|
||||
|
||||
try!(validator.validate());
|
||||
|
||||
(opt_enum_token.is_some(), validator.all_literals, match_to_user_name_map)
|
||||
(opt_enum_token.is_some(), validator.match_block)
|
||||
};
|
||||
|
||||
if !has_enum_token {
|
||||
try!(construct(&mut grammar, all_literals, match_to_user_name_map));
|
||||
try!(construct(&mut grammar, match_block));
|
||||
}
|
||||
|
||||
Ok(grammar)
|
||||
@ -91,20 +88,100 @@ pub fn validate(mut grammar: Grammar) -> NormResult<Grammar> {
|
||||
|
||||
struct Validator<'grammar> {
|
||||
grammar: &'grammar Grammar,
|
||||
all_literals: Map<TerminalLiteral, Span>,
|
||||
|
||||
/// If an external tokenizer is in use, then this will be
|
||||
/// `Some(_)` and will point to all the defined conversions. In
|
||||
/// that case, the other fields below are irrelevant.
|
||||
conversions: Option<Set<TerminalString>>,
|
||||
user_name_to_match_map: Map<TerminalString, TerminalLiteral>,
|
||||
match_catch_all: bool,
|
||||
|
||||
match_block: MatchBlock,
|
||||
}
|
||||
|
||||
/// Data summarizing the `match { }` block, along with any literals we
|
||||
/// scraped up.
|
||||
#[derive(Default)]
|
||||
struct MatchBlock {
|
||||
/// This vector stores the `match { }` entries. If `catch_all`
|
||||
/// is true, then we will grow this vector with "identity mappings"
|
||||
/// for new literals that we find.
|
||||
match_entries: Vec<MatchEntry>,
|
||||
|
||||
/// The names of all terminals the user can legally type. If
|
||||
/// `catch_all` is true, then if we encounter additional
|
||||
/// terminal literals in the grammar, we will add them to this
|
||||
/// set.
|
||||
match_user_names: Set<TerminalString>,
|
||||
|
||||
/// For each terminal literal that we have to match, the span
|
||||
/// where it appeared in user's source. This can either be in the
|
||||
/// `match { }` section or else in the grammar somewhere (if added
|
||||
/// due to a catch-all, or there is no match section).
|
||||
spans: Map<TerminalLiteral, Span>,
|
||||
|
||||
/// True if we should permit unrecognized literals to be used.
|
||||
catch_all: bool,
|
||||
}
|
||||
|
||||
impl MatchBlock {
|
||||
fn add_match_entry(&mut self,
|
||||
match_group_precedence: usize,
|
||||
sym: TerminalLiteral,
|
||||
user_name: TerminalString,
|
||||
span: Span)
|
||||
-> NormResult<()> {
|
||||
if let Some(_old_span) = self.spans.insert(sym, span) {
|
||||
return_err!(span, "multiple match entries for `{}`", sym);
|
||||
}
|
||||
|
||||
// NB: It's legal for multiple regex to produce same terminal.
|
||||
self.match_user_names.insert(user_name);
|
||||
|
||||
self.match_entries
|
||||
.push(MatchEntry {
|
||||
precedence: match_group_precedence * 2 + sym.base_precedence(),
|
||||
match_literal: sym,
|
||||
user_name: user_name,
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn add_literal_from_grammar(&mut self, sym: TerminalLiteral, span: Span) -> NormResult<()> {
|
||||
// Already saw this literal, maybe in a match entry, maybe in the grammar.
|
||||
if self.match_user_names
|
||||
.contains(&TerminalString::Literal(sym)) {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if !self.catch_all {
|
||||
return_err!(span,
|
||||
"terminal `{}` does not have a match mapping defined for it",
|
||||
sym);
|
||||
}
|
||||
|
||||
self.match_user_names
|
||||
.insert(TerminalString::Literal(sym));
|
||||
|
||||
self.match_entries
|
||||
.push(MatchEntry {
|
||||
precedence: sym.base_precedence(),
|
||||
match_literal: sym,
|
||||
user_name: TerminalString::Literal(sym),
|
||||
});
|
||||
|
||||
self.spans.insert(sym, span);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'grammar> Validator<'grammar> {
|
||||
fn validate(&mut self) -> NormResult<()> {
|
||||
for item in &self.grammar.items {
|
||||
match *item {
|
||||
GrammarItem::Use(..) => { }
|
||||
GrammarItem::MatchToken(..) => { }
|
||||
GrammarItem::ExternToken(_) => { }
|
||||
GrammarItem::InternToken(_) => { }
|
||||
GrammarItem::Use(..) => {}
|
||||
GrammarItem::MatchToken(..) => {}
|
||||
GrammarItem::ExternToken(_) => {}
|
||||
GrammarItem::InternToken(_) => {}
|
||||
GrammarItem::Nonterminal(ref data) => {
|
||||
for alternative in &data.alternatives {
|
||||
try!(self.validate_alternative(alternative));
|
||||
@ -136,16 +213,15 @@ impl<'grammar> Validator<'grammar> {
|
||||
SymbolKind::Terminal(term) => {
|
||||
try!(self.validate_terminal(symbol.span, term));
|
||||
}
|
||||
SymbolKind::Nonterminal(_) => {
|
||||
}
|
||||
SymbolKind::Nonterminal(_) => {}
|
||||
SymbolKind::Repeat(ref repeat) => {
|
||||
try!(self.validate_symbol(&repeat.symbol));
|
||||
}
|
||||
SymbolKind::Choose(ref sym) | SymbolKind::Name(_, ref sym) => {
|
||||
SymbolKind::Choose(ref sym) |
|
||||
SymbolKind::Name(_, ref sym) => {
|
||||
try!(self.validate_symbol(sym));
|
||||
}
|
||||
SymbolKind::Lookahead | SymbolKind::Lookbehind | SymbolKind::Error => {
|
||||
}
|
||||
SymbolKind::Lookahead | SymbolKind::Lookbehind | SymbolKind::Error => {}
|
||||
SymbolKind::AmbiguousId(id) => {
|
||||
panic!("ambiguous id `{}` encountered after name resolution", id)
|
||||
}
|
||||
@ -163,49 +239,29 @@ impl<'grammar> Validator<'grammar> {
|
||||
// this terminal has a defined conversion.
|
||||
Some(ref c) => {
|
||||
if !c.contains(&term) {
|
||||
return_err!(span, "terminal `{}` does not have a pattern defined for it",
|
||||
return_err!(span,
|
||||
"terminal `{}` does not have a pattern defined for it",
|
||||
term);
|
||||
}
|
||||
}
|
||||
|
||||
// If there is no extern token definition, then collect
|
||||
// the terminal literals ("class", r"[a-z]+") into a set.
|
||||
None => match term {
|
||||
// FIMXE: Should not allow undefined literals if no CatchAll
|
||||
TerminalString::Bare(c) => match self.user_name_to_match_map.get(&term) {
|
||||
Some(&vl) => {
|
||||
// FIXME: I don't think this span here is correct
|
||||
self.all_literals.entry(vl).or_insert(span);
|
||||
None => {
|
||||
match term {
|
||||
TerminalString::Bare(_) => {
|
||||
assert!(self.match_block.match_user_names.contains(&term),
|
||||
"bare terminal without match entry: {}",
|
||||
term)
|
||||
}
|
||||
|
||||
None => {
|
||||
// Bare identifiers like `x` can never be resolved
|
||||
// as terminals unless there is a conversion or mapping
|
||||
// defined for them that indicates they are a
|
||||
// terminal; otherwise it's just an unresolved
|
||||
// identifier.
|
||||
panic!("bare literal `{}` without extern token definition", c);
|
||||
}
|
||||
},
|
||||
|
||||
TerminalString::Literal(l) => match self.user_name_to_match_map.get(&term) {
|
||||
Some(&vl) => {
|
||||
// FIXME: I don't think this span here is correct
|
||||
self.all_literals.entry(vl).or_insert(span);
|
||||
TerminalString::Literal(l) => {
|
||||
self.match_block.add_literal_from_grammar(l, span)?
|
||||
}
|
||||
|
||||
None => {
|
||||
if self.match_catch_all {
|
||||
self.all_literals.entry(l).or_insert(span);
|
||||
} else {
|
||||
return_err!(span, "terminal `{}` does not have a match mapping defined for it",
|
||||
term);
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
// Error is a builtin terminal that always exists
|
||||
TerminalString::Error => (),
|
||||
// Error is a builtin terminal that always exists
|
||||
TerminalString::Error => (),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -217,38 +273,36 @@ impl<'grammar> Validator<'grammar> {
|
||||
// Construction phase -- if we are constructing a tokenizer, this
|
||||
// phase builds up an internal token DFA.
|
||||
|
||||
pub fn construct(grammar: &mut Grammar, literals_map: Map<TerminalLiteral, Span>, match_to_user_name_map: Map<TerminalLiteral, TerminalString>) -> NormResult<()> {
|
||||
let mut literals: Vec<TerminalLiteral> =
|
||||
literals_map.keys()
|
||||
.cloned()
|
||||
.collect();
|
||||
fn construct(grammar: &mut Grammar, match_block: MatchBlock) -> NormResult<()> {
|
||||
let MatchBlock {
|
||||
mut match_entries,
|
||||
spans,
|
||||
..
|
||||
} = match_block;
|
||||
|
||||
// Sort literals by order of increasing precedence.
|
||||
literals.sort_by_key(|literal| literal.precedence());
|
||||
// Sort match entries by order of increasing precedence.
|
||||
match_entries.sort();
|
||||
|
||||
// Build up two vectors, one of parsed regular expressions and
|
||||
// one of precedences, that are parallel with `match_entries`.
|
||||
let mut regexs = Vec::with_capacity(literals.len());
|
||||
let mut precedences = Vec::with_capacity(literals.len());
|
||||
let mut regexs = Vec::with_capacity(match_entries.len());
|
||||
let mut precedences = Vec::with_capacity(match_entries.len());
|
||||
try!(intern::read(|interner| {
|
||||
for &literal in &literals {
|
||||
precedences.push(Precedence(literal.precedence()));
|
||||
match literal {
|
||||
TerminalLiteral::Quoted(s, _) => {
|
||||
for match_entry in &match_entries {
|
||||
precedences.push(Precedence(match_entry.precedence));
|
||||
match match_entry.match_literal {
|
||||
TerminalLiteral::Quoted(s) => {
|
||||
regexs.push(re::parse_literal(interner.data(s)));
|
||||
}
|
||||
TerminalLiteral::Regex(s, _) => {
|
||||
TerminalLiteral::Regex(s) => {
|
||||
match re::parse_regex(interner.data(s)) {
|
||||
Ok(regex) => regexs.push(regex),
|
||||
Err(error) => {
|
||||
let literal_span = literals_map[&literal];
|
||||
let literal_span = spans[&match_entry.match_literal];
|
||||
// FIXME -- take offset into account for
|
||||
// span; this requires knowing how many #
|
||||
// the user used, which we do not track
|
||||
return_err!(
|
||||
literal_span,
|
||||
"invalid regular expression: {}",
|
||||
error);
|
||||
return_err!(literal_span, "invalid regular expression: {}", error);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -267,31 +321,28 @@ pub fn construct(grammar: &mut Grammar, literals_map: Map<TerminalLiteral, Span>
|
||||
LineBoundary => r#"line boundaries (`^` or `$`)"#,
|
||||
TextBoundary => r#"text boundaries (`^` or `$`)"#,
|
||||
};
|
||||
let literal = literals[index.index()];
|
||||
let span = literals_map[&literal];
|
||||
return_err!(
|
||||
span,
|
||||
"{} are not supported in regular expressions",
|
||||
feature)
|
||||
let literal = match_entries[index.index()].match_literal;
|
||||
return_err!(spans[&literal],
|
||||
"{} are not supported in regular expressions",
|
||||
feature)
|
||||
}
|
||||
Err(DFAConstructionError::Ambiguity { match0, match1 }) => {
|
||||
let literal0 = literals[match0.index()];
|
||||
let literal1 = literals[match1.index()];
|
||||
let span0 = literals_map[&literal0];
|
||||
let _span1 = literals_map[&literal1];
|
||||
let literal0 = match_entries[match0.index()].match_literal;
|
||||
let literal1 = match_entries[match1.index()].match_literal;
|
||||
// FIXME(#88) -- it'd be nice to give an example here
|
||||
return_err!(
|
||||
span0,
|
||||
"ambiguity detected between the terminal `{}` and the terminal `{}`",
|
||||
literal0, literal1);
|
||||
return_err!(spans[&literal0],
|
||||
"ambiguity detected between the terminal `{}` and the terminal `{}`",
|
||||
literal0,
|
||||
literal1)
|
||||
}
|
||||
};
|
||||
|
||||
grammar.items.push(GrammarItem::InternToken(InternToken {
|
||||
literals: literals,
|
||||
match_to_user_name_map: match_to_user_name_map,
|
||||
dfa: dfa
|
||||
}));
|
||||
grammar
|
||||
.items
|
||||
.push(GrammarItem::InternToken(InternToken {
|
||||
match_entries: match_entries,
|
||||
dfa: dfa,
|
||||
}));
|
||||
|
||||
// we need to inject a `'input` lifetime and `input: &'input str` parameter as well:
|
||||
|
||||
@ -299,38 +350,36 @@ pub fn construct(grammar: &mut Grammar, literals_map: Map<TerminalLiteral, Span>
|
||||
for parameter in &grammar.type_parameters {
|
||||
match *parameter {
|
||||
TypeParameter::Lifetime(i) if i == input_lifetime => {
|
||||
return_err!(
|
||||
grammar.span,
|
||||
"since there is no external token enum specified, \
|
||||
return_err!(grammar.span,
|
||||
"since there is no external token enum specified, \
|
||||
the `'input` lifetime is implicit and cannot be declared");
|
||||
}
|
||||
_ => { }
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
let input_parameter = intern(INPUT_PARAMETER);
|
||||
for parameter in &grammar.parameters {
|
||||
if parameter.name == input_parameter {
|
||||
return_err!(
|
||||
grammar.span,
|
||||
"since there is no external token enum specified, \
|
||||
return_err!(grammar.span,
|
||||
"since there is no external token enum specified, \
|
||||
the `input` parameter is implicit and cannot be declared");
|
||||
}
|
||||
}
|
||||
|
||||
grammar.type_parameters.insert(0, TypeParameter::Lifetime(input_lifetime));
|
||||
grammar
|
||||
.type_parameters
|
||||
.insert(0, TypeParameter::Lifetime(input_lifetime));
|
||||
|
||||
let parameter = Parameter {
|
||||
name: input_parameter,
|
||||
ty: TypeRef::Ref {
|
||||
lifetime: Some(input_lifetime),
|
||||
mutable: false,
|
||||
referent: Box::new(TypeRef::Id(intern("str")))
|
||||
}
|
||||
referent: Box::new(TypeRef::Id(intern("str"))),
|
||||
},
|
||||
};
|
||||
grammar.parameters.push(parameter);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
|
@ -24,19 +24,19 @@ fn check_intern_token(grammar: &str,
|
||||
let parsed_grammar = validate_grammar(&grammar).expect("validate");
|
||||
let intern_token = parsed_grammar.intern_token().expect("intern_token");
|
||||
println!("intern_token: {:?}", intern_token);
|
||||
for (input, expected_literal) in expected_tokens {
|
||||
let actual_literal =
|
||||
for (input, expected_user_name) in expected_tokens {
|
||||
let actual_user_name =
|
||||
interpret::interpret(&intern_token.dfa, input)
|
||||
.map(|(index, text)| {
|
||||
let literal = intern_token.literals[index.index()];
|
||||
(literal, text)
|
||||
let user_name = intern_token.match_entries[index.index()].user_name;
|
||||
(user_name, text)
|
||||
});
|
||||
let actual_literal = format!("{:?}", actual_literal);
|
||||
if expected_literal != actual_literal {
|
||||
let actual_user_name = format!("{:?}", actual_user_name);
|
||||
if expected_user_name != actual_user_name {
|
||||
panic!("input `{}` matched `{}` but we expected `{}`",
|
||||
input,
|
||||
actual_literal,
|
||||
expected_literal);
|
||||
actual_user_name,
|
||||
expected_user_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -85,11 +85,11 @@ fn invalid_regular_expression_unterminated_group() {
|
||||
fn quoted_literals() {
|
||||
check_intern_token(
|
||||
r#"grammar; X = X "+" "-" "foo" "(" ")";"#,
|
||||
vec![("+", r#"Some(("+"+1, "+"))"#),
|
||||
("-", r#"Some(("-"+1, "-"))"#),
|
||||
("(", r#"Some(("("+1, "("))"#),
|
||||
(")", r#"Some((")"+1, ")"))"#),
|
||||
("foo", r#"Some(("foo"+1, "foo"))"#),
|
||||
vec![("+", r#"Some(("+", "+"))"#),
|
||||
("-", r#"Some(("-", "-"))"#),
|
||||
("(", r#"Some(("(", "("))"#),
|
||||
(")", r#"Some((")", ")"))"#),
|
||||
("foo", r#"Some(("foo", "foo"))"#),
|
||||
("<", r#"None"#)]);
|
||||
}
|
||||
|
||||
@ -98,10 +98,10 @@ fn regex_literals() {
|
||||
check_intern_token(
|
||||
r#"grammar; X = X r"[a-z]+" r"[0-9]+";"#,
|
||||
vec![
|
||||
("a", r##"Some((r#"[a-z]+"#+0, "a"))"##),
|
||||
("def", r##"Some((r#"[a-z]+"#+0, "def"))"##),
|
||||
("1", r##"Some((r#"[0-9]+"#+0, "1"))"##),
|
||||
("9123456", r##"Some((r#"[0-9]+"#+0, "9123456"))"##),
|
||||
("a", r##"Some((r#"[a-z]+"#, "a"))"##),
|
||||
("def", r##"Some((r#"[a-z]+"#, "def"))"##),
|
||||
("1", r##"Some((r#"[0-9]+"#, "1"))"##),
|
||||
("9123456", r##"Some((r#"[0-9]+"#, "9123456"))"##),
|
||||
]);
|
||||
}
|
||||
|
||||
@ -110,9 +110,9 @@ fn match_mappings() {
|
||||
check_intern_token(
|
||||
r#"grammar; match { r"(?i)begin" => "BEGIN" } else { "abc" => ALPHA } X = "BEGIN" ALPHA;"#,
|
||||
vec![
|
||||
("BEGIN", r##"Some((r#"(?i)begin"#+4, "BEGIN"))"##),
|
||||
("begin", r##"Some((r#"(?i)begin"#+4, "begin"))"##),
|
||||
("abc", r#"Some(("abc"+3, "abc"))"#), // ALPHA
|
||||
("BEGIN", r##"Some(("BEGIN", "BEGIN"))"##),
|
||||
("begin", r##"Some(("BEGIN", "begin"))"##),
|
||||
("abc", r#"Some((ALPHA, "abc"))"#),
|
||||
]);
|
||||
}
|
||||
|
||||
|
@ -9,7 +9,7 @@ use grammar::parse_tree::{ActionKind, Alternative,
|
||||
Path,
|
||||
Span,
|
||||
SymbolKind,
|
||||
TerminalString, TypeRef};
|
||||
TypeRef};
|
||||
use grammar::repr::{NominalTypeRepr, Types, TypeRepr};
|
||||
use intern::intern;
|
||||
|
||||
@ -79,12 +79,8 @@ impl<'grammar> TypeInferencer<'grammar> {
|
||||
|
||||
let mut types = Types::new(&grammar.prefix, Some(loc_type), error_type, enum_type);
|
||||
|
||||
for &literal in &intern_token.literals {
|
||||
let user_name = intern_token.match_to_user_name_map
|
||||
.get(&literal)
|
||||
.cloned()
|
||||
.unwrap_or(TerminalString::Literal(literal));
|
||||
types.add_term_type(user_name, input_str.clone());
|
||||
for match_entry in &intern_token.match_entries {
|
||||
types.add_term_type(match_entry.user_name, input_str.clone());
|
||||
}
|
||||
|
||||
types
|
||||
|
@ -344,8 +344,8 @@ QuotedTerminal: TerminalString = {
|
||||
};
|
||||
|
||||
QuotedLiteral: TerminalLiteral = {
|
||||
<s:StringLiteral> => TerminalLiteral::Quoted(s, 1),
|
||||
<s:RegexLiteral> => TerminalLiteral::Regex(s, 0),
|
||||
<s:StringLiteral> => TerminalLiteral::Quoted(s),
|
||||
<s:RegexLiteral> => TerminalLiteral::Regex(s),
|
||||
};
|
||||
|
||||
StringLiteral: InternedString =
|
||||
|
@ -49,7 +49,7 @@ fn match_complex() {
|
||||
let item00 = contents0.items.get(0).unwrap();
|
||||
match *item00 {
|
||||
MatchItem::Mapped(ref sym, ref mapping, _) => {
|
||||
assert_eq!(format!("{:?}", sym), "r#\"(?i)begin\"#+0");
|
||||
assert_eq!(format!("{:?}", sym), "r#\"(?i)begin\"#");
|
||||
assert_eq!(format!("{}", mapping), "\"BEGIN\"");
|
||||
},
|
||||
_ => panic!("expected MatchItem::Mapped, but was: {:?}", item00)
|
||||
@ -58,7 +58,7 @@ fn match_complex() {
|
||||
let item01 = contents0.items.get(1).unwrap();
|
||||
match *item01 {
|
||||
MatchItem::Mapped(ref sym, ref mapping, _) => {
|
||||
assert_eq!(format!("{:?}", sym), "r#\"(?i)end\"#+0");
|
||||
assert_eq!(format!("{:?}", sym), "r#\"(?i)end\"#");
|
||||
assert_eq!(format!("{}", mapping), "\"END\"");
|
||||
},
|
||||
_ => panic!("expected MatchItem::Mapped, but was: {:?}", item00)
|
||||
@ -69,7 +69,7 @@ fn match_complex() {
|
||||
let item10 = contents1.items.get(0).unwrap();
|
||||
match *item10 {
|
||||
MatchItem::Mapped(ref sym, ref mapping, _) => {
|
||||
assert_eq!(format!("{:?}", sym), "r#\"[a-zA-Z_][a-zA-Z0-9_]*\"#+0");
|
||||
assert_eq!(format!("{:?}", sym), "r#\"[a-zA-Z_][a-zA-Z0-9_]*\"#");
|
||||
assert_eq!(format!("{}", mapping), "IDENTIFIER");
|
||||
},
|
||||
_ => panic!("expected MatchItem::Mapped, but was: {:?}", item10)
|
||||
@ -80,7 +80,7 @@ fn match_complex() {
|
||||
let item20 = contents2.items.get(0).unwrap();
|
||||
match *item20 {
|
||||
MatchItem::Unmapped(ref sym, _) => {
|
||||
assert_eq!(format!("{:?}", sym), "\"other\"+1");
|
||||
assert_eq!(format!("{:?}", sym), "\"other\"");
|
||||
},
|
||||
_ => panic!("expected MatchItem::Unmapped, but was: {:?}", item20)
|
||||
};
|
||||
|
Loading…
x
Reference in New Issue
Block a user