mirror of
https://github.com/fluencelabs/lalrpop
synced 2025-03-28 06:01:02 +00:00
revamp how we tokenck and store the InternToken
In `InternToken`, we now coalesce everything into one `Vec<MatchEntry>`, rather than using a vector and a map. In the token-check code, the various fields associated with a match are moved into a struct.
This commit is contained in:
parent
b75669c8d6
commit
6fe7377c22
@ -15,7 +15,6 @@ use message::builder::InlineBuilder;
|
|||||||
use std::fmt::{Debug, Display, Formatter, Error};
|
use std::fmt::{Debug, Display, Formatter, Error};
|
||||||
use tls::Tls;
|
use tls::Tls;
|
||||||
use util::Sep;
|
use util::Sep;
|
||||||
use collections::Map;
|
|
||||||
|
|
||||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||||
pub struct Grammar {
|
pub struct Grammar {
|
||||||
@ -118,15 +117,49 @@ pub type MatchMapping = TerminalString;
|
|||||||
pub struct InternToken {
|
pub struct InternToken {
|
||||||
/// Set of `r"foo"` and `"foo"` literals extracted from the
|
/// Set of `r"foo"` and `"foo"` literals extracted from the
|
||||||
/// grammar. Sorted by order of increasing precedence.
|
/// grammar. Sorted by order of increasing precedence.
|
||||||
pub literals: Vec<TerminalLiteral>,
|
pub match_entries: Vec<MatchEntry>,
|
||||||
|
|
||||||
/// For each item remapped in a `match` block, map from the
|
|
||||||
/// regex we match to the name the user wants to use.
|
|
||||||
pub match_to_user_name_map: Map<TerminalLiteral, TerminalString>,
|
|
||||||
|
|
||||||
pub dfa: DFA
|
pub dfa: DFA
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// In `token_check`, as we prepare to generate a tokenizer, we
|
||||||
|
/// combine any `match` declaration the user may have given with the
|
||||||
|
/// set of literals (e.g. `"foo"` or `r"[a-z]"`) that appear elsewhere
|
||||||
|
/// in their grammar to produce a series of `MatchEntry`. Each
|
||||||
|
/// `MatchEntry` roughly corresponds to one line in a `match` declaration.
|
||||||
|
///
|
||||||
|
/// So e.g. if you had
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// match {
|
||||||
|
/// r"(?i)BEGIN" => "BEGIN",
|
||||||
|
/// "+" => "+",
|
||||||
|
/// } else {
|
||||||
|
/// _
|
||||||
|
/// }
|
||||||
|
///
|
||||||
|
/// ID = r"[a-zA-Z]+"
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// This would correspond to three match entries:
|
||||||
|
/// - `MatchEntry { match_literal: r"(?i)BEGIN", user_name: "BEGIN", precedence: 2 }`
|
||||||
|
/// - `MatchEntry { match_literal: "+", user_name: "+", precedence: 3 }`
|
||||||
|
/// - `MatchEntry { match_literal: r"[a-zA-Z]+", user_name: r"[a-zA-Z]+", precedence: 0 }`
|
||||||
|
///
|
||||||
|
/// A couple of things to note:
|
||||||
|
///
|
||||||
|
/// - Literals appearing in the grammar are converted into an "identity" mapping
|
||||||
|
/// - Each match group G is combined with the implicit priority IP of 1 for literals and 0 for
|
||||||
|
/// regex to yield the final precedence; the formula is `G*2 + IP`.
|
||||||
|
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||||
|
pub struct MatchEntry {
|
||||||
|
/// The precedence of this match entry.
|
||||||
|
///
|
||||||
|
/// NB: This field must go first, so that `PartialOrd` sorts by precedence first!
|
||||||
|
pub precedence: usize,
|
||||||
|
pub match_literal: TerminalLiteral,
|
||||||
|
pub user_name: TerminalString,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||||
pub struct ExternToken {
|
pub struct ExternToken {
|
||||||
pub span: Span,
|
pub span: Span,
|
||||||
@ -330,28 +363,18 @@ impl TerminalString {
|
|||||||
|
|
||||||
#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
|
#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
|
||||||
pub enum TerminalLiteral {
|
pub enum TerminalLiteral {
|
||||||
Quoted(InternedString, usize),
|
Quoted(InternedString),
|
||||||
Regex(InternedString, usize),
|
Regex(InternedString),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TerminalLiteral {
|
impl TerminalLiteral {
|
||||||
/// Currently, at least, quoted literals ("foo") always have
|
/// The *base precedence* is the precedence within a `match { }`
|
||||||
/// higher precedence than regex literals (r"foo"). This only
|
/// block level. It indicates that quoted things like `"foo"` get
|
||||||
/// applies when we are creating the tokenizer anyhow.
|
/// precedence over regex matches.
|
||||||
pub fn precedence(&self) -> usize {
|
pub fn base_precedence(&self) -> usize {
|
||||||
match *self {
|
match *self {
|
||||||
TerminalLiteral::Quoted(_, p) => p,
|
TerminalLiteral::Quoted(_) => 1,
|
||||||
TerminalLiteral::Regex(_, p) => p,
|
TerminalLiteral::Regex(_) => 0,
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn with_match_precedence(self, p: usize) -> TerminalLiteral {
|
|
||||||
// Multiply times two since we still want to distinguish
|
|
||||||
// between quoted and regex precedence
|
|
||||||
let base_precedence = p * 2;
|
|
||||||
match self {
|
|
||||||
TerminalLiteral::Quoted(i, _) => TerminalLiteral::Quoted(i, base_precedence+1),
|
|
||||||
TerminalLiteral::Regex(i, _) => TerminalLiteral::Regex(i, base_precedence+0),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -391,11 +414,11 @@ pub struct MacroSymbol {
|
|||||||
|
|
||||||
impl TerminalString {
|
impl TerminalString {
|
||||||
pub fn quoted(i: InternedString) -> TerminalString {
|
pub fn quoted(i: InternedString) -> TerminalString {
|
||||||
TerminalString::Literal(TerminalLiteral::Quoted(i, 1))
|
TerminalString::Literal(TerminalLiteral::Quoted(i))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn regex(i: InternedString) -> TerminalString {
|
pub fn regex(i: InternedString) -> TerminalString {
|
||||||
TerminalString::Literal(TerminalLiteral::Regex(i, 0))
|
TerminalString::Literal(TerminalLiteral::Regex(i))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -523,9 +546,9 @@ impl Debug for TerminalString {
|
|||||||
impl Display for TerminalLiteral {
|
impl Display for TerminalLiteral {
|
||||||
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
|
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
|
||||||
match *self {
|
match *self {
|
||||||
TerminalLiteral::Quoted(s, _) =>
|
TerminalLiteral::Quoted(s) =>
|
||||||
write!(fmt, "{:?}", s), // the Debug impl adds the `"` and escaping
|
write!(fmt, "{:?}", s), // the Debug impl adds the `"` and escaping
|
||||||
TerminalLiteral::Regex(s, _) =>
|
TerminalLiteral::Regex(s) =>
|
||||||
write!(fmt, "r#{:?}#", s), // FIXME -- need to determine proper number of #
|
write!(fmt, "r#{:?}#", s), // FIXME -- need to determine proper number of #
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -533,10 +556,7 @@ impl Display for TerminalLiteral {
|
|||||||
|
|
||||||
impl Debug for TerminalLiteral {
|
impl Debug for TerminalLiteral {
|
||||||
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
|
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
|
||||||
match *self {
|
write!(fmt, "{}", self)
|
||||||
TerminalLiteral::Quoted(_, p) | TerminalLiteral::Regex(_, p) =>
|
|
||||||
write!(fmt, "{}+{}", self, p)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -59,11 +59,11 @@ pub fn compile<W: Write>(
|
|||||||
// create a vector of rust string literals with the text of each
|
// create a vector of rust string literals with the text of each
|
||||||
// regular expression
|
// regular expression
|
||||||
let regex_strings: Vec<String> = intern::read(|interner| {
|
let regex_strings: Vec<String> = intern::read(|interner| {
|
||||||
intern_token.literals
|
intern_token.match_entries
|
||||||
.iter()
|
.iter()
|
||||||
.map(|&literal| match literal {
|
.map(|match_entry| match match_entry.match_literal {
|
||||||
TerminalLiteral::Quoted(s, _) => re::parse_literal(interner.data(s)),
|
TerminalLiteral::Quoted(s) => re::parse_literal(interner.data(s)),
|
||||||
TerminalLiteral::Regex(s, _) => re::parse_regex(interner.data(s)).unwrap(),
|
TerminalLiteral::Regex(s) => re::parse_regex(interner.data(s)).unwrap(),
|
||||||
})
|
})
|
||||||
.map(|regex| {
|
.map(|regex| {
|
||||||
// make sure all regex are anchored at the beginning of the input
|
// make sure all regex are anchored at the beginning of the input
|
||||||
@ -134,7 +134,7 @@ pub fn compile<W: Write>(
|
|||||||
// checking if each one matches, and remembering the longest one.
|
// checking if each one matches, and remembering the longest one.
|
||||||
rust!(out, "let mut {}longest_match = 0;", prefix); // length of longest match
|
rust!(out, "let mut {}longest_match = 0;", prefix); // length of longest match
|
||||||
rust!(out, "let mut {}index = 0;", prefix); // index of longest match
|
rust!(out, "let mut {}index = 0;", prefix); // index of longest match
|
||||||
rust!(out, "for {}i in 0 .. {} {{", prefix, intern_token.literals.len());
|
rust!(out, "for {}i in 0 .. {} {{", prefix, intern_token.match_entries.len());
|
||||||
rust!(out, "if {}matches.matched({}i) {{", prefix, prefix);
|
rust!(out, "if {}matches.matched({}i) {{", prefix, prefix);
|
||||||
|
|
||||||
// re-run the regex to find out how long this particular match
|
// re-run the regex to find out how long this particular match
|
||||||
|
@ -71,31 +71,27 @@ impl<'s> LowerState<'s> {
|
|||||||
types: vec![],
|
types: vec![],
|
||||||
})),
|
})),
|
||||||
};
|
};
|
||||||
self.conversions.extend(data.literals
|
self.conversions.extend(
|
||||||
.iter()
|
data.match_entries
|
||||||
.enumerate()
|
.iter()
|
||||||
.map(|(index, &literal)| {
|
.enumerate()
|
||||||
let pattern = Pattern {
|
.map(|(index, match_entry)| {
|
||||||
span: span,
|
let pattern = Pattern {
|
||||||
kind: PatternKind::Tuple(vec![
|
span: span,
|
||||||
Pattern {
|
kind: PatternKind::Tuple(vec![
|
||||||
span: span,
|
Pattern {
|
||||||
kind: PatternKind::Usize(index),
|
span: span,
|
||||||
},
|
kind: PatternKind::Usize(index),
|
||||||
Pattern {
|
},
|
||||||
span: span,
|
Pattern {
|
||||||
kind: PatternKind::Choose(input_str.clone())
|
span: span,
|
||||||
}
|
kind: PatternKind::Choose(input_str.clone())
|
||||||
]),
|
}
|
||||||
};
|
]),
|
||||||
|
};
|
||||||
|
|
||||||
// FIXME: This should be cleaner
|
(match_entry.user_name, pattern)
|
||||||
if let Some(&m) = data.match_to_user_name_map.get(&literal) {
|
}));
|
||||||
return (m, pattern);
|
|
||||||
}
|
|
||||||
|
|
||||||
(TerminalString::Literal(literal), pattern)
|
|
||||||
}));
|
|
||||||
self.intern_token = Some(data);
|
self.intern_token = Some(data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -252,7 +252,7 @@ impl MacroExpander {
|
|||||||
{
|
{
|
||||||
if let Some(ref c) = *opt_cond {
|
if let Some(ref c) = *opt_cond {
|
||||||
match args[&c.lhs] {
|
match args[&c.lhs] {
|
||||||
SymbolKind::Terminal(TerminalString::Literal(TerminalLiteral::Quoted(lhs, _))) => {
|
SymbolKind::Terminal(TerminalString::Literal(TerminalLiteral::Quoted(lhs))) => {
|
||||||
match c.op {
|
match c.op {
|
||||||
ConditionOp::Equals => Ok(lhs == c.rhs),
|
ConditionOp::Equals => Ok(lhs == c.rhs),
|
||||||
ConditionOp::NotEquals => Ok(lhs != c.rhs),
|
ConditionOp::NotEquals => Ok(lhs != c.rhs),
|
||||||
|
@ -13,19 +13,16 @@ use lexer::dfa::{self, DFAConstructionError, Precedence};
|
|||||||
use lexer::nfa::NFAConstructionError::*;
|
use lexer::nfa::NFAConstructionError::*;
|
||||||
use grammar::consts::*;
|
use grammar::consts::*;
|
||||||
use grammar::parse_tree::*;
|
use grammar::parse_tree::*;
|
||||||
use collections::Set;
|
use collections::{Map, Set};
|
||||||
use collections::{map, Map};
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test;
|
mod test;
|
||||||
|
|
||||||
pub fn validate(mut grammar: Grammar) -> NormResult<Grammar> {
|
pub fn validate(mut grammar: Grammar) -> NormResult<Grammar> {
|
||||||
let (has_enum_token, all_literals, match_to_user_name_map) = {
|
let (has_enum_token, match_block) = {
|
||||||
let opt_match_token = grammar.match_token();
|
let opt_match_token = grammar.match_token();
|
||||||
|
|
||||||
let mut match_to_user_name_map = map();
|
let mut match_block = MatchBlock::default();
|
||||||
let mut user_name_to_match_map = map();
|
|
||||||
let mut match_catch_all = false;
|
|
||||||
|
|
||||||
if let Some(mt) = opt_match_token {
|
if let Some(mt) = opt_match_token {
|
||||||
// FIXME: This should probably move _inside_ the Validator
|
// FIXME: This should probably move _inside_ the Validator
|
||||||
@ -34,50 +31,50 @@ pub fn validate(mut grammar: Grammar) -> NormResult<Grammar> {
|
|||||||
for item in &mc.items {
|
for item in &mc.items {
|
||||||
// TODO: Maybe move this into MatchItem methods
|
// TODO: Maybe move this into MatchItem methods
|
||||||
match *item {
|
match *item {
|
||||||
MatchItem::Unmapped(sym, _) => {
|
MatchItem::Unmapped(sym, span) => {
|
||||||
let precedence_sym = sym.with_match_precedence(precedence);
|
match_block.add_match_entry(precedence,
|
||||||
match_to_user_name_map.insert(precedence_sym, TerminalString::Literal(sym));
|
sym,
|
||||||
user_name_to_match_map.insert(TerminalString::Literal(sym), precedence_sym);
|
TerminalString::Literal(sym),
|
||||||
},
|
span)?;
|
||||||
MatchItem::Mapped(sym, mapping, _) => {
|
}
|
||||||
let precedence_sym = sym.with_match_precedence(precedence);
|
MatchItem::Mapped(sym, user, span) => {
|
||||||
match_to_user_name_map.insert(precedence_sym, mapping);
|
match_block.add_match_entry(precedence, sym, user, span)?;
|
||||||
user_name_to_match_map.insert(mapping, precedence_sym);
|
}
|
||||||
},
|
MatchItem::CatchAll(_) => {
|
||||||
MatchItem::CatchAll(_) => { match_catch_all = true; }
|
match_block.catch_all = true;
|
||||||
};
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// no match block is equivalent to `match { _ }`
|
// no match block is equivalent to `match { _ }`
|
||||||
match_catch_all = true;
|
match_block.catch_all = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
let opt_enum_token = grammar.enum_token();
|
let opt_enum_token = grammar.enum_token();
|
||||||
let conversions = opt_enum_token.map(|et| {
|
let conversions = opt_enum_token.map(|et| {
|
||||||
et.conversions.iter()
|
et.conversions
|
||||||
.map(|conversion| conversion.from)
|
.iter()
|
||||||
.collect()
|
.map(|conversion| conversion.from)
|
||||||
});
|
.collect()
|
||||||
|
});
|
||||||
|
|
||||||
let mut validator = Validator {
|
let mut validator = Validator {
|
||||||
grammar: &grammar,
|
grammar: &grammar,
|
||||||
all_literals: map(),
|
|
||||||
conversions: conversions,
|
conversions: conversions,
|
||||||
user_name_to_match_map: user_name_to_match_map,
|
match_block: match_block,
|
||||||
match_catch_all: match_catch_all
|
|
||||||
};
|
};
|
||||||
|
|
||||||
assert!(!opt_match_token.is_some() || !opt_enum_token.is_some(),
|
assert!(!opt_match_token.is_some() || !opt_enum_token.is_some(),
|
||||||
"expected to not have both match and extern");
|
"expected to not have both match and extern");
|
||||||
|
|
||||||
try!(validator.validate());
|
try!(validator.validate());
|
||||||
|
|
||||||
(opt_enum_token.is_some(), validator.all_literals, match_to_user_name_map)
|
(opt_enum_token.is_some(), validator.match_block)
|
||||||
};
|
};
|
||||||
|
|
||||||
if !has_enum_token {
|
if !has_enum_token {
|
||||||
try!(construct(&mut grammar, all_literals, match_to_user_name_map));
|
try!(construct(&mut grammar, match_block));
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(grammar)
|
Ok(grammar)
|
||||||
@ -91,20 +88,100 @@ pub fn validate(mut grammar: Grammar) -> NormResult<Grammar> {
|
|||||||
|
|
||||||
struct Validator<'grammar> {
|
struct Validator<'grammar> {
|
||||||
grammar: &'grammar Grammar,
|
grammar: &'grammar Grammar,
|
||||||
all_literals: Map<TerminalLiteral, Span>,
|
|
||||||
|
/// If an external tokenizer is in use, then this will be
|
||||||
|
/// `Some(_)` and will point to all the defined conversions. In
|
||||||
|
/// that case, the other fields below are irrelevant.
|
||||||
conversions: Option<Set<TerminalString>>,
|
conversions: Option<Set<TerminalString>>,
|
||||||
user_name_to_match_map: Map<TerminalString, TerminalLiteral>,
|
|
||||||
match_catch_all: bool,
|
match_block: MatchBlock,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Data summarizing the `match { }` block, along with any literals we
|
||||||
|
/// scraped up.
|
||||||
|
#[derive(Default)]
|
||||||
|
struct MatchBlock {
|
||||||
|
/// This vector stores the `match { }` entries. If `catch_all`
|
||||||
|
/// is true, then we will grow this set with "identity mappings"
|
||||||
|
/// for new literals that we find.
|
||||||
|
match_entries: Vec<MatchEntry>,
|
||||||
|
|
||||||
|
/// The names of all terminals the user can legally type. If
|
||||||
|
/// `catch_all` is true, then if we encounter additional
|
||||||
|
/// terminal literals in the grammar, we will add them to this
|
||||||
|
/// set.
|
||||||
|
match_user_names: Set<TerminalString>,
|
||||||
|
|
||||||
|
/// For each terminal literal that we have to match, the span
|
||||||
|
/// where it appeared in user's source. This can either be in the
|
||||||
|
/// `match { }` section or else in the grammar somewhere (if added
|
||||||
|
/// due to a catch-all, or there is no match section).
|
||||||
|
spans: Map<TerminalLiteral, Span>,
|
||||||
|
|
||||||
|
/// True if we should permit unrecognized literals to be used.
|
||||||
|
catch_all: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MatchBlock {
|
||||||
|
fn add_match_entry(&mut self,
|
||||||
|
match_group_precedence: usize,
|
||||||
|
sym: TerminalLiteral,
|
||||||
|
user_name: TerminalString,
|
||||||
|
span: Span)
|
||||||
|
-> NormResult<()> {
|
||||||
|
if let Some(_old_span) = self.spans.insert(sym, span) {
|
||||||
|
return_err!(span, "multiple match entries for `{}`", sym);
|
||||||
|
}
|
||||||
|
|
||||||
|
// NB: It's legal for multiple regex to produce same terminal.
|
||||||
|
self.match_user_names.insert(user_name);
|
||||||
|
|
||||||
|
self.match_entries
|
||||||
|
.push(MatchEntry {
|
||||||
|
precedence: match_group_precedence * 2 + sym.base_precedence(),
|
||||||
|
match_literal: sym,
|
||||||
|
user_name: user_name,
|
||||||
|
});
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add_literal_from_grammar(&mut self, sym: TerminalLiteral, span: Span) -> NormResult<()> {
|
||||||
|
// Already saw this literal, maybe in a match entry, maybe in the grammar.
|
||||||
|
if self.match_user_names
|
||||||
|
.contains(&TerminalString::Literal(sym)) {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
if !self.catch_all {
|
||||||
|
return_err!(span,
|
||||||
|
"terminal `{}` does not have a match mapping defined for it",
|
||||||
|
sym);
|
||||||
|
}
|
||||||
|
|
||||||
|
self.match_user_names
|
||||||
|
.insert(TerminalString::Literal(sym));
|
||||||
|
|
||||||
|
self.match_entries
|
||||||
|
.push(MatchEntry {
|
||||||
|
precedence: sym.base_precedence(),
|
||||||
|
match_literal: sym,
|
||||||
|
user_name: TerminalString::Literal(sym),
|
||||||
|
});
|
||||||
|
|
||||||
|
self.spans.insert(sym, span);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'grammar> Validator<'grammar> {
|
impl<'grammar> Validator<'grammar> {
|
||||||
fn validate(&mut self) -> NormResult<()> {
|
fn validate(&mut self) -> NormResult<()> {
|
||||||
for item in &self.grammar.items {
|
for item in &self.grammar.items {
|
||||||
match *item {
|
match *item {
|
||||||
GrammarItem::Use(..) => { }
|
GrammarItem::Use(..) => {}
|
||||||
GrammarItem::MatchToken(..) => { }
|
GrammarItem::MatchToken(..) => {}
|
||||||
GrammarItem::ExternToken(_) => { }
|
GrammarItem::ExternToken(_) => {}
|
||||||
GrammarItem::InternToken(_) => { }
|
GrammarItem::InternToken(_) => {}
|
||||||
GrammarItem::Nonterminal(ref data) => {
|
GrammarItem::Nonterminal(ref data) => {
|
||||||
for alternative in &data.alternatives {
|
for alternative in &data.alternatives {
|
||||||
try!(self.validate_alternative(alternative));
|
try!(self.validate_alternative(alternative));
|
||||||
@ -136,16 +213,15 @@ impl<'grammar> Validator<'grammar> {
|
|||||||
SymbolKind::Terminal(term) => {
|
SymbolKind::Terminal(term) => {
|
||||||
try!(self.validate_terminal(symbol.span, term));
|
try!(self.validate_terminal(symbol.span, term));
|
||||||
}
|
}
|
||||||
SymbolKind::Nonterminal(_) => {
|
SymbolKind::Nonterminal(_) => {}
|
||||||
}
|
|
||||||
SymbolKind::Repeat(ref repeat) => {
|
SymbolKind::Repeat(ref repeat) => {
|
||||||
try!(self.validate_symbol(&repeat.symbol));
|
try!(self.validate_symbol(&repeat.symbol));
|
||||||
}
|
}
|
||||||
SymbolKind::Choose(ref sym) | SymbolKind::Name(_, ref sym) => {
|
SymbolKind::Choose(ref sym) |
|
||||||
|
SymbolKind::Name(_, ref sym) => {
|
||||||
try!(self.validate_symbol(sym));
|
try!(self.validate_symbol(sym));
|
||||||
}
|
}
|
||||||
SymbolKind::Lookahead | SymbolKind::Lookbehind | SymbolKind::Error => {
|
SymbolKind::Lookahead | SymbolKind::Lookbehind | SymbolKind::Error => {}
|
||||||
}
|
|
||||||
SymbolKind::AmbiguousId(id) => {
|
SymbolKind::AmbiguousId(id) => {
|
||||||
panic!("ambiguous id `{}` encountered after name resolution", id)
|
panic!("ambiguous id `{}` encountered after name resolution", id)
|
||||||
}
|
}
|
||||||
@ -163,49 +239,29 @@ impl<'grammar> Validator<'grammar> {
|
|||||||
// this terminal has a defined conversion.
|
// this terminal has a defined conversion.
|
||||||
Some(ref c) => {
|
Some(ref c) => {
|
||||||
if !c.contains(&term) {
|
if !c.contains(&term) {
|
||||||
return_err!(span, "terminal `{}` does not have a pattern defined for it",
|
return_err!(span,
|
||||||
|
"terminal `{}` does not have a pattern defined for it",
|
||||||
term);
|
term);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If there is no extern token definition, then collect
|
// If there is no extern token definition, then collect
|
||||||
// the terminal literals ("class", r"[a-z]+") into a set.
|
// the terminal literals ("class", r"[a-z]+") into a set.
|
||||||
None => match term {
|
None => {
|
||||||
// FIMXE: Should not allow undefined literals if no CatchAll
|
match term {
|
||||||
TerminalString::Bare(c) => match self.user_name_to_match_map.get(&term) {
|
TerminalString::Bare(_) => {
|
||||||
Some(&vl) => {
|
assert!(self.match_block.match_user_names.contains(&term),
|
||||||
// FIXME: I don't think this span here is correct
|
"bare terminal without match entry: {}",
|
||||||
self.all_literals.entry(vl).or_insert(span);
|
term)
|
||||||
}
|
}
|
||||||
|
|
||||||
None => {
|
TerminalString::Literal(l) => {
|
||||||
// Bare identifiers like `x` can never be resolved
|
self.match_block.add_literal_from_grammar(l, span)?
|
||||||
// as terminals unless there is a conversion or mapping
|
|
||||||
// defined for them that indicates they are a
|
|
||||||
// terminal; otherwise it's just an unresolved
|
|
||||||
// identifier.
|
|
||||||
panic!("bare literal `{}` without extern token definition", c);
|
|
||||||
}
|
|
||||||
},
|
|
||||||
|
|
||||||
TerminalString::Literal(l) => match self.user_name_to_match_map.get(&term) {
|
|
||||||
Some(&vl) => {
|
|
||||||
// FIXME: I don't think this span here is correct
|
|
||||||
self.all_literals.entry(vl).or_insert(span);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
None => {
|
// Error is a builtin terminal that always exists
|
||||||
if self.match_catch_all {
|
TerminalString::Error => (),
|
||||||
self.all_literals.entry(l).or_insert(span);
|
}
|
||||||
} else {
|
|
||||||
return_err!(span, "terminal `{}` does not have a match mapping defined for it",
|
|
||||||
term);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
|
|
||||||
// Error is a builtin terminal that always exists
|
|
||||||
TerminalString::Error => (),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -217,38 +273,36 @@ impl<'grammar> Validator<'grammar> {
|
|||||||
// Construction phase -- if we are constructing a tokenizer, this
|
// Construction phase -- if we are constructing a tokenizer, this
|
||||||
// phase builds up an internal token DFA.
|
// phase builds up an internal token DFA.
|
||||||
|
|
||||||
pub fn construct(grammar: &mut Grammar, literals_map: Map<TerminalLiteral, Span>, match_to_user_name_map: Map<TerminalLiteral, TerminalString>) -> NormResult<()> {
|
fn construct(grammar: &mut Grammar, match_block: MatchBlock) -> NormResult<()> {
|
||||||
let mut literals: Vec<TerminalLiteral> =
|
let MatchBlock {
|
||||||
literals_map.keys()
|
mut match_entries,
|
||||||
.cloned()
|
spans,
|
||||||
.collect();
|
..
|
||||||
|
} = match_block;
|
||||||
|
|
||||||
// Sort literals by order of increasing precedence.
|
// Sort match entries by order of increasing precedence.
|
||||||
literals.sort_by_key(|literal| literal.precedence());
|
match_entries.sort();
|
||||||
|
|
||||||
// Build up two vectors, one of parsed regular expressions and
|
// Build up two vectors, one of parsed regular expressions and
|
||||||
// one of precedences, that are parallel with `literals`.
|
// one of precedences, that are parallel with `match_entries`.
|
||||||
let mut regexs = Vec::with_capacity(literals.len());
|
let mut regexs = Vec::with_capacity(match_entries.len());
|
||||||
let mut precedences = Vec::with_capacity(literals.len());
|
let mut precedences = Vec::with_capacity(match_entries.len());
|
||||||
try!(intern::read(|interner| {
|
try!(intern::read(|interner| {
|
||||||
for &literal in &literals {
|
for match_entry in &match_entries {
|
||||||
precedences.push(Precedence(literal.precedence()));
|
precedences.push(Precedence(match_entry.precedence));
|
||||||
match literal {
|
match match_entry.match_literal {
|
||||||
TerminalLiteral::Quoted(s, _) => {
|
TerminalLiteral::Quoted(s) => {
|
||||||
regexs.push(re::parse_literal(interner.data(s)));
|
regexs.push(re::parse_literal(interner.data(s)));
|
||||||
}
|
}
|
||||||
TerminalLiteral::Regex(s, _) => {
|
TerminalLiteral::Regex(s) => {
|
||||||
match re::parse_regex(interner.data(s)) {
|
match re::parse_regex(interner.data(s)) {
|
||||||
Ok(regex) => regexs.push(regex),
|
Ok(regex) => regexs.push(regex),
|
||||||
Err(error) => {
|
Err(error) => {
|
||||||
let literal_span = literals_map[&literal];
|
let literal_span = spans[&match_entry.match_literal];
|
||||||
// FIXME -- take offset into account for
|
// FIXME -- take offset into account for
|
||||||
// span; this requires knowing how many #
|
// span; this requires knowing how many #
|
||||||
// the user used, which we do not track
|
// the user used, which we do not track
|
||||||
return_err!(
|
return_err!(literal_span, "invalid regular expression: {}", error);
|
||||||
literal_span,
|
|
||||||
"invalid regular expression: {}",
|
|
||||||
error);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -267,31 +321,28 @@ pub fn construct(grammar: &mut Grammar, literals_map: Map<TerminalLiteral, Span>
|
|||||||
LineBoundary => r#"line boundaries (`^` or `$`)"#,
|
LineBoundary => r#"line boundaries (`^` or `$`)"#,
|
||||||
TextBoundary => r#"text boundaries (`^` or `$`)"#,
|
TextBoundary => r#"text boundaries (`^` or `$`)"#,
|
||||||
};
|
};
|
||||||
let literal = literals[index.index()];
|
let literal = match_entries[index.index()].match_literal;
|
||||||
let span = literals_map[&literal];
|
return_err!(spans[&literal],
|
||||||
return_err!(
|
"{} are not supported in regular expressions",
|
||||||
span,
|
feature)
|
||||||
"{} are not supported in regular expressions",
|
|
||||||
feature)
|
|
||||||
}
|
}
|
||||||
Err(DFAConstructionError::Ambiguity { match0, match1 }) => {
|
Err(DFAConstructionError::Ambiguity { match0, match1 }) => {
|
||||||
let literal0 = literals[match0.index()];
|
let literal0 = match_entries[match0.index()].match_literal;
|
||||||
let literal1 = literals[match1.index()];
|
let literal1 = match_entries[match1.index()].match_literal;
|
||||||
let span0 = literals_map[&literal0];
|
|
||||||
let _span1 = literals_map[&literal1];
|
|
||||||
// FIXME(#88) -- it'd be nice to give an example here
|
// FIXME(#88) -- it'd be nice to give an example here
|
||||||
return_err!(
|
return_err!(spans[&literal0],
|
||||||
span0,
|
"ambiguity detected between the terminal `{}` and the terminal `{}`",
|
||||||
"ambiguity detected between the terminal `{}` and the terminal `{}`",
|
literal0,
|
||||||
literal0, literal1);
|
literal1)
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
grammar.items.push(GrammarItem::InternToken(InternToken {
|
grammar
|
||||||
literals: literals,
|
.items
|
||||||
match_to_user_name_map: match_to_user_name_map,
|
.push(GrammarItem::InternToken(InternToken {
|
||||||
dfa: dfa
|
match_entries: match_entries,
|
||||||
}));
|
dfa: dfa,
|
||||||
|
}));
|
||||||
|
|
||||||
// we need to inject a `'input` lifetime and `input: &'input str` parameter as well:
|
// we need to inject a `'input` lifetime and `input: &'input str` parameter as well:
|
||||||
|
|
||||||
@ -299,38 +350,36 @@ pub fn construct(grammar: &mut Grammar, literals_map: Map<TerminalLiteral, Span>
|
|||||||
for parameter in &grammar.type_parameters {
|
for parameter in &grammar.type_parameters {
|
||||||
match *parameter {
|
match *parameter {
|
||||||
TypeParameter::Lifetime(i) if i == input_lifetime => {
|
TypeParameter::Lifetime(i) if i == input_lifetime => {
|
||||||
return_err!(
|
return_err!(grammar.span,
|
||||||
grammar.span,
|
"since there is no external token enum specified, \
|
||||||
"since there is no external token enum specified, \
|
|
||||||
the `'input` lifetime is implicit and cannot be declared");
|
the `'input` lifetime is implicit and cannot be declared");
|
||||||
}
|
}
|
||||||
_ => { }
|
_ => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let input_parameter = intern(INPUT_PARAMETER);
|
let input_parameter = intern(INPUT_PARAMETER);
|
||||||
for parameter in &grammar.parameters {
|
for parameter in &grammar.parameters {
|
||||||
if parameter.name == input_parameter {
|
if parameter.name == input_parameter {
|
||||||
return_err!(
|
return_err!(grammar.span,
|
||||||
grammar.span,
|
"since there is no external token enum specified, \
|
||||||
"since there is no external token enum specified, \
|
|
||||||
the `input` parameter is implicit and cannot be declared");
|
the `input` parameter is implicit and cannot be declared");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
grammar.type_parameters.insert(0, TypeParameter::Lifetime(input_lifetime));
|
grammar
|
||||||
|
.type_parameters
|
||||||
|
.insert(0, TypeParameter::Lifetime(input_lifetime));
|
||||||
|
|
||||||
let parameter = Parameter {
|
let parameter = Parameter {
|
||||||
name: input_parameter,
|
name: input_parameter,
|
||||||
ty: TypeRef::Ref {
|
ty: TypeRef::Ref {
|
||||||
lifetime: Some(input_lifetime),
|
lifetime: Some(input_lifetime),
|
||||||
mutable: false,
|
mutable: false,
|
||||||
referent: Box::new(TypeRef::Id(intern("str")))
|
referent: Box::new(TypeRef::Id(intern("str"))),
|
||||||
}
|
},
|
||||||
};
|
};
|
||||||
grammar.parameters.push(parameter);
|
grammar.parameters.push(parameter);
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -24,19 +24,19 @@ fn check_intern_token(grammar: &str,
|
|||||||
let parsed_grammar = validate_grammar(&grammar).expect("validate");
|
let parsed_grammar = validate_grammar(&grammar).expect("validate");
|
||||||
let intern_token = parsed_grammar.intern_token().expect("intern_token");
|
let intern_token = parsed_grammar.intern_token().expect("intern_token");
|
||||||
println!("intern_token: {:?}", intern_token);
|
println!("intern_token: {:?}", intern_token);
|
||||||
for (input, expected_literal) in expected_tokens {
|
for (input, expected_user_name) in expected_tokens {
|
||||||
let actual_literal =
|
let actual_user_name =
|
||||||
interpret::interpret(&intern_token.dfa, input)
|
interpret::interpret(&intern_token.dfa, input)
|
||||||
.map(|(index, text)| {
|
.map(|(index, text)| {
|
||||||
let literal = intern_token.literals[index.index()];
|
let user_name = intern_token.match_entries[index.index()].user_name;
|
||||||
(literal, text)
|
(user_name, text)
|
||||||
});
|
});
|
||||||
let actual_literal = format!("{:?}", actual_literal);
|
let actual_user_name = format!("{:?}", actual_user_name);
|
||||||
if expected_literal != actual_literal {
|
if expected_user_name != actual_user_name {
|
||||||
panic!("input `{}` matched `{}` but we expected `{}`",
|
panic!("input `{}` matched `{}` but we expected `{}`",
|
||||||
input,
|
input,
|
||||||
actual_literal,
|
actual_user_name,
|
||||||
expected_literal);
|
expected_user_name);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -85,11 +85,11 @@ fn invalid_regular_expression_unterminated_group() {
|
|||||||
fn quoted_literals() {
|
fn quoted_literals() {
|
||||||
check_intern_token(
|
check_intern_token(
|
||||||
r#"grammar; X = X "+" "-" "foo" "(" ")";"#,
|
r#"grammar; X = X "+" "-" "foo" "(" ")";"#,
|
||||||
vec![("+", r#"Some(("+"+1, "+"))"#),
|
vec![("+", r#"Some(("+", "+"))"#),
|
||||||
("-", r#"Some(("-"+1, "-"))"#),
|
("-", r#"Some(("-", "-"))"#),
|
||||||
("(", r#"Some(("("+1, "("))"#),
|
("(", r#"Some(("(", "("))"#),
|
||||||
(")", r#"Some((")"+1, ")"))"#),
|
(")", r#"Some((")", ")"))"#),
|
||||||
("foo", r#"Some(("foo"+1, "foo"))"#),
|
("foo", r#"Some(("foo", "foo"))"#),
|
||||||
("<", r#"None"#)]);
|
("<", r#"None"#)]);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -98,10 +98,10 @@ fn regex_literals() {
|
|||||||
check_intern_token(
|
check_intern_token(
|
||||||
r#"grammar; X = X r"[a-z]+" r"[0-9]+";"#,
|
r#"grammar; X = X r"[a-z]+" r"[0-9]+";"#,
|
||||||
vec![
|
vec![
|
||||||
("a", r##"Some((r#"[a-z]+"#+0, "a"))"##),
|
("a", r##"Some((r#"[a-z]+"#, "a"))"##),
|
||||||
("def", r##"Some((r#"[a-z]+"#+0, "def"))"##),
|
("def", r##"Some((r#"[a-z]+"#, "def"))"##),
|
||||||
("1", r##"Some((r#"[0-9]+"#+0, "1"))"##),
|
("1", r##"Some((r#"[0-9]+"#, "1"))"##),
|
||||||
("9123456", r##"Some((r#"[0-9]+"#+0, "9123456"))"##),
|
("9123456", r##"Some((r#"[0-9]+"#, "9123456"))"##),
|
||||||
]);
|
]);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -110,9 +110,9 @@ fn match_mappings() {
|
|||||||
check_intern_token(
|
check_intern_token(
|
||||||
r#"grammar; match { r"(?i)begin" => "BEGIN" } else { "abc" => ALPHA } X = "BEGIN" ALPHA;"#,
|
r#"grammar; match { r"(?i)begin" => "BEGIN" } else { "abc" => ALPHA } X = "BEGIN" ALPHA;"#,
|
||||||
vec![
|
vec![
|
||||||
("BEGIN", r##"Some((r#"(?i)begin"#+4, "BEGIN"))"##),
|
("BEGIN", r##"Some(("BEGIN", "BEGIN"))"##),
|
||||||
("begin", r##"Some((r#"(?i)begin"#+4, "begin"))"##),
|
("begin", r##"Some(("BEGIN", "begin"))"##),
|
||||||
("abc", r#"Some(("abc"+3, "abc"))"#), // ALPHA
|
("abc", r#"Some((ALPHA, "abc"))"#),
|
||||||
]);
|
]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -9,7 +9,7 @@ use grammar::parse_tree::{ActionKind, Alternative,
|
|||||||
Path,
|
Path,
|
||||||
Span,
|
Span,
|
||||||
SymbolKind,
|
SymbolKind,
|
||||||
TerminalString, TypeRef};
|
TypeRef};
|
||||||
use grammar::repr::{NominalTypeRepr, Types, TypeRepr};
|
use grammar::repr::{NominalTypeRepr, Types, TypeRepr};
|
||||||
use intern::intern;
|
use intern::intern;
|
||||||
|
|
||||||
@ -79,12 +79,8 @@ impl<'grammar> TypeInferencer<'grammar> {
|
|||||||
|
|
||||||
let mut types = Types::new(&grammar.prefix, Some(loc_type), error_type, enum_type);
|
let mut types = Types::new(&grammar.prefix, Some(loc_type), error_type, enum_type);
|
||||||
|
|
||||||
for &literal in &intern_token.literals {
|
for match_entry in &intern_token.match_entries {
|
||||||
let user_name = intern_token.match_to_user_name_map
|
types.add_term_type(match_entry.user_name, input_str.clone());
|
||||||
.get(&literal)
|
|
||||||
.cloned()
|
|
||||||
.unwrap_or(TerminalString::Literal(literal));
|
|
||||||
types.add_term_type(user_name, input_str.clone());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
types
|
types
|
||||||
|
@ -344,8 +344,8 @@ QuotedTerminal: TerminalString = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
QuotedLiteral: TerminalLiteral = {
|
QuotedLiteral: TerminalLiteral = {
|
||||||
<s:StringLiteral> => TerminalLiteral::Quoted(s, 1),
|
<s:StringLiteral> => TerminalLiteral::Quoted(s),
|
||||||
<s:RegexLiteral> => TerminalLiteral::Regex(s, 0),
|
<s:RegexLiteral> => TerminalLiteral::Regex(s),
|
||||||
};
|
};
|
||||||
|
|
||||||
StringLiteral: InternedString =
|
StringLiteral: InternedString =
|
||||||
|
@ -49,7 +49,7 @@ fn match_complex() {
|
|||||||
let item00 = contents0.items.get(0).unwrap();
|
let item00 = contents0.items.get(0).unwrap();
|
||||||
match *item00 {
|
match *item00 {
|
||||||
MatchItem::Mapped(ref sym, ref mapping, _) => {
|
MatchItem::Mapped(ref sym, ref mapping, _) => {
|
||||||
assert_eq!(format!("{:?}", sym), "r#\"(?i)begin\"#+0");
|
assert_eq!(format!("{:?}", sym), "r#\"(?i)begin\"#");
|
||||||
assert_eq!(format!("{}", mapping), "\"BEGIN\"");
|
assert_eq!(format!("{}", mapping), "\"BEGIN\"");
|
||||||
},
|
},
|
||||||
_ => panic!("expected MatchItem::Mapped, but was: {:?}", item00)
|
_ => panic!("expected MatchItem::Mapped, but was: {:?}", item00)
|
||||||
@ -58,7 +58,7 @@ fn match_complex() {
|
|||||||
let item01 = contents0.items.get(1).unwrap();
|
let item01 = contents0.items.get(1).unwrap();
|
||||||
match *item01 {
|
match *item01 {
|
||||||
MatchItem::Mapped(ref sym, ref mapping, _) => {
|
MatchItem::Mapped(ref sym, ref mapping, _) => {
|
||||||
assert_eq!(format!("{:?}", sym), "r#\"(?i)end\"#+0");
|
assert_eq!(format!("{:?}", sym), "r#\"(?i)end\"#");
|
||||||
assert_eq!(format!("{}", mapping), "\"END\"");
|
assert_eq!(format!("{}", mapping), "\"END\"");
|
||||||
},
|
},
|
||||||
_ => panic!("expected MatchItem::Mapped, but was: {:?}", item00)
|
_ => panic!("expected MatchItem::Mapped, but was: {:?}", item00)
|
||||||
@ -69,7 +69,7 @@ fn match_complex() {
|
|||||||
let item10 = contents1.items.get(0).unwrap();
|
let item10 = contents1.items.get(0).unwrap();
|
||||||
match *item10 {
|
match *item10 {
|
||||||
MatchItem::Mapped(ref sym, ref mapping, _) => {
|
MatchItem::Mapped(ref sym, ref mapping, _) => {
|
||||||
assert_eq!(format!("{:?}", sym), "r#\"[a-zA-Z_][a-zA-Z0-9_]*\"#+0");
|
assert_eq!(format!("{:?}", sym), "r#\"[a-zA-Z_][a-zA-Z0-9_]*\"#");
|
||||||
assert_eq!(format!("{}", mapping), "IDENTIFIER");
|
assert_eq!(format!("{}", mapping), "IDENTIFIER");
|
||||||
},
|
},
|
||||||
_ => panic!("expected MatchItem::Mapped, but was: {:?}", item10)
|
_ => panic!("expected MatchItem::Mapped, but was: {:?}", item10)
|
||||||
@ -80,7 +80,7 @@ fn match_complex() {
|
|||||||
let item20 = contents2.items.get(0).unwrap();
|
let item20 = contents2.items.get(0).unwrap();
|
||||||
match *item20 {
|
match *item20 {
|
||||||
MatchItem::Unmapped(ref sym, _) => {
|
MatchItem::Unmapped(ref sym, _) => {
|
||||||
assert_eq!(format!("{:?}", sym), "\"other\"+1");
|
assert_eq!(format!("{:?}", sym), "\"other\"");
|
||||||
},
|
},
|
||||||
_ => panic!("expected MatchItem::Unmapped, but was: {:?}", item20)
|
_ => panic!("expected MatchItem::Unmapped, but was: {:?}", item20)
|
||||||
};
|
};
|
||||||
|
Loading…
x
Reference in New Issue
Block a user