separate out pre/post-validation in order to properly check for terminals without defined conversions; also, eliminate the default conversion
Niko Matsakis 2015-07-23 08:50:12 -04:00
parent b872e3d68f
commit 1d4c5d3efc
9 changed files with 199 additions and 86 deletions
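
In concrete terms, the single up-front validation pass is split into a pre-expansion and a post-expansion pass; a condensed sketch of the new entry points, taken from the normalize.rs diff below (comments added here for orientation only):

pub fn normalize(grammar: pt::Grammar) -> NormResult<r::Grammar> {
    normalize_helper(grammar, true)
}

fn normalize_helper(grammar: pt::Grammar, validate: bool) -> NormResult<r::Grammar> {
    if validate { try!(prevalidate::validate(&grammar)); }  // checks on the raw parse tree
    let grammar = try!(macro_expand::expand_macros(grammar));
    if validate { try!(postvalidate::validate(&grammar)); } // checks that need expanded macros
    let types = try!(tyinfer::infer_types(&grammar));
    lower::lower(grammar, types)
}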

View File

@@ -9,7 +9,6 @@ use intern::{intern, InternedString};
use grammar::repr::{NominalTypeRepr, TypeRepr};
use grammar::pattern::Pattern;
use std::fmt::{Debug, Display, Formatter, Error};
use std::iter::once;
use util::Sep;
#[derive(Clone, Debug, PartialEq, Eq)]
@@ -482,13 +481,6 @@ impl Path {
None
}
}
pub fn append(&self, id: InternedString) -> Path {
Path {
absolute: self.absolute,
ids: self.ids.iter().cloned().chain(once(id)).collect()
}
}
}
impl ActionKind {

View File

@@ -5,7 +5,7 @@
*/
use intern::{InternedString};
use grammar::pattern::{Pattern, PatternKind};
use grammar::pattern::{Pattern};
use std::fmt::{Debug, Display, Formatter, Error};
use util::{map, Map, Sep};
@@ -291,24 +291,8 @@ impl ActionFnDefn {
}
impl Grammar {
pub fn default_pattern(&self, id: InternedString) -> Pattern<TypeRepr> {
let path = self.types.terminal_enum_type().path.append(id);
Pattern {
span: self.token_span,
kind: PatternKind::Enum(path, vec![
Pattern {
span: self.token_span,
kind: PatternKind::DotDot
}
])
}
}
pub fn pattern(&self, t: TerminalString) -> Pattern<TypeRepr> {
match self.conversions.get(&t).cloned() {
Some(p) => p,
None => self.default_pattern(t.0),
}
pub fn pattern(&self, t: TerminalString) -> &Pattern<TypeRepr> {
&self.conversions[&t]
}
pub fn productions_for(&self, nonterminal: NonterminalString) -> &[Production] {
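
With the default conversion gone, `Grammar::pattern` simply indexes the conversions map, so every terminal that reaches lowering must already have a declared conversion; a missing entry would panic here, which is exactly the situation the new post-expansion validation rejects beforehand. A minimal illustration (hypothetical caller, not part of the diff):

let pat: &Pattern<TypeRepr> = grammar.pattern(term); // ok only because postvalidate guaranteed a conversion
// grammar.pattern(unmapped_term);                   // would panic: no entry in `conversions`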

View File

@@ -25,21 +25,27 @@ macro_rules! return_err {
}
pub fn normalize(grammar: pt::Grammar) -> NormResult<r::Grammar> {
try!(validate::validate(&grammar));
normalize_without_validating(grammar)
normalize_helper(grammar, true)
}
/// for unit tests, it is convenient to skip the validation step
#[cfg(test)]
pub fn normalize_without_validating(grammar: pt::Grammar) -> NormResult<r::Grammar> {
normalize_helper(grammar, false)
}
fn normalize_helper(grammar: pt::Grammar, validate: bool) -> NormResult<r::Grammar> {
if validate { try!(prevalidate::validate(&grammar)); }
let grammar = try!(macro_expand::expand_macros(grammar));
if validate { try!(postvalidate::validate(&grammar)); }
let types = try!(tyinfer::infer_types(&grammar));
lower::lower(grammar, types)
}
// These are executed *IN ORDER*:
// Check some basic safety conditions.
mod validate;
// Check most safety conditions.
mod prevalidate;
// Expands macros and expressions
//
@@ -56,6 +62,10 @@ mod validate;
// may occur.
mod macro_expand;
// Check some safety conditions that can only be tested
// after macro expansion.
mod postvalidate;
// Computes types where the user omitted them (or from macro
// byproducts).
//

View File

@@ -0,0 +1,98 @@
//! Validate that terminal IDs have conversions. This can only
//! be done after macro expansion because some macro arguments
//! never make it into an actual production and are only used
//! in `if` conditions; we use string literals for those,
//! but they do not have to have a defined conversion.
use super::{NormResult, NormError};
use grammar::parse_tree::*;
use util::{Set};
#[cfg(test)]
mod test;
pub fn validate(grammar: &Grammar) -> NormResult<()> {
let conversions: Set<_> =
grammar.items
.iter()
.filter_map(|item| item.as_extern_token())
.flat_map(|tt| tt.enum_token.conversions.iter().map(|conversion| conversion.from))
.collect();
let validator = Validator {
grammar: grammar,
conversions: conversions,
};
validator.validate()
}
struct Validator<'grammar> {
grammar: &'grammar Grammar,
conversions: Set<TerminalString>,
}
impl<'grammar> Validator<'grammar> {
fn validate(&self) -> NormResult<()> {
for item in &self.grammar.items {
match *item {
GrammarItem::Use(..) => { }
GrammarItem::ExternToken(_) => { }
GrammarItem::Nonterminal(ref data) => {
for alternative in &data.alternatives {
try!(self.validate_alternative(alternative));
}
}
}
}
Ok(())
}
fn validate_alternative(&self, alternative: &Alternative) -> NormResult<()> {
assert!(alternative.condition.is_none()); // macro expansion should have removed these
try!(self.validate_expr(&alternative.expr));
Ok(())
}
fn validate_expr(&self, expr: &ExprSymbol) -> NormResult<()> {
for symbol in &expr.symbols {
try!(self.validate_symbol(symbol));
}
Ok(())
}
fn validate_symbol(&self, symbol: &Symbol) -> NormResult<()> {
match symbol.kind {
SymbolKind::Expr(ref expr) => {
try!(self.validate_expr(expr));
}
SymbolKind::Terminal(term) => {
try!(self.validate_terminal(symbol.span, term));
}
SymbolKind::Nonterminal(_) => {
}
SymbolKind::Macro(..) => {
panic!("macro not removed: {:?}", symbol);
}
SymbolKind::Repeat(ref repeat) => {
try!(self.validate_symbol(&repeat.symbol));
}
SymbolKind::Choose(ref sym) | SymbolKind::Name(_, ref sym) => {
try!(self.validate_symbol(sym));
}
SymbolKind::Lookahead | SymbolKind::Lookbehind => { }
}
Ok(())
}
fn validate_terminal(&self, span: Span, term: TerminalString) -> NormResult<()> {
if !self.conversions.contains(&term) {
return_err!(span, "terminal `{}` does not have a pattern defined for it",
term);
}
Ok(())
}
}
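
The set accepted here is exactly the left-hand sides of the conversions declared in the grammar's extern token block. A sketch of what the failing test grammar below would need in order to pass (the extern-token declaration syntax and the `Tok::Plus(..)` pattern are assumptions for illustration, not taken from this diff):

grammar;
extern token {
    enum Tok {
        "+" => Tok::Plus(..)
    }
}
X = X "+";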

View File

@@ -0,0 +1,36 @@
use parser;
use grammar::parse_tree::{Span};
use regex::Regex;
fn check_err(expected_err: &str, grammar: &str) {
let expected_err = Regex::new(expected_err).unwrap();
// the string will have a `>>>` and `<<<` in it, which serve to
// indicate the span where an error is expected.
let start_index = grammar.find(">>>").unwrap();
let grammar = grammar.replace(">>>", ""); // remove the `>>>` marker
let end_index = grammar.rfind("<<<").unwrap();
let grammar = grammar.replace("<<<", "");
assert!(start_index <= end_index);
let parsed_grammar = parser::parse_grammar(&grammar).unwrap();
match super::validate(&parsed_grammar) {
Ok(()) => {
panic!("expected error for grammar");
}
Err(err) => {
assert_eq!(err.span, Span(start_index, end_index));
assert!(expected_err.is_match(&err.message),
"unexpected error text `{}`, did not match `{}`", err.message, expected_err);
}
}
}
#[test]
fn unknown_terminal() {
check_err(
r#"terminal `"\+"` does not have a pattern defined for it"#,
r#"grammar; X = X >>>"+"<<<;"#);
}

View File

@@ -4,17 +4,12 @@ use super::{NormResult, NormError};
use super::norm_util::{self, Symbols};
use grammar::parse_tree::*;
use intern::{intern, read, InternedString};
use regex::Regex;
use util::{Map, Multimap, Sep, set, Set};
use intern::{intern, InternedString};
use util::{Map, Multimap, Sep, set};
#[cfg(test)]
mod test;
thread_local! {
static IDENTIFIER_RE: Regex = Regex::new("^[a-zA-Z_][a-zA-Z0-9_]*$").unwrap()
}
pub fn validate(grammar: &Grammar) -> NormResult<()> {
let globals = ScopeChain {
previous: None,
@@ -31,18 +26,10 @@ pub fn validate(grammar: &Grammar) -> NormResult<()> {
.filter_map(|item| item.as_extern_token())
.next();
let conversions: Set<_> =
grammar.items
.iter()
.filter_map(|item| item.as_extern_token())
.flat_map(|tt| tt.enum_token.conversions.iter().map(|conversion| conversion.from))
.collect();
let validator = Validator {
grammar: grammar,
globals: globals,
extern_token: extern_token,
conversions: conversions,
};
validator.validate()
@@ -52,7 +39,6 @@ struct Validator<'grammar> {
grammar: &'grammar Grammar,
extern_token: Option<&'grammar ExternToken>,
globals: ScopeChain<'grammar>,
conversions: Set<TerminalString>,
}
#[derive(Copy, Clone, Debug)]
@@ -221,8 +207,8 @@ impl<'grammar> Validator<'grammar> {
SymbolKind::Expr(ref expr) => {
try!(self.validate_expr(scope, expr));
}
SymbolKind::Terminal(term) => {
try!(self.validate_terminal(symbol.span, term));
SymbolKind::Terminal(_) => {
/* see postvalidate! */
}
SymbolKind::Nonterminal(nt) => {
try!(self.validate_nt(scope, symbol.span, nt));
@@ -245,6 +231,10 @@ impl<'grammar> Validator<'grammar> {
}
}
}
for arg in &msym.args {
try!(self.validate_symbol(scope, arg));
}
}
SymbolKind::Repeat(ref repeat) => {
try!(self.validate_symbol(scope, &repeat.symbol));
@@ -274,31 +264,6 @@ impl<'grammar> Validator<'grammar> {
Ok(())
}
fn validate_terminal(&self,
span: Span,
term: TerminalString)
-> NormResult<()> {
// if this is a valid Rust identifier, then the
// terminal is accepted
if self.is_identifier(term.0) {
return Ok(());
}
// otherwise, a remapping must have been defined
if self.conversions.contains(&term) {
return Ok(());
}
return_err!(span, "terminal `{}` is neither a valid Rust identifier \
and nor does it have a defined conversion", term);
}
fn is_identifier(&self, term: InternedString) -> bool {
IDENTIFIER_RE.with(|identifier_re|
read(|interner|
identifier_re.is_match(interner.data(term))))
}
fn validate_nt(&self,
scope: &ScopeChain,
span: Span,
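
Two changes in the pre-expansion pass are worth noting: terminal checks move out entirely (the `/* see postvalidate! */` arm above), and macro call arguments are now validated recursively via the new loop over `msym.args`. The latter is what the `unknown_nonterminal_in_macro_arg` test added below exercises:

grammar; X = X Id<Y>; Id<T> = T;  // error: no definition found for nonterminal `Y`, reported at the span of `Y`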

View File

@@ -35,6 +35,13 @@ fn unknown_nonterminal() {
r#"grammar; X = X >>>Y<<<;"#);
}
#[test]
fn unknown_nonterminal_in_macro_arg() {
check_err(
"no definition found for nonterminal `Y`",
r#"grammar; X = X Id<>>>Y<<<>; Id<T> = T;"#);
}
#[test]
fn repeated_macro_arg() {
check_err(

View File

@@ -47,6 +47,7 @@ pub enum Tok<'input> {
Equals,
EqualsEquals,
EqualsGreaterThanCode(&'input str),
EqualsGreaterThanQuestionCode(&'input str),
EqualsGreaterThanLookahead,
EqualsGreaterThanLookbehind,
GreaterThan,
@@ -113,33 +114,47 @@ impl<'input> Tokenizer<'input> {
// we've seen =>, now we have to choose between:
//
// => code
// =>? code
// =>@L
// =>@R
let idx1 = match self.lookahead {
match self.lookahead {
Some((_, '@')) => {
match self.bump() {
Some((idx2, 'L')) => {
self.bump();
return Ok((idx0, EqualsGreaterThanLookahead, idx2+1));
Ok((idx0, EqualsGreaterThanLookahead, idx2+1))
}
Some((idx2, 'R')) => {
self.bump();
return Ok((idx0, EqualsGreaterThanLookbehind, idx2+1));
Ok((idx0, EqualsGreaterThanLookbehind, idx2+1))
}
_ => {
return Err(UnrecognizedToken(idx0));
Err(UnrecognizedToken(idx0))
}
}
}
None => {
return Err(UnterminatedCode(idx0));
Some((idx1, '?')) => {
self.bump();
let idx2 = try!(self.code(idx0));
let code = &self.text[idx1+1..idx2];
Ok((idx0, EqualsGreaterThanQuestionCode(code), idx2))
}
Some((idx1, _)) => { idx1 }
};
Some((idx1, _)) => {
let idx2 = try!(self.code(idx0));
let code = &self.text[idx1..idx2];
Ok((idx0, EqualsGreaterThanCode(code), idx2))
}
None => {
Err(UnterminatedCode(idx0))
}
}
}
fn code(&mut self, idx0: usize) -> Result<usize, Error> {
// This is the interesting case. To find the end of the code,
// we have to scan ahead, matching (), [], and {}, and looking
// for a suitable terminator: `,`, `;`, `]`, `}`, or `)`.
@@ -163,8 +178,7 @@ impl<'input> Tokenizer<'input> {
// terminator. The code is everything *up
// to but not including* the terminating
// `,`, `;`, etc.
let code = &self.text[idx1..];
return Ok((idx0, EqualsGreaterThanCode(code), self.text.len()));
return Ok(self.text.len());
}
Some((idx2, ';')) |
@@ -176,8 +190,7 @@ impl<'input> Tokenizer<'input> {
// terminator. The code is everything *up
// to but not including* the terminating
// `,`, `;`, etc.
let code = &self.text[idx1..idx2];
return Ok((idx0, EqualsGreaterThanCode(code), idx2));
return Ok(idx2);
}
None if balance > 0 => {
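
For orientation, the four `=>` forms the tokenizer now distinguishes and the token each yields, condensed from the match above and the tests below (note that the captured code keeps its leading space, as the tests show):

=> a(b, c),   // EqualsGreaterThanCode(" a(b, c)")
=>? a(b, c),  // EqualsGreaterThanQuestionCode(" a(b, c)")
=>@L          // EqualsGreaterThanLookahead
=>@R          // EqualsGreaterThanLookbehind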

View File

@@ -55,6 +55,14 @@ fn code1() {
]);
}
#[test]
fn code2() {
test("=>? a(b, c),", vec![
("~~~~~~~~~~~ ", EqualsGreaterThanQuestionCode(" a(b, c)")),
(" ~", Comma),
]);
}
#[test]
#[should_panic]
fn code_forgot_comma() {