mirror of
https://github.com/fluencelabs/lalrpop
synced 2025-03-16 17:00:53 +00:00
separate out pre/post-validation in order to properly check for
terminals without defined conversions; also, eliminate the default conversion
This commit is contained in:
parent
b872e3d68f
commit
1d4c5d3efc
@ -9,7 +9,6 @@ use intern::{intern, InternedString};
|
||||
use grammar::repr::{NominalTypeRepr, TypeRepr};
|
||||
use grammar::pattern::Pattern;
|
||||
use std::fmt::{Debug, Display, Formatter, Error};
|
||||
use std::iter::once;
|
||||
use util::Sep;
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
@ -482,13 +481,6 @@ impl Path {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub fn append(&self, id: InternedString) -> Path {
|
||||
Path {
|
||||
absolute: self.absolute,
|
||||
ids: self.ids.iter().cloned().chain(once(id)).collect()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ActionKind {
|
||||
|
@ -5,7 +5,7 @@
|
||||
*/
|
||||
|
||||
use intern::{InternedString};
|
||||
use grammar::pattern::{Pattern, PatternKind};
|
||||
use grammar::pattern::{Pattern};
|
||||
use std::fmt::{Debug, Display, Formatter, Error};
|
||||
use util::{map, Map, Sep};
|
||||
|
||||
@ -291,24 +291,8 @@ impl ActionFnDefn {
|
||||
}
|
||||
|
||||
impl Grammar {
|
||||
pub fn default_pattern(&self, id: InternedString) -> Pattern<TypeRepr> {
|
||||
let path = self.types.terminal_enum_type().path.append(id);
|
||||
Pattern {
|
||||
span: self.token_span,
|
||||
kind: PatternKind::Enum(path, vec![
|
||||
Pattern {
|
||||
span: self.token_span,
|
||||
kind: PatternKind::DotDot
|
||||
}
|
||||
])
|
||||
}
|
||||
}
|
||||
|
||||
pub fn pattern(&self, t: TerminalString) -> Pattern<TypeRepr> {
|
||||
match self.conversions.get(&t).cloned() {
|
||||
Some(p) => p,
|
||||
None => self.default_pattern(t.0),
|
||||
}
|
||||
pub fn pattern(&self, t: TerminalString) -> &Pattern<TypeRepr> {
|
||||
&self.conversions[&t]
|
||||
}
|
||||
|
||||
pub fn productions_for(&self, nonterminal: NonterminalString) -> &[Production] {
|
||||
|
@ -25,21 +25,27 @@ macro_rules! return_err {
|
||||
}
|
||||
|
||||
pub fn normalize(grammar: pt::Grammar) -> NormResult<r::Grammar> {
|
||||
try!(validate::validate(&grammar));
|
||||
normalize_without_validating(grammar)
|
||||
normalize_helper(grammar, true)
|
||||
}
|
||||
|
||||
/// Test-only entry point: runs the normalization pipeline with both
/// validation passes switched off, which keeps unit-test grammars small.
#[cfg(test)]
pub fn normalize_without_validating(grammar: pt::Grammar) -> NormResult<r::Grammar> {
    let skip_validation = false;
    normalize_helper(grammar, skip_validation)
}
|
||||
|
||||
fn normalize_helper(grammar: pt::Grammar, validate: bool) -> NormResult<r::Grammar> {
|
||||
if validate { try!(prevalidate::validate(&grammar)); }
|
||||
let grammar = try!(macro_expand::expand_macros(grammar));
|
||||
if validate { try!(postvalidate::validate(&grammar)); }
|
||||
let types = try!(tyinfer::infer_types(&grammar));
|
||||
lower::lower(grammar, types)
|
||||
}
|
||||
|
||||
// These are executed *IN ORDER*:
|
||||
|
||||
// Check some basic safety conditions.
|
||||
mod validate;
|
||||
// Check most safety conditions.
|
||||
mod prevalidate;
|
||||
|
||||
// Expands macros and expressions
|
||||
//
|
||||
@ -56,6 +62,10 @@ mod validate;
|
||||
// may occur.
|
||||
mod macro_expand;
|
||||
|
||||
// Check some safety conditions that can only be tested
|
||||
// after macro expansion.
|
||||
mod postvalidate;
|
||||
|
||||
// Computes types where the user omitted them (or from macro
|
||||
// byproducts).
|
||||
//
|
||||
|
98
lalrpop/src/normalize/postvalidate/mod.rs
Normal file
98
lalrpop/src/normalize/postvalidate/mod.rs
Normal file
@ -0,0 +1,98 @@
|
||||
//! Validate that terminal IDs have conversions. This can only
|
||||
//! be done after macro expansion because some macro arguments
|
||||
//! never make it into an actual production and are only used
|
||||
//! in `if` conditions; we use string literals for those,
|
||||
//! but they do not have to have a defined conversion.
|
||||
|
||||
use super::{NormResult, NormError};
|
||||
|
||||
use grammar::parse_tree::*;
|
||||
use util::{Set};
|
||||
|
||||
#[cfg(test)]
|
||||
mod test;
|
||||
|
||||
pub fn validate(grammar: &Grammar) -> NormResult<()> {
|
||||
let conversions: Set<_> =
|
||||
grammar.items
|
||||
.iter()
|
||||
.filter_map(|item| item.as_extern_token())
|
||||
.flat_map(|tt| tt.enum_token.conversions.iter().map(|conversion| conversion.from))
|
||||
.collect();
|
||||
|
||||
let validator = Validator {
|
||||
grammar: grammar,
|
||||
conversions: conversions,
|
||||
};
|
||||
|
||||
validator.validate()
|
||||
}
|
||||
|
||||
struct Validator<'grammar> {
|
||||
grammar: &'grammar Grammar,
|
||||
conversions: Set<TerminalString>,
|
||||
}
|
||||
|
||||
impl<'grammar> Validator<'grammar> {
|
||||
fn validate(&self) -> NormResult<()> {
|
||||
for item in &self.grammar.items {
|
||||
match *item {
|
||||
GrammarItem::Use(..) => { }
|
||||
GrammarItem::ExternToken(_) => { }
|
||||
GrammarItem::Nonterminal(ref data) => {
|
||||
for alternative in &data.alternatives {
|
||||
try!(self.validate_alternative(alternative));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn validate_alternative(&self, alternative: &Alternative) -> NormResult<()> {
|
||||
assert!(alternative.condition.is_none()); // macro expansion should have removed these
|
||||
try!(self.validate_expr(&alternative.expr));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn validate_expr(&self, expr: &ExprSymbol) -> NormResult<()> {
|
||||
for symbol in &expr.symbols {
|
||||
try!(self.validate_symbol(symbol));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn validate_symbol(&self, symbol: &Symbol) -> NormResult<()> {
|
||||
match symbol.kind {
|
||||
SymbolKind::Expr(ref expr) => {
|
||||
try!(self.validate_expr(expr));
|
||||
}
|
||||
SymbolKind::Terminal(term) => {
|
||||
try!(self.validate_terminal(symbol.span, term));
|
||||
}
|
||||
SymbolKind::Nonterminal(_) => {
|
||||
}
|
||||
SymbolKind::Macro(..) => {
|
||||
panic!("macro not removed: {:?}", symbol);
|
||||
}
|
||||
SymbolKind::Repeat(ref repeat) => {
|
||||
try!(self.validate_symbol(&repeat.symbol));
|
||||
}
|
||||
SymbolKind::Choose(ref sym) | SymbolKind::Name(_, ref sym) => {
|
||||
try!(self.validate_symbol(sym));
|
||||
}
|
||||
SymbolKind::Lookahead | SymbolKind::Lookbehind => { }
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn validate_terminal(&self, span: Span, term: TerminalString) -> NormResult<()> {
|
||||
if !self.conversions.contains(&term) {
|
||||
return_err!(span, "terminal `{}` does not have a pattern defined for it",
|
||||
term);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
36
lalrpop/src/normalize/postvalidate/test.rs
Normal file
36
lalrpop/src/normalize/postvalidate/test.rs
Normal file
@ -0,0 +1,36 @@
|
||||
use parser;
|
||||
use grammar::parse_tree::{Span};
|
||||
use regex::Regex;
|
||||
|
||||
fn check_err(expected_err: &str, grammar: &str) {
|
||||
let expected_err = Regex::new(expected_err).unwrap();
|
||||
|
||||
// the string will have a `>>>` and `<<<` in it, which serve to
|
||||
// indicate the span where an error is expected.
|
||||
let start_index = grammar.find(">>>").unwrap();
|
||||
let grammar = grammar.replace(">>>", ""); // remove the `>>>` marker
|
||||
let end_index = grammar.rfind("<<<").unwrap();
|
||||
let grammar = grammar.replace("<<<", "");
|
||||
|
||||
assert!(start_index <= end_index);
|
||||
|
||||
let parsed_grammar = parser::parse_grammar(&grammar).unwrap();
|
||||
match super::validate(&parsed_grammar) {
|
||||
Ok(()) => {
|
||||
panic!("expected error for grammar");
|
||||
}
|
||||
Err(err) => {
|
||||
assert_eq!(err.span, Span(start_index, end_index));
|
||||
|
||||
assert!(expected_err.is_match(&err.message),
|
||||
"unexpected error text `{}`, did not match `{}`", err.message, expected_err);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A quoted terminal with no declared conversion must be rejected, with
/// the error span covering exactly the terminal.
#[test]
fn unknown_terminal() {
    let message_pattern = r#"terminal `"\+"` does not have a pattern defined for it"#;
    let marked_grammar = r#"grammar; X = X >>>"+"<<<;"#;
    check_err(message_pattern, marked_grammar);
}
|
@ -4,17 +4,12 @@ use super::{NormResult, NormError};
|
||||
use super::norm_util::{self, Symbols};
|
||||
|
||||
use grammar::parse_tree::*;
|
||||
use intern::{intern, read, InternedString};
|
||||
use regex::Regex;
|
||||
use util::{Map, Multimap, Sep, set, Set};
|
||||
use intern::{intern, InternedString};
|
||||
use util::{Map, Multimap, Sep, set};
|
||||
|
||||
#[cfg(test)]
|
||||
mod test;
|
||||
|
||||
thread_local! {
|
||||
static IDENTIFIER_RE: Regex = Regex::new("^[a-zA-Z_][a-zA-Z0-9_]*$").unwrap()
|
||||
}
|
||||
|
||||
pub fn validate(grammar: &Grammar) -> NormResult<()> {
|
||||
let globals = ScopeChain {
|
||||
previous: None,
|
||||
@ -31,18 +26,10 @@ pub fn validate(grammar: &Grammar) -> NormResult<()> {
|
||||
.filter_map(|item| item.as_extern_token())
|
||||
.next();
|
||||
|
||||
let conversions: Set<_> =
|
||||
grammar.items
|
||||
.iter()
|
||||
.filter_map(|item| item.as_extern_token())
|
||||
.flat_map(|tt| tt.enum_token.conversions.iter().map(|conversion| conversion.from))
|
||||
.collect();
|
||||
|
||||
let validator = Validator {
|
||||
grammar: grammar,
|
||||
globals: globals,
|
||||
extern_token: extern_token,
|
||||
conversions: conversions,
|
||||
};
|
||||
|
||||
validator.validate()
|
||||
@ -52,7 +39,6 @@ struct Validator<'grammar> {
|
||||
grammar: &'grammar Grammar,
|
||||
extern_token: Option<&'grammar ExternToken>,
|
||||
globals: ScopeChain<'grammar>,
|
||||
conversions: Set<TerminalString>,
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
@ -221,8 +207,8 @@ impl<'grammar> Validator<'grammar> {
|
||||
SymbolKind::Expr(ref expr) => {
|
||||
try!(self.validate_expr(scope, expr));
|
||||
}
|
||||
SymbolKind::Terminal(term) => {
|
||||
try!(self.validate_terminal(symbol.span, term));
|
||||
SymbolKind::Terminal(_) => {
|
||||
/* see postvalidate! */
|
||||
}
|
||||
SymbolKind::Nonterminal(nt) => {
|
||||
try!(self.validate_nt(scope, symbol.span, nt));
|
||||
@ -245,6 +231,10 @@ impl<'grammar> Validator<'grammar> {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for arg in &msym.args {
|
||||
try!(self.validate_symbol(scope, arg));
|
||||
}
|
||||
}
|
||||
SymbolKind::Repeat(ref repeat) => {
|
||||
try!(self.validate_symbol(scope, &repeat.symbol));
|
||||
@ -274,31 +264,6 @@ impl<'grammar> Validator<'grammar> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn validate_terminal(&self,
|
||||
span: Span,
|
||||
term: TerminalString)
|
||||
-> NormResult<()> {
|
||||
// if this is a valid Rust identifier, then the
|
||||
// terminal is accepted
|
||||
if self.is_identifier(term.0) {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// otherwise, a remapping must have been defined
|
||||
if self.conversions.contains(&term) {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
return_err!(span, "terminal `{}` is neither a valid Rust identifier \
|
||||
and nor does it have a defined conversion", term);
|
||||
}
|
||||
|
||||
fn is_identifier(&self, term: InternedString) -> bool {
|
||||
IDENTIFIER_RE.with(|identifier_re|
|
||||
read(|interner|
|
||||
identifier_re.is_match(interner.data(term))))
|
||||
}
|
||||
|
||||
fn validate_nt(&self,
|
||||
scope: &ScopeChain,
|
||||
span: Span,
|
@ -35,6 +35,13 @@ fn unknown_nonterminal() {
|
||||
r#"grammar; X = X >>>Y<<<;"#);
|
||||
}
|
||||
|
||||
/// An undefined nonterminal must be reported even when it only appears as
/// an argument to a macro invocation.
#[test]
fn unknown_nonterminal_in_macro_arg() {
    let message_pattern = "no definition found for nonterminal `Y`";
    let marked_grammar = r#"grammar; X = X Id<>>>Y<<<>; Id<T> = T;"#;
    check_err(message_pattern, marked_grammar);
}
|
||||
|
||||
#[test]
|
||||
fn repeated_macro_arg() {
|
||||
check_err(
|
@ -47,6 +47,7 @@ pub enum Tok<'input> {
|
||||
Equals,
|
||||
EqualsEquals,
|
||||
EqualsGreaterThanCode(&'input str),
|
||||
EqualsGreaterThanQuestionCode(&'input str),
|
||||
EqualsGreaterThanLookahead,
|
||||
EqualsGreaterThanLookbehind,
|
||||
GreaterThan,
|
||||
@ -113,33 +114,47 @@ impl<'input> Tokenizer<'input> {
|
||||
// we've seen =>, now we have to choose between:
|
||||
//
|
||||
// => code
|
||||
// =>? code
|
||||
// =>@L
|
||||
// =>@R
|
||||
|
||||
let idx1 = match self.lookahead {
|
||||
match self.lookahead {
|
||||
Some((_, '@')) => {
|
||||
match self.bump() {
|
||||
Some((idx2, 'L')) => {
|
||||
self.bump();
|
||||
return Ok((idx0, EqualsGreaterThanLookahead, idx2+1));
|
||||
Ok((idx0, EqualsGreaterThanLookahead, idx2+1))
|
||||
}
|
||||
Some((idx2, 'R')) => {
|
||||
self.bump();
|
||||
return Ok((idx0, EqualsGreaterThanLookbehind, idx2+1));
|
||||
Ok((idx0, EqualsGreaterThanLookbehind, idx2+1))
|
||||
}
|
||||
_ => {
|
||||
return Err(UnrecognizedToken(idx0));
|
||||
Err(UnrecognizedToken(idx0))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
None => {
|
||||
return Err(UnterminatedCode(idx0));
|
||||
Some((idx1, '?')) => {
|
||||
self.bump();
|
||||
let idx2 = try!(self.code(idx0));
|
||||
let code = &self.text[idx1+1..idx2];
|
||||
Ok((idx0, EqualsGreaterThanQuestionCode(code), idx2))
|
||||
}
|
||||
|
||||
Some((idx1, _)) => { idx1 }
|
||||
};
|
||||
Some((idx1, _)) => {
|
||||
let idx2 = try!(self.code(idx0));
|
||||
let code = &self.text[idx1..idx2];
|
||||
Ok((idx0, EqualsGreaterThanCode(code), idx2))
|
||||
}
|
||||
|
||||
None => {
|
||||
Err(UnterminatedCode(idx0))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn code(&mut self, idx0: usize) -> Result<usize, Error> {
|
||||
// This is the interesting case. To find the end of the code,
|
||||
// we have to scan ahead, matching (), [], and {}, and looking
|
||||
// for a suitable terminator: `,`, `;`, `]`, `}`, or `)`.
|
||||
@ -163,8 +178,7 @@ impl<'input> Tokenizer<'input> {
|
||||
// terminator. The code is everything *up
|
||||
// to but not including* the terminating
|
||||
// `,`, `;`, etc.
|
||||
let code = &self.text[idx1..];
|
||||
return Ok((idx0, EqualsGreaterThanCode(code), self.text.len()));
|
||||
return Ok(self.text.len());
|
||||
}
|
||||
|
||||
Some((idx2, ';')) |
|
||||
@ -176,8 +190,7 @@ impl<'input> Tokenizer<'input> {
|
||||
// terminator. The code is everything *up
|
||||
// to but not including* the terminating
|
||||
// `,`, `;`, etc.
|
||||
let code = &self.text[idx1..idx2];
|
||||
return Ok((idx0, EqualsGreaterThanCode(code), idx2));
|
||||
return Ok(idx2);
|
||||
}
|
||||
|
||||
None if balance > 0 => {
|
||||
|
@ -55,6 +55,14 @@ fn code1() {
|
||||
]);
|
||||
}
|
||||
|
||||
/// `=>?` followed by code should produce a single
/// `EqualsGreaterThanQuestionCode` token holding everything after the `?`
/// up to, but not including, the terminating comma.
#[test]
fn code2() {
    // Each row's first string underlines, with `~`, the input characters
    // the token covers, so it has to line up column-for-column with the
    // input: the code token spans columns 0..11 and the comma sits at
    // column 11.
    // NOTE(review): the comma row had lost its leading padding (it read
    // `" ~"`); restored to 11 spaces to match the first row's convention.
    // Confirm against the `test` helper.
    test("=>? a(b, c),", vec![
        ("~~~~~~~~~~~ ", EqualsGreaterThanQuestionCode(" a(b, c)")),
        ("           ~", Comma),
    ]);
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn code_forgot_comma() {
|
||||
|
Loading…
x
Reference in New Issue
Block a user