Mirror of https://github.com/fluencelabs/lalrpop (synced 2025-03-28 06:01:02 +00:00)

Merge pull request #509 from Marwes/lexer_comment

feat: Allow the tokenizer to contain custom skip regexes/literals

Commit 723678f364
@@ -2,6 +2,21 @@ use std::str::FromStr;

 grammar;

+match {
+    "+",
+    "-",
+    "*",
+    "/",
+    "(",
+    ")",
+    r"[0-9]+",
+
+    // Skip whitespace and comments
+    r"\s*" => { },
+    r"//[^\n\r]*[\n\r]*" => { },                          // `// comment`
+    r"/\*([^\*]*\*+[^\*/])*([^\*]*\*+|[^\*])*\*/" => { }, // `/* comment */`
+}
+
 pub Expr: i32 = {
     <l:Expr> "+" <r:Factor> => l + r,
     <l:Expr> "-" <r:Factor> => l - r,
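Not part of the diff, but for orientation: with the `match` block added above, the generated lexer now discards whitespace and both comment styles between tokens. A minimal driver sketch, assuming the grammar is compiled by `lalrpop_mod!` under the hypothetical module name `calculator` and defines the usual `Factor` rule from the tutorial:

```rust
#[macro_use]
extern crate lalrpop_util;

lalrpop_mod!(calculator); // hypothetical module name for the grammar above

fn main() {
    let parser = calculator::ExprParser::new(); // `pub Expr` generates `ExprParser`
    // Comments are now skipped just like whitespace, so these parse identically:
    assert_eq!(parser.parse("2 + 3 * 4").unwrap(), 14);
    assert_eq!(parser.parse("2 + 3 * 4 // trailing comment").unwrap(), 14);
    assert_eq!(parser.parse("2 /* inline */ + 3 * 4").unwrap(), 14);
}
```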
@@ -209,13 +209,13 @@ match {
 } else {
     r"\w+",
     _
 }
 ```

 Here the match contains two levels; each level can have more than one
 item in it. The top-level contains only `r"[0-9]+"`, which means that this
 regular expression is given highest priority. The next level contains
 `r"\w+"`, so that will match afterwards.

 The final `_` indicates that other string literals and regular
 expressions that appear elsewhere in the grammar (e.g., `"("` or
@@ -240,7 +240,7 @@ fn calculator2b() {

     let result = calculator2b::TermParser::new().parse("(foo33)").unwrap();
     assert_eq!(result, "Id(foo33)");

     // This one will fail:

     let result = calculator2b::TermParser::new().parse("(22)").unwrap();
@@ -262,7 +262,7 @@ match {
 } else {
     r"\w+",
     _
 }
 ```

 This raises the interesting question of what the precedence is **within**
@@ -280,7 +280,7 @@ There is one final twist before we reach the
 can also use `match` declarations to give names to regular
 expressions, so that we don't have to type them directly in our
 grammar. For example, maybe instead of writing `r"\w+"`, we would
 prefer to write `ID`. We could do that by modifying the match declaration like
 so:

 ```
@@ -321,6 +321,20 @@ match {
 And now any reference in your grammar to `"BEGIN"` will actually match
 any capitalization.

+
+#### Customizing skipping between tokens
+
+If we want to support comments we will need to skip more than just whitespace in our lexer.
+To this end `ignore patterns` can be specified.
+
+```
+match {
+    r"\s*" => { }, // The default whitespace skipping is disabled if an `ignore pattern` is specified
+    r"//[^\n\r]*[\n\r]*" => { }, // Skip `// comments`
+    r"/\*([^\*]*\*+[^\*/])*([^\*]*\*+|[^\*])*\*/" => { }, // Skip `/* comments */`
+}
+```
+
 [lexer tutorial]: index.md
 [calculator2b]: ../../calculator/src/calculator2b.lalrpop
 [calculator3]: ../../calculator/src/calculator3.lalrpop
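A complete toy grammar using such ignore patterns might look as follows (illustrative sketch, not from the diff; the `lalrpop-test/src/comments.lalrpop` file added below exercises the same feature):

```
grammar;

match {
    r"[0-9]+",
    r"\s*" => { },               // skip whitespace
    r"//[^\n\r]*[\n\r]*" => { }, // skip line comments
}

pub Num: i32 = r"[0-9]+" => <>.parse().unwrap();
```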
lalrpop-test/src/comments.lalrpop (new file, 12 lines)
@@ -0,0 +1,12 @@
+grammar;
+
+match {
+    r"[0-9]+" => NUM,
+    r"\s*" => { },
+    r"//[^\n\r]*[\n\r]*" => { },
+    r"/\*([^\*]*\*+[^\*/])*([^\*]*\*+|[^\*])*\*/" => { },
+}
+
+pub(crate) Term: Vec<&'input str> = {
+    <NUM*>,
+};
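An aside of mine: in this grammar, `r"[0-9]+" => NUM` gives the number regex the name `NUM`, the three `=> { }` entries are skipped entirely, and `<NUM*>` collects every remaining token, so commented and uncommented input tokenize identically:

```rust
// Usage sketch; the real assertions live in the test added to the test
// crate's main.rs further down.
let parser = comments::TermParser::new();
assert_eq!(parser.parse("22 3").unwrap(), vec!["22", "3"]);
assert_eq!(parser.parse("22 /* skipped */ 3").unwrap(), vec!["22", "3"]);
```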
@@ -143,6 +143,8 @@ lalrpop_mod!(
     dyn_argument
 );

+lalrpop_mod!(comments);
+
 pub fn use_cfg_created_parser() {
     cfg::CreatedParser::new();
 }
@@ -996,3 +998,22 @@ fn verify_lalrpop_generates_itself() {
         Use ./snap.sh to generate a new snapshot of the lrgrammar",
     );
 }
+
+#[test]
+fn comments() {
+    assert_eq!(
+        comments::TermParser::new().parse("22 3 5 13").unwrap(),
+        vec!["22", "3", "5", "13"]
+    );
+
+    assert_eq!(
+        comments::TermParser::new()
+            .parse(
+                "22 /* 123 */ 3 5
+        // abc
+        13 // "
+            )
+            .unwrap(),
+        vec!["22", "3", "5", "13"]
+    );
+}
@@ -10,22 +10,29 @@ impl<'a> fmt::Display for Token<'a> {
     }
 }

+struct RegexEntry {
+    regex: regex::Regex,
+    skip: bool,
+}
+
 pub struct MatcherBuilder {
     regex_set: regex::RegexSet,
-    regex_vec: Vec<regex::Regex>,
+    regex_vec: Vec<RegexEntry>,
 }

 impl MatcherBuilder {
-    pub fn new<S>(exprs: impl IntoIterator<Item = S>) -> Result<MatcherBuilder, regex::Error>
+    pub fn new<S>(
+        exprs: impl IntoIterator<Item = (S, bool)>,
+    ) -> Result<MatcherBuilder, regex::Error>
     where
         S: AsRef<str>,
     {
         let exprs = exprs.into_iter();
         let mut regex_vec = Vec::with_capacity(exprs.size_hint().0);
         let mut first_error = None;
-        let regex_set_result = regex::RegexSet::new(exprs.scan((), |_, s| {
+        let regex_set_result = regex::RegexSet::new(exprs.scan((), |_, (s, skip)| {
             regex_vec.push(match regex::Regex::new(s.as_ref()) {
-                Ok(regex) => regex,
+                Ok(regex) => RegexEntry { regex, skip },
                 Err(err) => {
                     first_error = Some(err);
                     return None;
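The builder's callers now pass `(pattern, skip)` pairs instead of bare patterns. A hedged sketch of direct use (the concrete patterns are mine; the generated code does this with `.iter().copied()`, as shown further down):

```rust
// Each item is (anchored regex, is_skip), matching the new `new` signature.
let builder = lalrpop_util::lexer::MatcherBuilder::new(
    [
        ("^([0-9]+)", false), // a real token
        (r"^(\s+)", true),    // matched but never emitted
    ]
    .iter()
    .copied(),
)
.unwrap();
```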
@@ -62,7 +69,7 @@ pub struct Matcher<'input, 'builder, E> {
     text: &'input str,
     consumed: usize,
     regex_set: &'builder regex::RegexSet,
-    regex_vec: &'builder Vec<regex::Regex>,
+    regex_vec: &'builder Vec<RegexEntry>,
     _marker: PhantomData<fn() -> E>,
 }

@@ -70,36 +77,52 @@ impl<'input, 'builder, E> Iterator for Matcher<'input, 'builder, E> {
     type Item = Result<(usize, Token<'input>, usize), ParseError<usize, Token<'input>, E>>;

     fn next(&mut self) -> Option<Self::Item> {
-        let text = self.text.trim_start();
-        let whitespace = self.text.len() - text.len();
-        let start_offset = self.consumed + whitespace;
-        if text.is_empty() {
-            self.text = text;
-            self.consumed = start_offset;
-            None
-        } else {
-            let matches = self.regex_set.matches(text);
-            if !matches.matched_any() {
-                Some(Err(ParseError::InvalidToken {
-                    location: start_offset,
-                }))
+        loop {
+            let text = self.text;
+            let start_offset = self.consumed;
+            eprintln!("{:?}", text);
+            if text.is_empty() {
+                self.consumed = start_offset;
+                return None;
             } else {
-                let mut longest_match = 0;
-                let mut index = 0;
-                for i in matches.iter() {
-                    let match_ = self.regex_vec[i].find(text).unwrap();
-                    let len = match_.end();
-                    if len >= longest_match {
-                        longest_match = len;
-                        index = i;
+                let matches = self.regex_set.matches(text);
+                if !matches.matched_any() {
+                    return Some(Err(ParseError::InvalidToken {
+                        location: start_offset,
+                    }));
+                } else {
+                    let mut longest_match = 0;
+                    let mut index = 0;
+                    let mut skip = false;
+                    for i in matches.iter() {
+                        let entry = &self.regex_vec[i];
+                        let match_ = entry.regex.find(text).unwrap();
+                        let len = match_.end();
+                        if len >= longest_match {
+                            longest_match = len;
+                            index = i;
+                            skip = entry.skip;
+                        }
                     }
+
+                    let result = &text[..longest_match];
+                    let remaining = &text[longest_match..];
+                    let end_offset = start_offset + longest_match;
+                    self.text = remaining;
+                    self.consumed = end_offset;
+
+                    // Skip any whitespace matches
+                    if skip {
+                        if longest_match == 0 {
+                            return Some(Err(ParseError::InvalidToken {
+                                location: start_offset,
+                            }));
+                        }
+                        continue;
+                    }
+
+                    return Some(Ok((start_offset, Token(index, result), end_offset)));
                 }
             }
-            let result = &text[..longest_match];
-            let remaining = &text[longest_match..];
-            let end_offset = start_offset + longest_match;
-            self.text = remaining;
-            self.consumed = end_offset;
-            Some(Ok((start_offset, Token(index, result), end_offset)))
         }
     }
 }
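Distilled for readability (my sketch using the `regex` crate directly, not lalrpop-util code): the loop keeps taking the longest match, emits it unless its pattern is flagged as skip, and errors out when nothing matches or a skip pattern wins with a zero-length match:

```rust
use regex::{Regex, RegexSet};

// Patterns are assumed pre-anchored with `^(...)`, as the generator guarantees.
fn tokenize(mut text: &str, patterns: &[(&str, bool)]) -> Vec<(usize, String)> {
    let set = RegexSet::new(patterns.iter().map(|(p, _)| *p)).unwrap();
    let regexes: Vec<(Regex, bool)> = patterns
        .iter()
        .map(|(p, skip)| (Regex::new(p).unwrap(), *skip))
        .collect();

    let mut tokens = Vec::new();
    while !text.is_empty() {
        // Longest match wins; ties go to the later pattern, mirroring the
        // `len >= longest_match` comparison in the diff.
        let (mut longest, mut index, mut skip) = (0, 0, false);
        for i in set.matches(text).iter() {
            let len = regexes[i].0.find(text).unwrap().end();
            if len >= longest {
                longest = len;
                index = i;
                skip = regexes[i].1;
            }
        }
        // The real iterator returns ParseError::InvalidToken when nothing
        // matches, or when a skip pattern wins with a zero-length match.
        assert!(longest > 0, "invalid token at {:?}", text);
        if !skip {
            tokens.push((index, text[..longest].to_string()));
        }
        text = &text[longest..]; // consume and loop; skips emit no token
    }
    tokens
}
```

For instance, `tokenize("22 /* x */ 3", &[("^([0-9]+)", false), (r"^(\s+)", true), (r"^(/\*[^*]*\*/)", true)])` yields just the two number tokens.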
@@ -109,7 +109,29 @@ impl MatchItem {
 }

 pub type MatchSymbol = TerminalLiteral;
-pub type MatchMapping = TerminalString;
+
+#[derive(Clone, PartialEq, Eq, Ord, PartialOrd)]
+pub enum MatchMapping {
+    Terminal(TerminalString),
+    Skip,
+}
+
+impl Debug for MatchMapping {
+    fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
+        match self {
+            MatchMapping::Terminal(term) => write!(fmt, "{:?}", term),
+            MatchMapping::Skip => write!(fmt, "{{ }}"),
+        }
+    }
+}
+impl Display for MatchMapping {
+    fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
+        match self {
+            MatchMapping::Terminal(term) => write!(fmt, "{}", term),
+            MatchMapping::Skip => write!(fmt, "{{ }}"),
+        }
+    }
+}

 /// Intern tokens are not typed by the user: they are synthesized in
 /// the absence of an "extern" declaration with information about the

@@ -158,7 +180,7 @@ pub struct MatchEntry {
     /// NB: This field must go first, so that `PartialOrd` sorts by precedence first!
     pub precedence: usize,
     pub match_literal: TerminalLiteral,
-    pub user_name: TerminalString,
+    pub user_name: MatchMapping,
 }

 #[derive(Clone, Debug, PartialEq, Eq)]
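As a worked example of the new types (my reading, not code from the diff):

```rust
// Illustrative mapping from grammar syntax to the new AST:
//   r"[0-9]+" => NUM   lowers to  user_name: MatchMapping::Terminal(NUM)
//   r"\s*"    => { }   lowers to  user_name: MatchMapping::Skip
// Both Debug and Display render Skip as `{ }`, round-tripping the grammar
// syntax (hence the escaped `{{ }}` format strings above).
```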
@@ -1,6 +1,6 @@
 //! Generates an iterator type `Matcher` that looks roughly like

-use grammar::parse_tree::InternToken;
+use grammar::parse_tree::{InternToken, MatchMapping};
 use grammar::repr::{Grammar, TerminalLiteral};
 use lexer::re;
 use rust::RustWrite;
@@ -25,35 +25,48 @@ pub fn compile<W: Write>(

     // create a vector of rust string literals with the text of each
     // regular expression
-    let regex_strings: Vec<String> = {
-        intern_token
-            .match_entries
-            .iter()
-            .map(|match_entry| match match_entry.match_literal {
-                TerminalLiteral::Quoted(ref s) => re::parse_literal(&s),
-                TerminalLiteral::Regex(ref s) => re::parse_regex(&s).unwrap(),
-            })
-            .map(|regex| {
-                // make sure all regex are anchored at the beginning of the input
-                format!("^({})", regex)
-            })
-            .map(|regex_str| {
-                // create a rust string with text of the regex; the Debug impl
-                // will add quotes and escape
-                format!("{:?}", regex_str)
-            })
-            .collect()
-    };
+    let regex_strings = intern_token
+        .match_entries
+        .iter()
+        .map(|match_entry| {
+            (
+                match match_entry.match_literal {
+                    TerminalLiteral::Quoted(ref s) => re::parse_literal(&s),
+                    TerminalLiteral::Regex(ref s) => re::parse_regex(&s).unwrap(),
+                },
+                match match_entry.user_name {
+                    MatchMapping::Terminal(_) => false,
+                    MatchMapping::Skip => true,
+                },
+            )
+        })
+        .map(|(regex, skip)| {
+            // make sure all regex are anchored at the beginning of the input
+            (format!("^({})", regex), skip)
+        })
+        .map(|(regex_str, skip)| {
+            // create a rust string with text of the regex; the Debug impl
+            // will add quotes and escape
+            (format!("{:?}", regex_str), skip)
+        });

-    rust!(out, "let {}strs: &[&str] = &[", prefix);
-    for literal in &regex_strings {
-        rust!(out, "{},", literal);
+    let mut contains_skip = false;
+
+    rust!(out, "let {}strs: &[(&str, bool)] = &[", prefix);
+    for (literal, skip) in regex_strings {
+        rust!(out, "({}, {}),", literal, skip);
+        contains_skip |= skip;
     }
+
+    if !contains_skip {
+        rust!(out, r#"(r"^(\s*)", true),"#);
+    }
+
     rust!(out, "];");

     rust!(
         out,
-        "{p}lalrpop_util::lexer::MatcherBuilder::new({p}strs).unwrap()",
+        "{p}lalrpop_util::lexer::MatcherBuilder::new({p}strs.iter().copied()).unwrap()",
         p = prefix
     );

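To make the codegen change concrete, here is roughly what the emitted prologue now looks like for a grammar with a single number terminal and no declared skip items (hypothetical output; `__` stands in for the generator's name prefix):

```rust
// Hypothetical generated code:
let __strs: &[(&str, bool)] = &[
    ("^([0-9]+)", false), // a real terminal
    ("^(\\s*)", true),    // injected default whitespace skip (none declared)
];
__lalrpop_util::lexer::MatcherBuilder::new(__strs.iter().copied()).unwrap()
```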
@@ -5,8 +5,8 @@ use collections::{map, Map};
 use grammar::consts::CFG;
 use grammar::parse_tree as pt;
 use grammar::parse_tree::{
-    read_algorithm, GrammarItem, InternToken, Lifetime, Name, NonterminalString, Path,
-    TerminalString,
+    read_algorithm, GrammarItem, InternToken, Lifetime, MatchMapping, Name, NonterminalString,
+    Path, TerminalString,
 };
 use grammar::pattern::{Pattern, PatternKind};
 use grammar::repr as r;
@@ -79,26 +79,29 @@ impl<'s> LowerState<'s> {
             })),
         };
         self.conversions
-            .extend(data.match_entries.iter().enumerate().map(
-                |(index, match_entry)| {
-                    let pattern = Pattern {
-                        span,
-                        kind: PatternKind::TupleStruct(
-                            internal_token_path.clone(),
-                            vec![
-                                Pattern {
-                                    span,
-                                    kind: PatternKind::Usize(index),
-                                },
-                                Pattern {
-                                    span,
-                                    kind: PatternKind::Choose(input_str.clone()),
-                                },
-                            ],
-                        ),
-                    };
-
-                    (match_entry.user_name.clone(), pattern)
+            .extend(data.match_entries.iter().enumerate().filter_map(
+                |(index, match_entry)| match &match_entry.user_name {
+                    MatchMapping::Terminal(user_name) => {
+                        let pattern = Pattern {
+                            span,
+                            kind: PatternKind::TupleStruct(
+                                internal_token_path.clone(),
+                                vec![
+                                    Pattern {
+                                        span,
+                                        kind: PatternKind::Usize(index),
+                                    },
+                                    Pattern {
+                                        span,
+                                        kind: PatternKind::Choose(input_str.clone()),
+                                    },
+                                ],
+                            ),
+                        };
+
+                        Some((user_name.clone(), pattern))
+                    }
+                    MatchMapping::Skip => None,
                 },
             ));
         self.intern_token = Some(data);
@@ -49,7 +49,7 @@ fn resolve_in_place(grammar: &mut Grammar) -> NormResult<()> {
         .flat_map(|match_token| &match_token.contents)
         .flat_map(|match_contents| &match_contents.items)
         .filter_map(|item| match *item {
-            MatchItem::Mapped(_, TerminalString::Bare(ref id), _) => {
+            MatchItem::Mapped(_, MatchMapping::Terminal(TerminalString::Bare(ref id)), _) => {
                 Some((item.span(), id.clone(), Def::Terminal))
             }
             _ => None,
@@ -133,7 +133,7 @@ impl MatchBlock {
                 match_block.add_match_entry(
                     precedence,
                     sym.clone(),
-                    TerminalString::Literal(sym.clone()),
+                    MatchMapping::Terminal(TerminalString::Literal(sym.clone())),
                     span,
                 )?;
             }
@@ -162,7 +162,7 @@ impl MatchBlock {
         &mut self,
         match_group_precedence: usize,
         sym: TerminalLiteral,
-        user_name: TerminalString,
+        user_name: MatchMapping,
         span: Span,
     ) -> NormResult<()> {
         if let Some(_old_span) = self.spans.insert(sym.clone(), span) {
@@ -170,7 +170,9 @@ impl MatchBlock {
         }

         // NB: It's legal for multiple regex to produce same terminal.
-        self.match_user_names.insert(user_name.clone());
+        if let MatchMapping::Terminal(user_name) = &user_name {
+            self.match_user_names.insert(user_name.clone());
+        }

         self.match_entries.push(MatchEntry {
             precedence: match_group_precedence * 2 + sym.base_precedence(),
@@ -203,7 +205,7 @@ impl MatchBlock {
         self.match_entries.push(MatchEntry {
             precedence: sym.base_precedence(),
             match_literal: sym.clone(),
-            user_name: TerminalString::Literal(sym.clone()),
+            user_name: MatchMapping::Terminal(TerminalString::Literal(sym.clone())),
         });

         self.spans.insert(sym, span);
@@ -328,29 +330,26 @@ fn construct(grammar: &mut Grammar, match_block: MatchBlock) -> NormResult<()> {
     // one of precedences, that are parallel with `literals`.
     let mut regexs = Vec::with_capacity(match_entries.len());
     let mut precedences = Vec::with_capacity(match_entries.len());
-    {
-        for match_entry in &match_entries {
-            precedences.push(Precedence(match_entry.precedence));
-            match match_entry.match_literal {
-                TerminalLiteral::Quoted(ref s) => {
-                    regexs.push(re::parse_literal(&s));
-                }
-                TerminalLiteral::Regex(ref s) => {
-                    match re::parse_regex(&s) {
-                        Ok(regex) => regexs.push(regex),
-                        Err(error) => {
-                            let literal_span = spans[&match_entry.match_literal];
-                            // FIXME -- take offset into account for
-                            // span; this requires knowing how many #
-                            // the user used, which we do not track
-                            return_err!(literal_span, "invalid regular expression: {}", error);
-                        }
-                    }
-                }
-            }
-        }
-        Ok(())
-    }?;
+    for match_entry in &match_entries {
+        precedences.push(Precedence(match_entry.precedence));
+        match match_entry.match_literal {
+            TerminalLiteral::Quoted(ref s) => {
+                regexs.push(re::parse_literal(&s));
+            }
+            TerminalLiteral::Regex(ref s) => {
+                match re::parse_regex(&s) {
+                    Ok(regex) => regexs.push(regex),
+                    Err(error) => {
+                        let literal_span = spans[&match_entry.match_literal];
+                        // FIXME -- take offset into account for
+                        // span; this requires knowing how many #
+                        // the user used, which we do not track
+                        return_err!(literal_span, "invalid regular expression: {}", error);
+                    }
+                }
+            }
+        }
+    }

     let dfa = match dfa::build_dfa(&regexs, &precedences) {
         Ok(dfa) => dfa,
@@ -3,8 +3,8 @@ use super::{NormError, NormResult};

 use grammar::consts::{ERROR, LOCATION};
 use grammar::parse_tree::{
-    ActionKind, Alternative, Grammar, GrammarItem, Lifetime, NonterminalData, NonterminalString,
-    Path, Span, SymbolKind, TypeParameter, TypeRef,
+    ActionKind, Alternative, Grammar, GrammarItem, Lifetime, MatchMapping, NonterminalData,
+    NonterminalString, Path, Span, SymbolKind, TypeParameter, TypeRef,
 };
 use grammar::repr::{NominalTypeRepr, TypeRepr, Types};
 use std::collections::{HashMap, HashSet};

@@ -96,7 +96,9 @@ impl<'grammar> TypeInferencer<'grammar> {
         let mut types = Types::new(&grammar.prefix, Some(loc_type), error_type, enum_type);

         for match_entry in &intern_token.match_entries {
-            types.add_term_type(match_entry.user_name.clone(), input_str.clone());
+            if let MatchMapping::Terminal(user_name) = &match_entry.user_name {
+                types.add_term_type(user_name.clone(), input_str.clone());
+            }
         }

         types
@@ -319,7 +319,10 @@ MatchItem: MatchItem = {

 MatchSymbol = QuotedLiteral;

-MatchMapping = Terminal;
+MatchMapping: MatchMapping = {
+    Terminal => MatchMapping::Terminal(<>),
+    "{" "}" => MatchMapping::Skip,
+};

 EnumToken: EnumToken =
     "enum" <lo:@L> <t:TypeRef> <hi:@R> "{"
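Concretely (example mine), the grammar rule above means both right-hand-side forms of a `match` item now parse:

```
match {
    r"[0-9]+" => NUM, // MatchMapping::Terminal(NUM)
    r"\s*" => { },    // MatchMapping::Skip
}
```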
(One file diff suppressed because it is too large.)
@@ -22,7 +22,7 @@ mod test;
 pub enum Top {
     Grammar(Grammar),
     Pattern(Pattern<TypeRef>),
-    MatchMapping(TerminalString),
+    MatchMapping(MatchMapping),
     TypeRef(TypeRef),
     GrammarWhereClauses(Vec<WhereClause<TypeRef>>),
 }