Merge pull request #498 from wash2/master

enable byte-based regular expressions #229
This commit is contained in:
Markus Westerlind 2020-01-07 07:37:09 +01:00 committed by GitHub
commit 5eda0670cb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 86 additions and 7 deletions

View File

@ -1,6 +1,6 @@
language: rust
rust:
- 1.32.0
- 1.36.0
- beta
- nightly
cache:

View File

@ -113,6 +113,7 @@ lalrpop_mod!(unit);
/// test for match section
lalrpop_mod!(match_section);
// match-section test grammar written with byte-based (`(?-u)`) regular expressions
lalrpop_mod!(match_section_byte);
lalrpop_mod!(match_alternatives);
/// regression test for issue #253.
@ -879,6 +880,25 @@ fn test_match_section() {
.is_err());
}
/// Checks that a `match` section works when its terminals are byte-based
/// (`(?-u)`) regular expressions.
///
/// The parser under test must come from `match_section_byte`; using the
/// `match_section` parser here would only re-run `test_match_section` and
/// leave the byte-regex grammar untested.
#[test]
fn test_match_section_byte() {
    // Keywords are matched case-insensitively, so both spellings of SELECT
    // must be accepted, as must the aliased INSERT and UPDATE terminals.
    for input in &["SELECT foo", "select foo", "INSERT foo", "UPDATE foo"] {
        assert!(
            match_section_byte::QueryParser::new().parse(input).is_ok(),
            "expected {:?} to parse",
            input
        );
    }

    // A keyword in table position must be rejected: the match section gives
    // the keyword terminal precedence over the generic table-name regex.
    assert!(match_section_byte::QueryParser::new()
        .parse("UPDATE update")
        .is_err());
}
#[test]
fn test_match_alternatives() {
assert_eq!(

View File

@ -0,0 +1,25 @@
grammar;
// Test grammar for a `match` section whose terminals are all written with the
// `(?i-u)` regex flags (`i` on, `u` off).
// NOTE: This grammar is ambiguous without the match section
// (the `Table` regex below also matches every keyword spelling).
match {
// Plain regex entry: rules refer to it by the same regex literal.
r"(?i-u)select",
// Regex entry aliased to the quoted terminal "INSERT".
r"(?i-u)insert" => "INSERT",
// Regex entry aliased to the bare identifier terminal UPDATE.
r"(?i-u)update" => UPDATE
} else {
// Catch-all for the remaining terminals used by the grammar
// (presumably just the `Table` regex here -- see the LALRPOP lexer docs).
_
}
// A keyword, normalised to its upper-case spelling.
Keyword: String = {
r"(?i-u)select" => String::from("SELECT"),
"INSERT" => String::from("INSERT"),
UPDATE => String::from("UPDATE")
};
// A table name: one or more letters, returned exactly as written in the input.
Table: String = {
<r"(?i-u)[a-z]+"> => String::from(<>)
};
// Entry point: a keyword followed by a table name, joined by a single space.
pub Query: String = {
<Keyword> <Table> => format!("{} {}", <>)
};

View File

@ -4,7 +4,7 @@
use lexer::re::Regex;
use regex_syntax::hir::{
Anchor, Class, ClassUnicodeRange, GroupKind, Hir, HirKind, Literal, RepetitionKind,
Anchor, Class, ClassBytesRange, ClassUnicodeRange, GroupKind, Hir, HirKind, Literal, RepetitionKind,
RepetitionRange,
};
use std::char;
@ -133,7 +133,7 @@ impl NFA {
pub fn is_rejecting_state(&self, from: NFAStateIndex) -> bool {
self.states[from.0].kind == StateKind::Reject
}
///////////////////////////////////////////////////////////////////////////
// Private methods for building an NFA
@ -212,8 +212,13 @@ impl NFA {
self.push_edge(s0, Other, reject);
Ok(s0)
}
// Bytes are not supported
Literal::Byte(_) => Err(NFAConstructionError::ByteRegex),
// A byte literal is matched by a single-value test edge
Literal::Byte(b) => {
let s0 = self.new_state(StateKind::Neither);
self.push_edge(s0, Test::byte(b), accept);
self.push_edge(s0, Other, reject);
Ok(s0)
}
}
}
@ -235,8 +240,16 @@ impl NFA {
self.push_edge(s0, Other, reject);
Ok(s0)
}
// Bytes are not supported
Class::Bytes(_) => Err(NFAConstructionError::ByteRegex),
// A byte class gets one accepting edge per byte range
Class::Bytes(ref byte) => {
let s0 = self.new_state(StateKind::Neither);
for &range in byte.iter() {
let test: Test = range.into();
self.push_edge(s0, test, accept);
}
self.push_edge(s0, Other, reject);
Ok(s0)
}
}
}
@ -502,6 +515,14 @@ impl Test {
}
}
/// Builds a test that matches exactly the single byte value `b`.
///
/// The range is stored half-open: `start` is the byte itself and `end` is
/// one past it.
pub fn byte(b: u8) -> Test {
    let start = u32::from(b);
    let end = start + 1;
    Test { start, end }
}
pub fn inclusive_range(s: char, e: char) -> Test {
Test {
start: s as u32,
@ -509,6 +530,13 @@ impl Test {
}
}
/// Builds a test covering every byte value from `s` through `e`, with both
/// endpoints included.
///
/// The stored `end` is exclusive, hence the `+ 1`; widening to `u32` first
/// means `e == 255` cannot overflow.
pub fn inclusive_byte_range(s: u8, e: u8) -> Test {
    let first = u32::from(s);
    let past_last = u32::from(e) + 1;
    Test {
        start: first,
        end: past_last,
    }
}
pub fn exclusive_range(s: char, e: char) -> Test {
Test {
start: s as u32,
@ -553,6 +581,12 @@ impl From<ClassUnicodeRange> for Test {
}
}
/// Converts a byte-class range into the equivalent `Test`, treating both of
/// the range's endpoints as included.
impl From<ClassBytesRange> for Test {
    fn from(range: ClassBytesRange) -> Test {
        let (lo, hi) = (range.start(), range.end());
        Test::inclusive_byte_range(lo, hi)
    }
}
impl Debug for Test {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), FmtError> {
match (char::from_u32(self.start), char::from_u32(self.end)) {