Mirror of https://github.com/fluencelabs/lalrpop, synced 2025-03-28 06:01:02 +00:00

Support for shebang token

parent 1c586f9b9a
commit 8b0b06965c
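In short, the tokenizer now recognizes a Rust-style inner attribute such as #![...] as a single ShebangAttribute token, and falls back to the plain Hash token when the text after # is not an attribute. The diff below adds the error code, the token variant, the lexing logic, and tests, but no rule that interprets the attribute yet. As a hypothetical illustration only, a grammar header like the following would now lex as one attribute token (the attribute text is taken from the new tests; "grammar;" stands in for the rest of the file):

#![set width = 80]

grammar;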
@@ -233,6 +233,9 @@ fn parse_and_normalize_grammar(session: &Session, file_text: &FileText) -> io::R
         tok::ErrorCode::UnterminatedCharacterLiteral => {
             "unterminated character literal; missing `'`?"
         }
+        tok::ErrorCode::UnterminatedAttribute => {
+            "unterminated #! attribute; missing ]?"
+        }
         tok::ErrorCode::ExpectedStringLiteral => "expected string literal; missing `\"`?",
         tok::ErrorCode::UnterminatedCode => {
             "unterminated code block; perhaps a missing `;`, `)`, `]` or `}`?"
@@ -401,6 +401,7 @@ extern {
         "=>@R" => Tok::EqualsGreaterThanLookbehind,
         ">" => Tok::GreaterThan,
         "#" => Tok::Hash,
+        "#![...]" => Tok::ShebangAttribute(<&'input str>),
         "{" => Tok::LeftBrace,
         "[" => Tok::LeftBracket,
         "(" => Tok::LeftParen,
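The new "#![...]" terminal carries the raw attribute text as &'input str, so a rule in this meta-grammar could bind it directly. A hypothetical follow-up rule, not part of this commit (the nonterminal name is made up):

// Hypothetical rule: capture the raw text of a shebang attribute.
ShebangAttribute: &'input str =
    <a:"#![...]"> => a;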
@@ -21,6 +21,7 @@ pub enum ErrorCode {
     UnterminatedEscape,
     UnterminatedStringLiteral,
     UnterminatedCharacterLiteral,
+    UnterminatedAttribute,
     UnterminatedCode,
     ExpectedStringLiteral,
 }
@@ -87,7 +88,8 @@ pub enum Tok<'input> {
     Star,
     TildeTilde,
     Underscore,
-    Bang
+    Bang,
+    ShebangAttribute(&'input str), // #![...]
 }
 
 pub struct Tokenizer<'input> {
@@ -117,6 +119,40 @@ const KEYWORDS: &'static [(&'static str, Tok<'static>)] = &[
     ("type", Type),
 ];
 
+/*
+ * Helper for backtracking.
+ */
+macro_rules! first {
+    ($this:expr, $action:expr, $fallback:expr) => {
+        {
+            let fallback_state = ($this.chars.clone(), $this.lookahead);
+            let result = $action;
+            match result {
+                Ok(_) => {
+                    Some(result)
+                }
+                _ => {
+                    $this.chars = fallback_state.0;
+                    $this.lookahead = fallback_state.1;
+                    Some($fallback)
+                }
+            }
+        }
+    }
+}
+
+macro_rules! try_opt {
+    ($e:expr, $err:expr) => {
+        {
+            let r = $e;
+            match r {
+                Some(Ok(val)) => val,
+                Some(Err(err)) => return Err(err),
+                None => return $err,
+            }
+        }
+    }
+}
+
 impl<'input> Tokenizer<'input> {
     pub fn new(text: &'input str, shift: usize) -> Tokenizer<'input> {
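The first! macro implements save-and-restore backtracking: it snapshots the character iterator and the lookahead, runs the primary action, and rolls both back if that action fails, so the fallback sees untouched input. A standalone Rust sketch of the same idea, illustrative only and not code from this commit (the cheap snapshot works because std::str::CharIndices is Clone):

// Standalone illustration of the save/restore backtracking behind first!.
fn lex_hash(text: &str) -> &'static str {
    let mut chars = text.char_indices();
    let saved = chars.clone();        // snapshot before the speculative parse
    match (chars.next(), chars.next()) {
        // Primary action succeeded: keep the advanced iterator state.
        (Some((_, '#')), Some((_, '!'))) => "shebang attribute prefix",
        // Primary action failed: restore the snapshot, then take the fallback.
        _ => {
            chars = saved;
            let _ = chars.next();     // fallback consumes just the single '#'
            "plain Hash token"
        }
    }
}

fn main() {
    assert_eq!(lex_hash("#![x]"), "shebang attribute prefix");
    assert_eq!(lex_hash("# foo"), "plain Hash token");
}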
@@ -130,6 +166,47 @@ impl<'input> Tokenizer<'input> {
         t
     }
 
+    fn shebang_attribute(&mut self, idx0: usize) -> Result<Spanned<Tok<'input>>, Error> {
+        try_opt!(self.expect_char('!'), error(ErrorCode::UnrecognizedToken, idx0));
+        try_opt!(self.expect_char('['), error(ErrorCode::UnterminatedAttribute, idx0));
+        let mut sq_bracket_counter = 1;
+        while let Some((idx1, c)) = self.lookahead {
+            match c {
+                '[' => {
+                    self.bump();
+                    sq_bracket_counter += 1
+                }
+                ']' => {
+                    self.bump();
+                    sq_bracket_counter -= 1;
+                    match sq_bracket_counter {
+                        0 => {
+                            let idx2 = idx1 + 1;
+                            let data = &self.text[idx0..idx2];
+                            self.bump();
+                            return Ok((idx0, ShebangAttribute(data), idx2))
+                        },
+                        n if n < 0 => {
+                            return error(UnrecognizedToken, idx0)
+                        }
+                        _ => ()
+                    }
+                }
+                '"' => {
+                    self.bump();
+                    let _ = try!(self.string_literal(idx1));
+                }
+                '\n' => {
+                    return error(UnrecognizedToken, idx0)
+                }
+                _ => {
+                    self.bump();
+                }
+            }
+        }
+        error(UnrecognizedToken, idx0)
+    }
+
     fn next_unshifted(&mut self) -> Option<Result<Spanned<Tok<'input>>, Error>> {
         loop {
             return match self.lookahead {
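shebang_attribute scans the attribute with a square-bracket depth counter, hands string literals to string_literal so a ] inside quotes does not close the attribute, and rejects newlines. A simplified standalone sketch of that scan, illustrative only and not code from this commit (unlike the real code it does not handle escaped quotes inside strings):

// Simplified bracket-balancing scan in the spirit of shebang_attribute.
fn attribute_end(text: &str) -> Option<usize> {
    let mut depth = 0i32;
    let mut in_string = false;
    for (i, c) in text.char_indices() {
        match c {
            '"' => in_string = !in_string,     // enter or leave a string literal
            '[' if !in_string => depth += 1,
            ']' if !in_string => {
                depth -= 1;
                if depth == 0 {
                    return Some(i + 1);        // index just past the closing ]
                }
            }
            '\n' if !in_string => return None, // attribute must not span lines
            _ => {}
        }
    }
    None                                       // unterminated attribute
}

fn main() {
    assert_eq!(attribute_end(r#"#![set width = "80]"]"#), Some(21));
    assert_eq!(attribute_end("#![set width = [80]]"), Some(20));
    assert_eq!(attribute_end("#![unterminated"), None);
}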
@@ -195,7 +272,9 @@ impl<'input> Tokenizer<'input> {
                 }
                 Some((idx0, '#')) => {
                     self.bump();
-                    Some(Ok((idx0, Hash, idx0+1)))
+                    first!(self,
+                           { self.shebang_attribute(idx0) },
+                           { Ok((idx0, Hash, idx0+1)) })
                 }
                 Some((idx0, '>')) => {
                     self.bump();
@@ -493,7 +572,6 @@ impl<'input> Tokenizer<'input> {
                     false
                 }
             };
-
             match self.take_until(terminate) {
                 Some(idx1) => {
                     self.bump(); // consume the closing quote
@@ -686,6 +764,23 @@ impl<'input> Tokenizer<'input> {
             self.bump().map(|p| {p.0})
         })
     }
+
+    fn expect_char(&mut self, c : char) -> Option<Result<usize, Error>> {
+        match self.lookahead {
+            Some((idx0, cc)) if c == cc => {
+                self.bump();
+                Some(Ok((idx0)))
+            }
+            Some((idx0, cc)) => {
+                self.bump();
+                Some(error(UnrecognizedToken, idx0))
+            }
+            None => {
+                None
+            }
+        }
+    }
+
 }
 
 impl<'input> Iterator for Tokenizer<'input> {
@@ -385,6 +385,55 @@ fn regex1() {
     ]);
 }
 
+#[test]
+fn hash_token() {
+    test(r#" # "#, vec![
+        (r#" ~ "#, Hash)
+    ]);
+}
+
+#[test]
+fn shebang_attribute_normal_text() {
+    test(r#" #![Attribute] "#, vec![
+        (r#" ~~~~~~~~~~~~~ "#, ShebangAttribute("#![Attribute]"))
+    ]);
+}
+
+#[test]
+fn shebang_attribute_special_characters_without_quotes() {
+    test(r#" #![set width = 80] "#, vec![
+        (r#" ~~~~~~~~~~~~~~~~~~ "#, ShebangAttribute("#![set width = 80]"))
+    ]);
+}
+
+#[test]
+fn shebang_attribute_special_characters_with_quotes() {
+    test(r#" #![set width = "80"] "#, vec![
+        (r#" ~~~~~~~~~~~~~~~~~~~~ "#, ShebangAttribute(r#"#![set width = "80"]"#))
+    ]);
+}
+
+#[test]
+fn shebang_attribute_special_characters_closing_sqbracket_in_string_literal() {
+    test(r#" #![set width = "80]"] "#, vec![
+        (r#" ~~~~~~~~~~~~~~~~~~~~~ "#, ShebangAttribute(r#"#![set width = "80]"]"#))
+    ]);
+}
+
+#[test]
+fn shebang_attribute_special_characters_opening_sqbracket_in_string_literal() {
+    test(r#" #![set width = "[80"] "#, vec![
+        (r#" ~~~~~~~~~~~~~~~~~~~~~ "#, ShebangAttribute(r#"#![set width = "[80"]"#))
+    ]);
+}
+
+#[test]
+fn shebang_attribute_special_characters_nested_sqbrackets() {
+    test(r#" #![set width = [80]] "#, vec![
+        (r#" ~~~~~~~~~~~~~~~~~~~~ "#, ShebangAttribute(r#"#![set width = [80]]"#))
+    ]);
+}
+
 #[test]
 fn regex2() {
     test(r#"r"(123""#, vec![