mirror of
https://github.com/fluencelabs/lalrpop
synced 2025-03-16 17:00:53 +00:00
Support for shebang token
This commit is contained in:
parent
1c586f9b9a
commit
8b0b06965c
@ -233,6 +233,9 @@ fn parse_and_normalize_grammar(session: &Session, file_text: &FileText) -> io::R
|
||||
tok::ErrorCode::UnterminatedCharacterLiteral => {
|
||||
"unterminated character literal; missing `'`?"
|
||||
}
|
||||
tok::ErrorCode::UnterminatedAttribute => {
|
||||
"unterminated #! attribute; missing ]?"
|
||||
}
|
||||
tok::ErrorCode::ExpectedStringLiteral => "expected string literal; missing `\"`?",
|
||||
tok::ErrorCode::UnterminatedCode => {
|
||||
"unterminated code block; perhaps a missing `;`, `)`, `]` or `}`?"
|
||||
|
@ -401,6 +401,7 @@ extern {
|
||||
"=>@R" => Tok::EqualsGreaterThanLookbehind,
|
||||
">" => Tok::GreaterThan,
|
||||
"#" => Tok::Hash,
|
||||
"#![...]" => Tok::ShebangAttribute(<&'input str>),
|
||||
"{" => Tok::LeftBrace,
|
||||
"[" => Tok::LeftBracket,
|
||||
"(" => Tok::LeftParen,
|
||||
|
@ -21,6 +21,7 @@ pub enum ErrorCode {
|
||||
UnterminatedEscape,
|
||||
UnterminatedStringLiteral,
|
||||
UnterminatedCharacterLiteral,
|
||||
UnterminatedAttribute,
|
||||
UnterminatedCode,
|
||||
ExpectedStringLiteral,
|
||||
}
|
||||
@ -87,7 +88,8 @@ pub enum Tok<'input> {
|
||||
Star,
|
||||
TildeTilde,
|
||||
Underscore,
|
||||
Bang
|
||||
Bang,
|
||||
ShebangAttribute(&'input str), // #![...]
|
||||
}
|
||||
|
||||
pub struct Tokenizer<'input> {
|
||||
@ -117,6 +119,40 @@ const KEYWORDS: &'static [(&'static str, Tok<'static>)] = &[
|
||||
("type", Type),
|
||||
];
|
||||
|
||||
/// Backtracking helper.
///
/// Evaluates `$action` speculatively against `$this`. When the action yields
/// `Ok`, that result is passed through untouched. On any other outcome the
/// `chars` and `lookahead` fields of `$this` are rewound to the values they
/// held before the attempt and `$fallback` is produced instead.
///
/// The macro always evaluates to `Some(..)`.
macro_rules! first {
    ($this:expr, $action:expr, $fallback:expr) => {{
        // Snapshot the cursor before the speculative attempt.
        let (saved_chars, saved_lookahead) = ($this.chars.clone(), $this.lookahead);
        let attempt = $action;
        if attempt.is_err() {
            // The attempt failed: rewind and hand back the fallback.
            $this.chars = saved_chars;
            $this.lookahead = saved_lookahead;
            Some($fallback)
        } else {
            Some(attempt)
        }
    }};
}
|
||||
|
||||
/// Unwraps an `Option<Result<T, E>>` inside a function that returns `Result`.
///
/// * `Some(Ok(v))`  evaluates to `v`.
/// * `Some(Err(e))` returns `Err(e)` from the enclosing function.
/// * `None`         returns `$err` from the enclosing function; `$err` must
///   therefore already be a value of the function's return type.
macro_rules! try_opt {
    ($e:expr, $err:expr) => {{
        let outcome = $e;
        match outcome {
            None => return $err,
            Some(inner) => match inner {
                Ok(value) => value,
                Err(failure) => return Err(failure),
            },
        }
    }};
}
|
||||
|
||||
impl<'input> Tokenizer<'input> {
|
||||
pub fn new(text: &'input str, shift: usize) -> Tokenizer<'input> {
|
||||
@ -130,6 +166,47 @@ impl<'input> Tokenizer<'input> {
|
||||
t
|
||||
}
|
||||
|
||||
fn shebang_attribute(&mut self, idx0: usize) -> Result<Spanned<Tok<'input>>, Error> {
|
||||
try_opt!(self.expect_char('!'), error(ErrorCode::UnrecognizedToken, idx0));
|
||||
try_opt!(self.expect_char('['), error(ErrorCode::UnterminatedAttribute, idx0));
|
||||
let mut sq_bracket_counter = 1;
|
||||
while let Some((idx1, c)) = self.lookahead {
|
||||
match c {
|
||||
'[' => {
|
||||
self.bump();
|
||||
sq_bracket_counter += 1
|
||||
}
|
||||
']' => {
|
||||
self.bump();
|
||||
sq_bracket_counter -= 1;
|
||||
match sq_bracket_counter {
|
||||
0 => {
|
||||
let idx2 = idx1 + 1;
|
||||
let data = &self.text[idx0..idx2];
|
||||
self.bump();
|
||||
return Ok((idx0, ShebangAttribute(data), idx2))
|
||||
},
|
||||
n if n < 0 => {
|
||||
return error(UnrecognizedToken, idx0)
|
||||
}
|
||||
_ => ()
|
||||
}
|
||||
}
|
||||
'"' => {
|
||||
self.bump();
|
||||
let _ = try!(self.string_literal(idx1));
|
||||
}
|
||||
'\n' => {
|
||||
return error(UnrecognizedToken, idx0)
|
||||
}
|
||||
_ => {
|
||||
self.bump();
|
||||
}
|
||||
}
|
||||
}
|
||||
error(UnrecognizedToken, idx0)
|
||||
}
|
||||
|
||||
fn next_unshifted(&mut self) -> Option<Result<Spanned<Tok<'input>>, Error>> {
|
||||
loop {
|
||||
return match self.lookahead {
|
||||
@ -195,7 +272,9 @@ impl<'input> Tokenizer<'input> {
|
||||
}
|
||||
Some((idx0, '#')) => {
|
||||
self.bump();
|
||||
Some(Ok((idx0, Hash, idx0+1)))
|
||||
first!(self,
|
||||
{ self.shebang_attribute(idx0) },
|
||||
{ Ok((idx0, Hash, idx0+1)) })
|
||||
}
|
||||
Some((idx0, '>')) => {
|
||||
self.bump();
|
||||
@ -493,7 +572,6 @@ impl<'input> Tokenizer<'input> {
|
||||
false
|
||||
}
|
||||
};
|
||||
|
||||
match self.take_until(terminate) {
|
||||
Some(idx1) => {
|
||||
self.bump(); // consume the closing quote
|
||||
@ -686,6 +764,23 @@ impl<'input> Tokenizer<'input> {
|
||||
self.bump().map(|p| {p.0})
|
||||
})
|
||||
}
|
||||
|
||||
fn expect_char(&mut self, c : char) -> Option<Result<usize, Error>> {
|
||||
match self.lookahead {
|
||||
Some((idx0, cc)) if c == cc => {
|
||||
self.bump();
|
||||
Some(Ok((idx0)))
|
||||
}
|
||||
Some((idx0, cc)) => {
|
||||
self.bump();
|
||||
Some(error(UnrecognizedToken, idx0))
|
||||
}
|
||||
None => {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
impl<'input> Iterator for Tokenizer<'input> {
|
||||
|
@ -385,6 +385,55 @@ fn regex1() {
|
||||
]);
|
||||
}
|
||||
|
||||
/// A `#` that does not start a `#![...]` attribute is lexed as `Hash`.
#[test]
fn hash_token() {
    let input = r#" # "#;
    let expected = vec![(r#" ~ "#, Hash)];
    test(input, expected);
}
|
||||
|
||||
/// A plain `#![Attribute]` becomes one `ShebangAttribute` token spanning the
/// whole attribute text.
#[test]
fn shebang_attribute_normal_text() {
    let input = r#" #![Attribute] "#;
    let expected = vec![(r#" ~~~~~~~~~~~~~ "#, ShebangAttribute("#![Attribute]"))];
    test(input, expected);
}
|
||||
|
||||
/// Spaces, `=` and digits inside the brackets are kept in the token text.
#[test]
fn shebang_attribute_special_characters_without_quotes() {
    let input = r#" #![set width = 80] "#;
    let expected = vec![(r#" ~~~~~~~~~~~~~~~~~~ "#, ShebangAttribute("#![set width = 80]"))];
    test(input, expected);
}
|
||||
|
||||
/// A quoted string inside the attribute is included verbatim in the token.
#[test]
fn shebang_attribute_special_characters_with_quotes() {
    let input = r#" #![set width = "80"] "#;
    let expected = vec![(
        r#" ~~~~~~~~~~~~~~~~~~~~ "#,
        ShebangAttribute(r#"#![set width = "80"]"#),
    )];
    test(input, expected);
}
|
||||
|
||||
/// A `]` inside a string literal does not end the attribute.
#[test]
fn shebang_attribute_special_characters_closing_sqbracket_in_string_literal() {
    let input = r#" #![set width = "80]"] "#;
    let expected = vec![(
        r#" ~~~~~~~~~~~~~~~~~~~~~ "#,
        ShebangAttribute(r#"#![set width = "80]"]"#),
    )];
    test(input, expected);
}
|
||||
|
||||
/// A `[` inside a string literal does not open a nested bracket level.
#[test]
fn shebang_attribute_special_characters_opening_sqbracket_in_string_literal() {
    let input = r#" #![set width = "[80"] "#;
    let expected = vec![(
        r#" ~~~~~~~~~~~~~~~~~~~~~ "#,
        ShebangAttribute(r#"#![set width = "[80"]"#),
    )];
    test(input, expected);
}
|
||||
|
||||
/// Nested square brackets are balanced; the token ends at the outermost `]`.
#[test]
fn shebang_attribute_special_characters_nested_sqbrackets() {
    let input = r#" #![set width = [80]] "#;
    let expected = vec![(
        r#" ~~~~~~~~~~~~~~~~~~~~ "#,
        ShebangAttribute(r#"#![set width = [80]]"#),
    )];
    test(input, expected);
}
|
||||
|
||||
#[test]
|
||||
fn regex2() {
|
||||
test(r#"r"(123""#, vec![
|
||||
|
Loading…
x
Reference in New Issue
Block a user