Merge the public parsers in lrgrammar to one

This shrinks the generated code to 1Mb (from 2Mb WITH the cfg approach
in #413) while also removing the cfg'ing itself as that barely reduces the
size. (Thus this supersedes #413)

While this is a bit boilerplatey, I'd say it is worthwhile. We could
generate this in LALRPOP itself with some work
https://github.com/lalrpop/lalrpop/issues/304#issuecomment-431654594
This commit is contained in:
Markus Westerlind 2018-10-20 14:07:10 +02:00
parent af12168773
commit e5f2897aee
5 changed files with 230 additions and 116 deletions

58
lalrpop/build.rs Normal file
View File

@ -0,0 +1,58 @@
use std::env;
use std::error::Error;
use std::fs;
use std::path::{Path, PathBuf};
use std::process::{exit, Command};
/// Build-script entry point: run the real logic in `main_` and, on
/// failure, print the error to stderr and exit with a non-zero status
/// so cargo reports the build script as failed.
fn main() {
    match main_() {
        Ok(()) => {}
        Err(err) => {
            eprintln!("{}", err);
            exit(1);
        }
    }
}
/// Body of the build script.
///
/// If a previously built `lalrpop` binary exists in the target directory,
/// use it to pre-compile `src/parser/lrgrammar.lalrpop` into `OUT_DIR`
/// (copying the grammar there first so the generated parser lands next to
/// it). If no binary is found yet, do nothing — presumably a first build,
/// where the grammar is compiled by other means.
///
/// Returns an error if the grammar cannot be copied or if running the
/// lalrpop binary fails or exits unsuccessfully.
fn main_() -> Result<(), Box<Error>> {
    let grammar_file = "src/parser/lrgrammar.lalrpop";
    println!(r#"cargo:rerun-if-changed={}"#, grammar_file);

    let out_dir = PathBuf::from(env::var("OUT_DIR").expect("cargo did not set OUT_DIR"));
    fs::create_dir_all(out_dir.join("src/parser"))?;

    // When built from the workspace root the target dir is a sibling;
    // otherwise look one level up. (NOTE(review): assumes one of the two
    // locations exists — TODO confirm for out-of-tree builds.)
    let target_dir = if Path::new("target").exists() {
        Path::new("target")
    } else {
        Path::new("../target")
    };
    let lalrpop_path = target_dir
        .join("debug/lalrpop")
        .with_extension(env::consts::EXE_EXTENSION);
    println!(r#"cargo:rerun-if-changed={}"#, lalrpop_path.display());

    if lalrpop_path.exists() {
        // If compiling lalrpop itself, enable test parsers
        if target_dir.exists() {
            env::set_var("CARGO_FEATURE_TEST", "1");
            println!(r#"cargo:rustc-cfg=feature="test""#);
        }

        // Copy the grammar into OUT_DIR so the generated .rs ends up there.
        let copied_grammar = out_dir.join("src/parser/lrgrammar.lalrpop");
        fs::copy(grammar_file, &copied_grammar)
            // Fixed error message: original read "Unable to grammar to
            // OUT_DIR" — the verb "copy" was missing.
            .map_err(|err| format!("Unable to copy grammar to OUT_DIR: {}", err))?;

        let status = Command::new(lalrpop_path)
            .args(&[
                "--force",
                "--features",
                "test",
                copied_grammar
                    .to_str()
                    .expect("grammar path is not valid UTF-8"),
            ])
            .status()?;
        if !status.success() {
            return Err("Compiling the .lalrpop file failed".into());
        }
    }
    Ok(())
}

View File

@ -1,57 +1,58 @@
// Need this for rusty_peg // Need this for rusty_peg
#![recursion_limit = "256"] #![recursion_limit = "256"]
// I hate this lint. // I hate this lint.
#![allow(unused_parens)] #![allow(unused_parens)]
// The builtin tests don't cover the CLI and so forth, and it's just // The builtin tests don't cover the CLI and so forth, and it's just
// too darn annoying to try and make them do so. // too darn annoying to try and make them do so.
#![cfg_attr(test, allow(dead_code))] #![cfg_attr(test, allow(dead_code))]
extern crate ascii_canvas; extern crate ascii_canvas;
extern crate atty; extern crate atty;
extern crate bit_set; extern crate bit_set;
extern crate diff; extern crate diff;
extern crate ena; extern crate ena;
extern crate itertools; extern crate itertools;
extern crate lalrpop_util; #[cfg_attr(any(feature = "test", test), macro_use)]
extern crate petgraph; extern crate lalrpop_util;
extern crate regex; extern crate petgraph;
extern crate regex_syntax; extern crate regex;
extern crate sha2; extern crate regex_syntax;
extern crate string_cache; extern crate sha2;
extern crate term; extern crate string_cache;
extern crate unicode_xid; extern crate term;
extern crate unicode_xid;
#[cfg(test)]
extern crate rand; #[cfg(test)]
extern crate rand;
// hoist the modules that define macros up earlier
#[macro_use] // hoist the modules that define macros up earlier
mod rust; #[macro_use]
#[macro_use] mod rust;
mod log; #[macro_use]
mod log;
mod api;
mod build; mod api;
mod collections; mod build;
mod file_text; mod collections;
mod grammar; mod file_text;
mod kernel_set; mod grammar;
mod lexer; mod kernel_set;
mod lr1; mod lexer;
mod message; mod lr1;
mod normalize; mod message;
mod parser; mod normalize;
mod session; mod parser;
mod tls; mod session;
mod tok; mod tls;
mod util; mod tok;
mod util;
#[cfg(test)]
mod generate; #[cfg(test)]
#[cfg(test)] mod generate;
mod test_util; #[cfg(test)]
mod test_util;
pub use api::process_root;
pub use api::process_root_unconditionally; pub use api::process_root;
pub use api::Configuration; pub use api::process_root_unconditionally;
use ascii_canvas::style; pub use api::Configuration;
use ascii_canvas::style;

View File

@ -6,9 +6,19 @@ use tok::{self, Tok};
use util::strip; use util::strip;
use lalrpop_util::ParseError; use lalrpop_util::ParseError;
use super::Top;
grammar<'input>(text: &'input str); grammar<'input>(text: &'input str);
pub Grammar: Grammar = pub Top: Top = {
"StartGrammar" <Grammar> => Top::Grammar(<>),
"StartPattern" <Pattern> => Top::Pattern(<>),
"StartMatchMapping" <MatchMapping> => Top::MatchMapping(<>),
"StartTypeRef" <TypeRef> => Top::TypeRef(<>),
"StartGrammarWhereClauses" <GrammarWhereClauses> => Top::GrammarWhereClauses(<>),
};
Grammar: Grammar =
<module_attributes:ShebangAttribute*> <module_attributes:ShebangAttribute*>
<uses:Use*> <uses:Use*>
<annotations:Annotation*> <annotations:Annotation*>
@ -36,7 +46,7 @@ TypeParameter: TypeParameter = {
<l:Id> => TypeParameter::Id(l) <l:Id> => TypeParameter::Id(l)
}; };
pub GrammarWhereClauses: Vec<WhereClause<TypeRef>> = GrammarWhereClauses: Vec<WhereClause<TypeRef>> =
"where" <Comma<GrammarWhereClause>>; "where" <Comma<GrammarWhereClause>>;
GrammarWhereClause: WhereClause<TypeRef> = { GrammarWhereClause: WhereClause<TypeRef> = {
@ -226,7 +236,7 @@ SymbolKind1: SymbolKind = {
SymbolKind::Error, SymbolKind::Error,
}; };
pub TypeRef: TypeRef = { TypeRef: TypeRef = {
"(" <Comma<TypeRef>> ")" => "(" <Comma<TypeRef>> ")" =>
TypeRef::Tuple(<>), TypeRef::Tuple(<>),
@ -299,7 +309,7 @@ MatchItem: MatchItem = {
MatchSymbol = QuotedLiteral; MatchSymbol = QuotedLiteral;
pub MatchMapping = Terminal; MatchMapping = Terminal;
EnumToken: EnumToken = EnumToken: EnumToken =
"enum" <lo:@L> <t:TypeRef> <hi:@R> "{" "enum" <lo:@L> <t:TypeRef> <hi:@R> "{"
@ -327,7 +337,7 @@ Conversion: Conversion =
to: pattern }) to: pattern })
}; };
pub Pattern: Pattern<TypeRef> = Pattern: Pattern<TypeRef> =
<lo:@L> <k:PatternKind> <hi:@R> => Pattern { span: Span(lo, hi), kind: k }; <lo:@L> <k:PatternKind> <hi:@R> => Pattern { span: Span(lo, hi), kind: k };
PatternKind: PatternKind<TypeRef> = { PatternKind: PatternKind<TypeRef> = {
@ -472,5 +482,11 @@ extern {
"*" => Tok::Star, "*" => Tok::Star,
"~~" => Tok::TildeTilde, "~~" => Tok::TildeTilde,
"_" => Tok::Underscore, "_" => Tok::Underscore,
"StartGrammar" => Tok::StartGrammar,
"StartPattern" => Tok::StartPattern,
"StartMatchMapping" => Tok::StartMatchMapping,
"StartTypeRef" => Tok::StartTypeRef,
"StartGrammarWhereClauses" => Tok::StartGrammarWhereClauses,
} }
} }

View File

@ -1,54 +1,84 @@
use grammar::parse_tree::*; use std::iter;
use grammar::pattern::*;
use lalrpop_util; use grammar::parse_tree::*;
use tok; use grammar::pattern::*;
use lalrpop_util;
#[allow(dead_code)] use tok;
mod lrgrammar;
#[cfg(not(any(feature = "test", test)))]
#[cfg(test)] #[allow(dead_code)]
mod test; mod lrgrammar;
pub type ParseError<'input> = lalrpop_util::ParseError<usize, tok::Tok<'input>, tok::Error>; #[cfg(any(feature = "test", test))]
lalrpop_mod!(
pub fn parse_grammar<'input>(input: &'input str) -> Result<Grammar, ParseError<'input>> { // ---------------------------------------------------------------------------------------
let tokenizer = tok::Tokenizer::new(input, 0); // NOTE: Run `cargo build -p lalrpop` once before running `cargo test` to create this file
let mut grammar = try!(lrgrammar::GrammarParser::new().parse(input, tokenizer)); // ---------------------------------------------------------------------------------------
#[allow(dead_code)]
// find a unique prefix that does not appear anywhere in the input lrgrammar,
while input.contains(&grammar.prefix) { "/src/parser/lrgrammar.rs"
grammar.prefix.push('_'); );
}
#[cfg(test)]
Ok(grammar) mod test;
}
pub enum Top {
fn parse_pattern<'input>( Grammar(Grammar),
input: &'input str, Pattern(Pattern<TypeRef>),
offset: usize, MatchMapping(TerminalString),
) -> Result<Pattern<TypeRef>, ParseError<'input>> { TypeRef(TypeRef),
let tokenizer = tok::Tokenizer::new(input, offset); GrammarWhereClauses(Vec<WhereClause<TypeRef>>),
lrgrammar::PatternParser::new().parse(input, tokenizer) }
}
pub type ParseError<'input> = lalrpop_util::ParseError<usize, tok::Tok<'input>, tok::Error>;
fn parse_match_mapping<'input>(
input: &'input str, macro_rules! parser {
offset: usize, ($input: expr, $offset: expr, $pat: ident, $tok: ident) => {{
) -> Result<MatchMapping, ParseError<'input>> { let input = $input;
let tokenizer = tok::Tokenizer::new(input, offset); let tokenizer =
lrgrammar::MatchMappingParser::new().parse(input, tokenizer) iter::once(Ok((0, tok::Tok::$tok, 0))).chain(tok::Tokenizer::new(input, $offset));
} lrgrammar::TopParser::new()
.parse(input, tokenizer)
#[cfg(test)] .map(|top| match top {
pub fn parse_type_ref<'input>(input: &'input str) -> Result<TypeRef, ParseError<'input>> { Top::$pat(x) => x,
let tokenizer = tok::Tokenizer::new(input, 0); _ => unreachable!(),
lrgrammar::TypeRefParser::new().parse(input, tokenizer) })
} }};
}
#[cfg(test)]
pub fn parse_where_clauses<'input>( pub fn parse_grammar<'input>(input: &'input str) -> Result<Grammar, ParseError<'input>> {
input: &'input str, let mut grammar = try!(parser!(input, 0, Grammar, StartGrammar));
) -> Result<Vec<WhereClause<TypeRef>>, ParseError<'input>> {
let tokenizer = tok::Tokenizer::new(input, 0); // find a unique prefix that does not appear anywhere in the input
lrgrammar::GrammarWhereClausesParser::new().parse(input, tokenizer) while input.contains(&grammar.prefix) {
} grammar.prefix.push('_');
}
Ok(grammar)
}
fn parse_pattern<'input>(
input: &'input str,
offset: usize,
) -> Result<Pattern<TypeRef>, ParseError<'input>> {
parser!(input, offset, Pattern, StartPattern)
}
fn parse_match_mapping<'input>(
input: &'input str,
offset: usize,
) -> Result<MatchMapping, ParseError<'input>> {
parser!(input, offset, MatchMapping, StartMatchMapping)
}
#[cfg(test)]
pub fn parse_type_ref<'input>(input: &'input str) -> Result<TypeRef, ParseError<'input>> {
parser!(input, 0, TypeRef, StartTypeRef)
}
#[cfg(test)]
pub fn parse_where_clauses<'input>(
input: &'input str,
) -> Result<Vec<WhereClause<TypeRef>>, ParseError<'input>> {
parser!(input, 0, GrammarWhereClauses, StartGrammarWhereClauses)
}

View File

@ -97,6 +97,15 @@ pub enum Tok<'input> {
Underscore, Underscore,
Bang, Bang,
ShebangAttribute(&'input str), // #![...] ShebangAttribute(&'input str), // #![...]
// Dummy tokens for parser sharing
StartGrammar,
StartPattern,
StartMatchMapping,
#[allow(dead_code)]
StartGrammarWhereClauses,
#[allow(dead_code)]
StartTypeRef,
} }
pub struct Tokenizer<'input> { pub struct Tokenizer<'input> {