Merge the public parsers in lrgrammar to one

This shrinks the generated code to 1Mb (from 2Mb WITH the cfg approach
in #413) while also removing the cfg'ing itself as that barely reduces the
size. (Thus this supersedes #413)

While this is a bit boilerplatey, I'd say it is worthwhile. We could
generate this in LALRPOP itself with some work
https://github.com/lalrpop/lalrpop/issues/304#issuecomment-431654594
This commit is contained in:
Markus Westerlind 2018-10-20 14:07:10 +02:00
parent af12168773
commit e5f2897aee
5 changed files with 230 additions and 116 deletions

58
lalrpop/build.rs Normal file
View File

@ -0,0 +1,58 @@
use std::env;
use std::error::Error;
use std::fs;
use std::path::{Path, PathBuf};
use std::process::{exit, Command};
/// Entry point: run the fallible build-script body and turn any error
/// into a message on stderr plus a non-zero exit status.
fn main() {
    match main_() {
        Ok(()) => {}
        Err(err) => {
            eprintln!("{}", err);
            exit(1);
        }
    }
}
/// Build-script body: if a previously built `lalrpop` binary exists in the
/// target directory, use it to regenerate the parser from the `.lalrpop`
/// grammar into `OUT_DIR`. If the binary is absent (first/bootstrap build),
/// do nothing and succeed.
///
/// Returns an error if the grammar cannot be copied into `OUT_DIR` or if
/// running the `lalrpop` binary fails.
fn main_() -> Result<(), Box<Error>> {
    let grammar_file = "src/parser/lrgrammar.lalrpop";
    println!(r#"cargo:rerun-if-changed={}"#, grammar_file);

    let out_dir = PathBuf::from(env::var("OUT_DIR").expect("cargo did not set OUT_DIR"));
    fs::create_dir_all(out_dir.join("src/parser"))?;

    // The target dir location depends on whether the build runs from the
    // workspace root ("target") or from inside the crate ("../target").
    let target_dir = if Path::new("target").exists() {
        Path::new("target")
    } else {
        Path::new("../target")
    };
    let lalrpop_path = target_dir
        .join("debug/lalrpop")
        .with_extension(env::consts::EXE_EXTENSION);
    println!(r#"cargo:rerun-if-changed={}"#, lalrpop_path.display());

    if lalrpop_path.exists() {
        // If compiling lalrpop itself, enable test parsers
        if target_dir.exists() {
            env::set_var("CARGO_FEATURE_TEST", "1");
            println!(r#"cargo:rustc-cfg=feature="test""#);
        }

        let copied_grammar = out_dir.join("src/parser/lrgrammar.lalrpop");
        // BUG FIX: original message read "Unable to grammar to OUT_DIR"
        // (missing the verb "copy").
        fs::copy(grammar_file, &copied_grammar)
            .map_err(|err| format!("Unable to copy grammar to OUT_DIR: {}", err))?;

        let status = Command::new(lalrpop_path)
            .args(&[
                "--force",
                "--features",
                "test",
                copied_grammar
                    .to_str()
                    .expect("grammar path is not valid UTF-8"),
            ])
            .status()?;
        if !status.success() {
            return Err("Compiling the .lalrpop file failed".into());
        }
    }
    Ok(())
}

View File

@ -1,57 +1,58 @@
// Need this for rusty_peg
#![recursion_limit = "256"]
// I hate this lint.
#![allow(unused_parens)]
// The builtin tests don't cover the CLI and so forth, and it's just
// too darn annoying to try and make them do so.
#![cfg_attr(test, allow(dead_code))]
extern crate ascii_canvas;
extern crate atty;
extern crate bit_set;
extern crate diff;
extern crate ena;
extern crate itertools;
extern crate lalrpop_util;
extern crate petgraph;
extern crate regex;
extern crate regex_syntax;
extern crate sha2;
extern crate string_cache;
extern crate term;
extern crate unicode_xid;
#[cfg(test)]
extern crate rand;
// hoist the modules that define macros up earlier
#[macro_use]
mod rust;
#[macro_use]
mod log;
mod api;
mod build;
mod collections;
mod file_text;
mod grammar;
mod kernel_set;
mod lexer;
mod lr1;
mod message;
mod normalize;
mod parser;
mod session;
mod tls;
mod tok;
mod util;
#[cfg(test)]
mod generate;
#[cfg(test)]
mod test_util;
pub use api::process_root;
pub use api::process_root_unconditionally;
pub use api::Configuration;
use ascii_canvas::style;
// Need this for rusty_peg
#![recursion_limit = "256"]
// I hate this lint.
#![allow(unused_parens)]
// The builtin tests don't cover the CLI and so forth, and it's just
// too darn annoying to try and make them do so.
#![cfg_attr(test, allow(dead_code))]
extern crate ascii_canvas;
extern crate atty;
extern crate bit_set;
extern crate diff;
extern crate ena;
extern crate itertools;
#[cfg_attr(any(feature = "test", test), macro_use)]
extern crate lalrpop_util;
extern crate petgraph;
extern crate regex;
extern crate regex_syntax;
extern crate sha2;
extern crate string_cache;
extern crate term;
extern crate unicode_xid;
#[cfg(test)]
extern crate rand;
// hoist the modules that define macros up earlier
#[macro_use]
mod rust;
#[macro_use]
mod log;
mod api;
mod build;
mod collections;
mod file_text;
mod grammar;
mod kernel_set;
mod lexer;
mod lr1;
mod message;
mod normalize;
mod parser;
mod session;
mod tls;
mod tok;
mod util;
#[cfg(test)]
mod generate;
#[cfg(test)]
mod test_util;
pub use api::process_root;
pub use api::process_root_unconditionally;
pub use api::Configuration;
use ascii_canvas::style;

View File

@ -6,9 +6,19 @@ use tok::{self, Tok};
use util::strip;
use lalrpop_util::ParseError;
use super::Top;
grammar<'input>(text: &'input str);
pub Grammar: Grammar =
// Single shared entry point: the caller (see parser/mod.rs) injects one of
// the dummy "Start*" tokens at the front of the token stream, and that token
// selects which real nonterminal this merged parser actually parses.
pub Top: Top = {
    "StartGrammar" <Grammar> => Top::Grammar(<>),
    "StartPattern" <Pattern> => Top::Pattern(<>),
    "StartMatchMapping" <MatchMapping> => Top::MatchMapping(<>),
    "StartTypeRef" <TypeRef> => Top::TypeRef(<>),
    "StartGrammarWhereClauses" <GrammarWhereClauses> => Top::GrammarWhereClauses(<>),
};
Grammar: Grammar =
<module_attributes:ShebangAttribute*>
<uses:Use*>
<annotations:Annotation*>
@ -36,7 +46,7 @@ TypeParameter: TypeParameter = {
<l:Id> => TypeParameter::Id(l)
};
pub GrammarWhereClauses: Vec<WhereClause<TypeRef>> =
GrammarWhereClauses: Vec<WhereClause<TypeRef>> =
"where" <Comma<GrammarWhereClause>>;
GrammarWhereClause: WhereClause<TypeRef> = {
@ -226,7 +236,7 @@ SymbolKind1: SymbolKind = {
SymbolKind::Error,
};
pub TypeRef: TypeRef = {
TypeRef: TypeRef = {
"(" <Comma<TypeRef>> ")" =>
TypeRef::Tuple(<>),
@ -299,7 +309,7 @@ MatchItem: MatchItem = {
MatchSymbol = QuotedLiteral;
pub MatchMapping = Terminal;
MatchMapping = Terminal;
EnumToken: EnumToken =
"enum" <lo:@L> <t:TypeRef> <hi:@R> "{"
@ -327,7 +337,7 @@ Conversion: Conversion =
to: pattern })
};
pub Pattern: Pattern<TypeRef> =
Pattern: Pattern<TypeRef> =
<lo:@L> <k:PatternKind> <hi:@R> => Pattern { span: Span(lo, hi), kind: k };
PatternKind: PatternKind<TypeRef> = {
@ -472,5 +482,11 @@ extern {
"*" => Tok::Star,
"~~" => Tok::TildeTilde,
"_" => Tok::Underscore,
"StartGrammar" => Tok::StartGrammar,
"StartPattern" => Tok::StartPattern,
"StartMatchMapping" => Tok::StartMatchMapping,
"StartTypeRef" => Tok::StartTypeRef,
"StartGrammarWhereClauses" => Tok::StartGrammarWhereClauses,
}
}

View File

@ -1,54 +1,84 @@
use grammar::parse_tree::*;
use grammar::pattern::*;
use lalrpop_util;
use tok;
#[allow(dead_code)]
mod lrgrammar;
#[cfg(test)]
mod test;
pub type ParseError<'input> = lalrpop_util::ParseError<usize, tok::Tok<'input>, tok::Error>;
pub fn parse_grammar<'input>(input: &'input str) -> Result<Grammar, ParseError<'input>> {
let tokenizer = tok::Tokenizer::new(input, 0);
let mut grammar = try!(lrgrammar::GrammarParser::new().parse(input, tokenizer));
// find a unique prefix that does not appear anywhere in the input
while input.contains(&grammar.prefix) {
grammar.prefix.push('_');
}
Ok(grammar)
}
/// Parses a single pattern; `offset` is the starting position handed to the
/// tokenizer so error locations line up with the enclosing file.
fn parse_pattern<'input>(
    input: &'input str,
    offset: usize,
) -> Result<Pattern<TypeRef>, ParseError<'input>> {
    lrgrammar::PatternParser::new().parse(input, tok::Tokenizer::new(input, offset))
}
/// Parses a match mapping (terminal); `offset` is the starting position
/// handed to the tokenizer so error locations line up with the enclosing file.
fn parse_match_mapping<'input>(
    input: &'input str,
    offset: usize,
) -> Result<MatchMapping, ParseError<'input>> {
    lrgrammar::MatchMappingParser::new().parse(input, tok::Tokenizer::new(input, offset))
}
/// Parses a type reference, tokenizing from offset 0. Test-only helper.
#[cfg(test)]
pub fn parse_type_ref<'input>(input: &'input str) -> Result<TypeRef, ParseError<'input>> {
    lrgrammar::TypeRefParser::new().parse(input, tok::Tokenizer::new(input, 0))
}
/// Parses a `where` clause list, tokenizing from offset 0. Test-only helper.
#[cfg(test)]
pub fn parse_where_clauses<'input>(
    input: &'input str,
) -> Result<Vec<WhereClause<TypeRef>>, ParseError<'input>> {
    lrgrammar::GrammarWhereClausesParser::new().parse(input, tok::Tokenizer::new(input, 0))
}
use std::iter;
use grammar::parse_tree::*;
use grammar::pattern::*;
use lalrpop_util;
use tok;
#[cfg(not(any(feature = "test", test)))]
#[allow(dead_code)]
mod lrgrammar;
#[cfg(any(feature = "test", test))]
lalrpop_mod!(
// ---------------------------------------------------------------------------------------
// NOTE: Run `cargo build -p lalrpop` once before running `cargo test` to create this file
// ---------------------------------------------------------------------------------------
#[allow(dead_code)]
lrgrammar,
"/src/parser/lrgrammar.rs"
);
#[cfg(test)]
mod test;
/// Result of the single merged `Top` parser: one variant per public parse
/// entry point that previously had its own generated parser. The dummy
/// start token injected by `parser!` determines which variant is produced.
pub enum Top {
    Grammar(Grammar),
    Pattern(Pattern<TypeRef>),
    MatchMapping(TerminalString),
    TypeRef(TypeRef),
    GrammarWhereClauses(Vec<WhereClause<TypeRef>>),
}
/// Parse-error alias; locations are `usize` offsets into the input text.
pub type ParseError<'input> = lalrpop_util::ParseError<usize, tok::Tok<'input>, tok::Error>;
// Shared driver behind every parse_* entry point: prepend the dummy start
// token `Tok::$tok` to the real token stream, run the single merged `Top`
// parser, and unwrap the `Top::$pat` variant that start token produces.
macro_rules! parser {
    ($input: expr, $offset: expr, $pat: ident, $tok: ident) => {{
        let input = $input;
        // Dummy token is injected at position 0 so the grammar's `Top`
        // rule dispatches to the right sub-grammar.
        let tokenizer =
            iter::once(Ok((0, tok::Tok::$tok, 0))).chain(tok::Tokenizer::new(input, $offset));
        lrgrammar::TopParser::new()
            .parse(input, tokenizer)
            .map(|top| match top {
                Top::$pat(x) => x,
                // The start token uniquely determines the variant, so any
                // other variant here would indicate a grammar bug.
                _ => unreachable!(),
            })
    }};
}
/// Parses an entire `.lalrpop` grammar file, tokenizing from offset 0.
///
/// After a successful parse, the grammar's generated-name prefix is grown
/// with trailing underscores until it occurs nowhere in the input text, so
/// generated identifiers cannot collide with user-written ones.
pub fn parse_grammar<'input>(input: &'input str) -> Result<Grammar, ParseError<'input>> {
    let mut grammar = try!(parser!(input, 0, Grammar, StartGrammar));
    // Keep extending the prefix until it is absent from the source text.
    loop {
        if !input.contains(&grammar.prefix) {
            return Ok(grammar);
        }
        grammar.prefix.push('_');
    }
}
// Parses a single pattern; `offset` is the starting position handed to the
// tokenizer so error locations line up with the enclosing file.
fn parse_pattern<'input>(
    input: &'input str,
    offset: usize,
) -> Result<Pattern<TypeRef>, ParseError<'input>> {
    parser!(input, offset, Pattern, StartPattern)
}
// Parses a match mapping (terminal); `offset` is the starting position
// handed to the tokenizer so error locations line up with the enclosing file.
fn parse_match_mapping<'input>(
    input: &'input str,
    offset: usize,
) -> Result<MatchMapping, ParseError<'input>> {
    parser!(input, offset, MatchMapping, StartMatchMapping)
}
// Parses a type reference, tokenizing from offset 0. Test-only helper.
#[cfg(test)]
pub fn parse_type_ref<'input>(input: &'input str) -> Result<TypeRef, ParseError<'input>> {
    parser!(input, 0, TypeRef, StartTypeRef)
}
// Parses a `where` clause list, tokenizing from offset 0. Test-only helper.
#[cfg(test)]
pub fn parse_where_clauses<'input>(
    input: &'input str,
) -> Result<Vec<WhereClause<TypeRef>>, ParseError<'input>> {
    parser!(input, 0, GrammarWhereClauses, StartGrammarWhereClauses)
}

View File

@ -97,6 +97,15 @@ pub enum Tok<'input> {
Underscore,
Bang,
ShebangAttribute(&'input str), // #![...]
// Dummy tokens for parser sharing
StartGrammar,
StartPattern,
StartMatchMapping,
#[allow(dead_code)]
StartGrammarWhereClauses,
#[allow(dead_code)]
StartTypeRef,
}
pub struct Tokenizer<'input> {