Merge pull request #414 from Marwes/shrink_lrgrammar2

feat: Shrink LALRPOPs own parser to 1Mb
This commit is contained in:
Markus Westerlind 2018-10-27 13:24:36 +02:00 committed by GitHub
commit eba453d876
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 29003 additions and 84447 deletions

2
.gitattributes vendored
View File

@ -1 +1,3 @@
* text=auto
lalrpop/src/parser/lrgrammar.lalrpop text eol=lf

View File

@ -927,6 +927,7 @@ fn verify_lalrpop_generates_itself() {
Command::new("../target/debug/lalrpop")
.args(&[
"--force",
"--no-whitespace",
"--out-dir",
out_dir,
copied_grammar_file

58
lalrpop/build.rs Normal file
View File

@ -0,0 +1,58 @@
use std::env;
use std::error::Error;
use std::fs;
use std::path::{Path, PathBuf};
use std::process::{exit, Command};
/// Build-script entry point: delegates to `main_` and converts any
/// error into a message on stderr plus a non-zero exit status so cargo
/// reports the build-script failure.
fn main() {
    match main_() {
        Ok(()) => {}
        Err(err) => {
            eprintln!("{}", err);
            exit(1);
        }
    }
}
/// Pre-generates LALRPOP's own parser from `lrgrammar.lalrpop` using a
/// previously built `lalrpop` binary, if one exists in the target
/// directory. On a fresh checkout the binary does not exist yet and this
/// step is skipped entirely (the crate then falls back to the checked-in
/// generated parser).
fn main_() -> Result<(), Box<Error>> {
    let grammar_file = "src/parser/lrgrammar.lalrpop";
    println!(r#"cargo:rerun-if-changed={}"#, grammar_file);

    let out_dir = PathBuf::from(env::var("OUT_DIR").expect("cargo did not set OUT_DIR"));
    fs::create_dir_all(out_dir.join("src/parser"))?;

    // Building from the workspace root places binaries in `target`;
    // building from inside the sub-crate places them one level up.
    let target_dir = if Path::new("target").exists() {
        Path::new("target")
    } else {
        Path::new("../target")
    };

    let lalrpop_path = target_dir
        .join("debug/lalrpop")
        .with_extension(env::consts::EXE_EXTENSION);
    println!(r#"cargo:rerun-if-changed={}"#, lalrpop_path.display());

    if lalrpop_path.exists() {
        // If compiling lalrpop itself, enable test parsers
        if target_dir.exists() {
            env::set_var("CARGO_FEATURE_TEST", "1");
            println!(r#"cargo:rustc-cfg=feature="test""#);
        }

        // The grammar must be compiled out-of-tree so the source tree
        // stays pristine; copy it into OUT_DIR first.
        let copied_grammar = out_dir.join("src/parser/lrgrammar.lalrpop");
        fs::copy(grammar_file, &copied_grammar)
            // FIX: message previously read "Unable to grammar to OUT_DIR"
            .map_err(|err| format!("Unable to copy grammar to OUT_DIR: {}", err))?;

        let status = Command::new(lalrpop_path)
            .args(&[
                "--force",
                "--features",
                "test",
                copied_grammar
                    .to_str()
                    .expect("grammar path is not valid UTF-8"),
            ])
            .status()?;
        if !status.success() {
            return Err("Compiling the .lalrpop file failed".into());
        }
    }
    Ok(())
}

View File

@ -103,6 +103,13 @@ impl Configuration {
self
}
/// Controls whitespace in the generated parser source.
///
/// If false, shrinks the generated code by removing redundant white space.
/// Default is true (readable output).
pub fn emit_whitespace(&mut self, val: bool) -> &mut Configuration {
    self.session.emit_whitespace = val;
    self
}
/// If true, emit report file about generated code.
pub fn emit_report(&mut self, val: bool) -> &mut Configuration {
self.session.emit_report = val;

View File

@ -1,57 +1,58 @@
// Need this for rusty_peg
#![recursion_limit = "256"]
// I hate this lint.
#![allow(unused_parens)]
// The builtin tests don't cover the CLI and so forth, and it's just
// too darn annoying to try and make them do so.
#![cfg_attr(test, allow(dead_code))]
extern crate ascii_canvas;
extern crate atty;
extern crate bit_set;
extern crate diff;
extern crate ena;
extern crate itertools;
extern crate lalrpop_util;
extern crate petgraph;
extern crate regex;
extern crate regex_syntax;
extern crate sha2;
extern crate string_cache;
extern crate term;
extern crate unicode_xid;
#[cfg(test)]
extern crate rand;
// hoist the modules that define macros up earlier
#[macro_use]
mod rust;
#[macro_use]
mod log;
mod api;
mod build;
mod collections;
mod file_text;
mod grammar;
mod kernel_set;
mod lexer;
mod lr1;
mod message;
mod normalize;
mod parser;
mod session;
mod tls;
mod tok;
mod util;
#[cfg(test)]
mod generate;
#[cfg(test)]
mod test_util;
pub use api::process_root;
pub use api::process_root_unconditionally;
pub use api::Configuration;
use ascii_canvas::style;
// Need this for rusty_peg
#![recursion_limit = "256"]
// I hate this lint.
#![allow(unused_parens)]
// The builtin tests don't cover the CLI and so forth, and it's just
// too darn annoying to try and make them do so.
#![cfg_attr(test, allow(dead_code))]
extern crate ascii_canvas;
extern crate atty;
extern crate bit_set;
extern crate diff;
extern crate ena;
extern crate itertools;
#[cfg_attr(any(feature = "test", test), macro_use)]
extern crate lalrpop_util;
extern crate petgraph;
extern crate regex;
extern crate regex_syntax;
extern crate sha2;
extern crate string_cache;
extern crate term;
extern crate unicode_xid;
#[cfg(test)]
extern crate rand;
// hoist the modules that define macros up earlier
#[macro_use]
mod rust;
#[macro_use]
mod log;
mod api;
mod build;
mod collections;
mod file_text;
mod grammar;
mod kernel_set;
mod lexer;
mod lr1;
mod message;
mod normalize;
mod parser;
mod session;
mod tls;
mod tok;
mod util;
#[cfg(test)]
mod generate;
#[cfg(test)]
mod test_util;
pub use api::process_root;
pub use api::process_root_unconditionally;
pub use api::Configuration;
use ascii_canvas::style;

View File

@ -51,6 +51,10 @@ fn main1() -> io::Result<()> {
config.emit_comments(true);
}
if args.flag_no_whitespace {
config.emit_whitespace(false);
}
if args.flag_report {
config.emit_report(true);
}
@ -101,6 +105,7 @@ Options:
--features FEATURES Comma separated list of features for conditional compilation.
-f, --force Force execution, even if the .lalrpop file is older than the .rs file.
-c, --color Force colorful output, even if this is not a TTY.
--no-whitespace Removes redundant whitespace from the generated file. (Default: false)
--comments Enable comments in the generated code.
--report Generate report files.
";
@ -114,6 +119,7 @@ struct Args {
flag_force: bool,
flag_color: bool,
flag_comments: bool,
flag_no_whitespace: bool,
flag_report: bool,
flag_version: bool,
}
@ -181,4 +187,13 @@ mod test {
.unwrap();
assert_eq!(args.flag_features, Some("test,abc".to_string()));
}
#[test]
fn emit_whitespace() {
    let argv = || vec!["lalrpop", "--no-whitespace", "file.lalrpop"];
    let args: Args = Docopt::new(USAGE)
        .and_then(|d| d.argv(argv().into_iter()).deserialize())
        .unwrap();
    // FIX: `assert!(flag, true)` treated `true` as a panic *message* and
    // never compared anything; assert the flag directly instead.
    assert!(args.flag_no_whitespace);
}
}

View File

@ -6,9 +6,19 @@ use tok::{self, Tok};
use util::strip;
use lalrpop_util::ParseError;
use super::Top;
grammar<'input>(text: &'input str);
pub Grammar: Grammar =
pub Top: Top = {
"StartGrammar" <Grammar> => Top::Grammar(<>),
"StartPattern" <Pattern> => Top::Pattern(<>),
"StartMatchMapping" <MatchMapping> => Top::MatchMapping(<>),
"StartTypeRef" <TypeRef> => Top::TypeRef(<>),
"StartGrammarWhereClauses" <GrammarWhereClauses> => Top::GrammarWhereClauses(<>),
};
Grammar: Grammar =
<module_attributes:ShebangAttribute*>
<uses:Use*>
<annotations:Annotation*>
@ -36,7 +46,7 @@ TypeParameter: TypeParameter = {
<l:Id> => TypeParameter::Id(l)
};
pub GrammarWhereClauses: Vec<WhereClause<TypeRef>> =
GrammarWhereClauses: Vec<WhereClause<TypeRef>> =
"where" <Comma<GrammarWhereClause>>;
GrammarWhereClause: WhereClause<TypeRef> = {
@ -226,7 +236,7 @@ SymbolKind1: SymbolKind = {
SymbolKind::Error,
};
pub TypeRef: TypeRef = {
TypeRef: TypeRef = {
"(" <Comma<TypeRef>> ")" =>
TypeRef::Tuple(<>),
@ -299,7 +309,7 @@ MatchItem: MatchItem = {
MatchSymbol = QuotedLiteral;
pub MatchMapping = Terminal;
MatchMapping = Terminal;
EnumToken: EnumToken =
"enum" <lo:@L> <t:TypeRef> <hi:@R> "{"
@ -327,7 +337,7 @@ Conversion: Conversion =
to: pattern })
};
pub Pattern: Pattern<TypeRef> =
Pattern: Pattern<TypeRef> =
<lo:@L> <k:PatternKind> <hi:@R> => Pattern { span: Span(lo, hi), kind: k };
PatternKind: PatternKind<TypeRef> = {
@ -472,5 +482,11 @@ extern {
"*" => Tok::Star,
"~~" => Tok::TildeTilde,
"_" => Tok::Underscore,
"StartGrammar" => Tok::StartGrammar,
"StartPattern" => Tok::StartPattern,
"StartMatchMapping" => Tok::StartMatchMapping,
"StartTypeRef" => Tok::StartTypeRef,
"StartGrammarWhereClauses" => Tok::StartGrammarWhereClauses,
}
}

File diff suppressed because it is too large Load Diff

View File

@ -1,54 +1,84 @@
use grammar::parse_tree::*;
use grammar::pattern::*;
use lalrpop_util;
use tok;
#[allow(dead_code)]
mod lrgrammar;
#[cfg(test)]
mod test;
pub type ParseError<'input> = lalrpop_util::ParseError<usize, tok::Tok<'input>, tok::Error>;
pub fn parse_grammar<'input>(input: &'input str) -> Result<Grammar, ParseError<'input>> {
let tokenizer = tok::Tokenizer::new(input, 0);
let mut grammar = try!(lrgrammar::GrammarParser::new().parse(input, tokenizer));
// find a unique prefix that does not appear anywhere in the input
while input.contains(&grammar.prefix) {
grammar.prefix.push('_');
}
Ok(grammar)
}
fn parse_pattern<'input>(
input: &'input str,
offset: usize,
) -> Result<Pattern<TypeRef>, ParseError<'input>> {
let tokenizer = tok::Tokenizer::new(input, offset);
lrgrammar::PatternParser::new().parse(input, tokenizer)
}
fn parse_match_mapping<'input>(
input: &'input str,
offset: usize,
) -> Result<MatchMapping, ParseError<'input>> {
let tokenizer = tok::Tokenizer::new(input, offset);
lrgrammar::MatchMappingParser::new().parse(input, tokenizer)
}
#[cfg(test)]
pub fn parse_type_ref<'input>(input: &'input str) -> Result<TypeRef, ParseError<'input>> {
let tokenizer = tok::Tokenizer::new(input, 0);
lrgrammar::TypeRefParser::new().parse(input, tokenizer)
}
#[cfg(test)]
pub fn parse_where_clauses<'input>(
input: &'input str,
) -> Result<Vec<WhereClause<TypeRef>>, ParseError<'input>> {
let tokenizer = tok::Tokenizer::new(input, 0);
lrgrammar::GrammarWhereClausesParser::new().parse(input, tokenizer)
}
use std::iter;
use grammar::parse_tree::*;
use grammar::pattern::*;
use lalrpop_util;
use tok;
#[cfg(not(any(feature = "test", test)))]
#[allow(dead_code)]
mod lrgrammar;
#[cfg(any(feature = "test", test))]
lalrpop_mod!(
// ---------------------------------------------------------------------------------------
// NOTE: Run `cargo build -p lalrpop` once before running `cargo test` to create this file
// ---------------------------------------------------------------------------------------
#[allow(dead_code)]
lrgrammar,
"/src/parser/lrgrammar.rs"
);
#[cfg(test)]
mod test;
/// Result of a parse that was started with one of the `Start*` dummy
/// tokens. A single shared `TopParser` dispatches to one of these five
/// start rules, replacing five separately generated parsers (this is how
/// the generated code was shrunk).
pub enum Top {
    Grammar(Grammar),
    Pattern(Pattern<TypeRef>),
    MatchMapping(TerminalString),
    TypeRef(TypeRef),
    GrammarWhereClauses(Vec<WhereClause<TypeRef>>),
}
pub type ParseError<'input> = lalrpop_util::ParseError<usize, tok::Tok<'input>, tok::Error>;
// Drives the shared `TopParser` for one `Top` variant: a dummy `Start*`
// token is injected ahead of the real token stream so the grammar
// dispatches to the corresponding start rule, then the matching `Top`
// variant is unwrapped (any other variant is impossible by construction,
// hence `unreachable!`).
macro_rules! parser {
    ($input: expr, $offset: expr, $pat: ident, $tok: ident) => {{
        let input = $input;
        // Dummy token spans (0, 0) so it does not disturb reported locations.
        let tokenizer =
            iter::once(Ok((0, tok::Tok::$tok, 0))).chain(tok::Tokenizer::new(input, $offset));
        lrgrammar::TopParser::new()
            .parse(input, tokenizer)
            .map(|top| match top {
                Top::$pat(x) => x,
                _ => unreachable!(),
            })
    }};
}
pub fn parse_grammar<'input>(input: &'input str) -> Result<Grammar, ParseError<'input>> {
let mut grammar = try!(parser!(input, 0, Grammar, StartGrammar));
// find a unique prefix that does not appear anywhere in the input
while input.contains(&grammar.prefix) {
grammar.prefix.push('_');
}
Ok(grammar)
}
/// Parses a standalone pattern.
/// `offset` is forwarded to the tokenizer — presumably the byte offset of
/// `input` within the enclosing source, so error spans line up; confirm
/// against `tok::Tokenizer::new`.
fn parse_pattern<'input>(
    input: &'input str,
    offset: usize,
) -> Result<Pattern<TypeRef>, ParseError<'input>> {
    parser!(input, offset, Pattern, StartPattern)
}
/// Parses a standalone match-mapping (terminal) fragment.
/// `offset` is forwarded to the tokenizer — presumably the byte offset of
/// `input` within the enclosing source; confirm against `tok::Tokenizer::new`.
fn parse_match_mapping<'input>(
    input: &'input str,
    offset: usize,
) -> Result<MatchMapping, ParseError<'input>> {
    parser!(input, offset, MatchMapping, StartMatchMapping)
}
// Test-only helper: parses a standalone type reference (offset fixed at 0).
#[cfg(test)]
pub fn parse_type_ref<'input>(input: &'input str) -> Result<TypeRef, ParseError<'input>> {
    parser!(input, 0, TypeRef, StartTypeRef)
}
// Test-only helper: parses a grammar-level `where` clause list
// (offset fixed at 0).
#[cfg(test)]
pub fn parse_where_clauses<'input>(
    input: &'input str,
) -> Result<Vec<WhereClause<TypeRef>>, ParseError<'input>> {
    parser!(input, 0, GrammarWhereClauses, StartGrammarWhereClauses)
}

View File

@ -53,11 +53,15 @@ impl<W: Write> RustWrite<W> {
}
fn write_indentation(&mut self) -> io::Result<()> {
write!(self.write, "{0:1$}", "", self.indent)
if Tls::session().emit_whitespace {
write!(self.write, "{0:1$}", "", self.indent)?;
}
Ok(())
}
fn write_indented(&mut self, out: &str) -> io::Result<()> {
writeln!(self.write, "{0:1$}{2}", "", self.indent, out)
self.write_indentation()?;
writeln!(self.write, "{}", out)
}
pub fn write_table_row<I, C>(&mut self, iterable: I) -> io::Result<()>
@ -65,7 +69,8 @@ impl<W: Write> RustWrite<W> {
I: IntoIterator<Item = (i32, C)>,
C: fmt::Display,
{
if Tls::session().emit_comments {
let session = Tls::session();
if session.emit_comments {
for (i, comment) in iterable {
try!(self.write_indentation());
try!(writeln!(self.write, "{}, {}", i, comment));
@ -74,7 +79,7 @@ impl<W: Write> RustWrite<W> {
try!(self.write_indentation());
let mut first = true;
for (i, _comment) in iterable {
if !first {
if !first && session.emit_whitespace {
try!(write!(self.write, " "));
}
try!(write!(self.write, "{},", i));

View File

@ -41,6 +41,9 @@ pub struct Session {
/// forth.
pub emit_comments: bool,
/// Emit whitespace in the generated code to improve readability.
pub emit_whitespace: bool,
/// Emit report file about generated code
pub emit_report: bool,
@ -92,6 +95,7 @@ impl Session {
out_dir: None,
force_build: false,
emit_comments: false,
emit_whitespace: true,
emit_report: false,
color_config: ColorConfig::default(),
max_errors: 1,
@ -117,6 +121,7 @@ impl Session {
out_dir: None,
force_build: false,
emit_comments: false,
emit_whitespace: true,
emit_report: false,
color_config: ColorConfig::IfTty,
max_errors: 1,

View File

@ -97,6 +97,15 @@ pub enum Tok<'input> {
Underscore,
Bang,
ShebangAttribute(&'input str), // #![...]
// Dummy tokens for parser sharing
StartGrammar,
StartPattern,
StartMatchMapping,
#[allow(dead_code)]
StartGrammarWhereClauses,
#[allow(dead_code)]
StartTypeRef,
}
pub struct Tokenizer<'input> {

View File

@ -1,3 +1,3 @@
#!/bin/bash
cargo run -p lalrpop -- --force --out-dir . lalrpop/src/parser/lrgrammar.lalrpop
cargo run -p lalrpop -- --force --no-whitespace --out-dir . lalrpop/src/parser/lrgrammar.lalrpop