mirror of
https://github.com/fluencelabs/lalrpop
synced 2025-03-16 17:00:53 +00:00
Merge pull request #414 from Marwes/shrink_lrgrammar2
feat: Shrink LALRPOPs own parser to 1Mb
This commit is contained in:
commit
eba453d876
2
.gitattributes
vendored
2
.gitattributes
vendored
@ -1 +1,3 @@
|
||||
* text=auto
|
||||
|
||||
lalrpop/src/parser/lrgrammar.lalrpop text eol=lf
|
||||
|
@ -927,6 +927,7 @@ fn verify_lalrpop_generates_itself() {
|
||||
Command::new("../target/debug/lalrpop")
|
||||
.args(&[
|
||||
"--force",
|
||||
"--no-whitespace",
|
||||
"--out-dir",
|
||||
out_dir,
|
||||
copied_grammar_file
|
||||
|
58
lalrpop/build.rs
Normal file
58
lalrpop/build.rs
Normal file
@ -0,0 +1,58 @@
|
||||
use std::env;
|
||||
use std::error::Error;
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::{exit, Command};
|
||||
|
||||
fn main() {
|
||||
if let Err(err) = main_() {
|
||||
eprintln!("{}", err);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
fn main_() -> Result<(), Box<Error>> {
|
||||
let grammar_file = "src/parser/lrgrammar.lalrpop";
|
||||
println!(r#"cargo:rerun-if-changed={}"#, grammar_file);
|
||||
|
||||
let out_dir = PathBuf::from(env::var("OUT_DIR").expect("cargo did not set OUT_DIR"));
|
||||
|
||||
fs::create_dir_all(out_dir.join("src/parser"))?;
|
||||
|
||||
let target_dir = if Path::new("target").exists() {
|
||||
Path::new("target")
|
||||
} else {
|
||||
Path::new("../target")
|
||||
};
|
||||
|
||||
let lalrpop_path = target_dir
|
||||
.join("debug/lalrpop")
|
||||
.with_extension(env::consts::EXE_EXTENSION);
|
||||
println!(r#"cargo:rerun-if-changed={}"#, lalrpop_path.display());
|
||||
|
||||
if lalrpop_path.exists() {
|
||||
// If compiling lalrpop itself, enable test parsers
|
||||
if target_dir.exists() {
|
||||
env::set_var("CARGO_FEATURE_TEST", "1");
|
||||
println!(r#"cargo:rustc-cfg=feature="test""#);
|
||||
}
|
||||
|
||||
let copied_grammar = out_dir.join("src/parser/lrgrammar.lalrpop");
|
||||
fs::copy(grammar_file, &copied_grammar)
|
||||
.map_err(|err| format!("Unable to grammar to OUT_DIR: {}", err))?;
|
||||
let status = Command::new(lalrpop_path)
|
||||
.args(&[
|
||||
"--force",
|
||||
"--features",
|
||||
"test",
|
||||
copied_grammar
|
||||
.to_str()
|
||||
.expect("grammar path is not valid UTF-8"),
|
||||
])
|
||||
.status()?;
|
||||
if !status.success() {
|
||||
return Err("Compiling the .lalrpop file failed".into());
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
@ -103,6 +103,13 @@ impl Configuration {
|
||||
self
|
||||
}
|
||||
|
||||
/// If false, shrinks the generated code by removing redundant white space.
|
||||
/// Default is true.
|
||||
pub fn emit_whitespace(&mut self, val: bool) -> &mut Configuration {
|
||||
self.session.emit_whitespace = val;
|
||||
self
|
||||
}
|
||||
|
||||
/// If true, emit report file about generated code.
|
||||
pub fn emit_report(&mut self, val: bool) -> &mut Configuration {
|
||||
self.session.emit_report = val;
|
||||
|
@ -1,57 +1,58 @@
|
||||
// Need this for rusty_peg
|
||||
#![recursion_limit = "256"]
|
||||
// I hate this lint.
|
||||
#![allow(unused_parens)]
|
||||
// The builtin tests don't cover the CLI and so forth, and it's just
|
||||
// too darn annoying to try and make them do so.
|
||||
#![cfg_attr(test, allow(dead_code))]
|
||||
|
||||
extern crate ascii_canvas;
|
||||
extern crate atty;
|
||||
extern crate bit_set;
|
||||
extern crate diff;
|
||||
extern crate ena;
|
||||
extern crate itertools;
|
||||
extern crate lalrpop_util;
|
||||
extern crate petgraph;
|
||||
extern crate regex;
|
||||
extern crate regex_syntax;
|
||||
extern crate sha2;
|
||||
extern crate string_cache;
|
||||
extern crate term;
|
||||
extern crate unicode_xid;
|
||||
|
||||
#[cfg(test)]
|
||||
extern crate rand;
|
||||
|
||||
// hoist the modules that define macros up earlier
|
||||
#[macro_use]
|
||||
mod rust;
|
||||
#[macro_use]
|
||||
mod log;
|
||||
|
||||
mod api;
|
||||
mod build;
|
||||
mod collections;
|
||||
mod file_text;
|
||||
mod grammar;
|
||||
mod kernel_set;
|
||||
mod lexer;
|
||||
mod lr1;
|
||||
mod message;
|
||||
mod normalize;
|
||||
mod parser;
|
||||
mod session;
|
||||
mod tls;
|
||||
mod tok;
|
||||
mod util;
|
||||
|
||||
#[cfg(test)]
|
||||
mod generate;
|
||||
#[cfg(test)]
|
||||
mod test_util;
|
||||
|
||||
pub use api::process_root;
|
||||
pub use api::process_root_unconditionally;
|
||||
pub use api::Configuration;
|
||||
use ascii_canvas::style;
|
||||
// Need this for rusty_peg
|
||||
#![recursion_limit = "256"]
|
||||
// I hate this lint.
|
||||
#![allow(unused_parens)]
|
||||
// The builtin tests don't cover the CLI and so forth, and it's just
|
||||
// too darn annoying to try and make them do so.
|
||||
#![cfg_attr(test, allow(dead_code))]
|
||||
|
||||
extern crate ascii_canvas;
|
||||
extern crate atty;
|
||||
extern crate bit_set;
|
||||
extern crate diff;
|
||||
extern crate ena;
|
||||
extern crate itertools;
|
||||
#[cfg_attr(any(feature = "test", test), macro_use)]
|
||||
extern crate lalrpop_util;
|
||||
extern crate petgraph;
|
||||
extern crate regex;
|
||||
extern crate regex_syntax;
|
||||
extern crate sha2;
|
||||
extern crate string_cache;
|
||||
extern crate term;
|
||||
extern crate unicode_xid;
|
||||
|
||||
#[cfg(test)]
|
||||
extern crate rand;
|
||||
|
||||
// hoist the modules that define macros up earlier
|
||||
#[macro_use]
|
||||
mod rust;
|
||||
#[macro_use]
|
||||
mod log;
|
||||
|
||||
mod api;
|
||||
mod build;
|
||||
mod collections;
|
||||
mod file_text;
|
||||
mod grammar;
|
||||
mod kernel_set;
|
||||
mod lexer;
|
||||
mod lr1;
|
||||
mod message;
|
||||
mod normalize;
|
||||
mod parser;
|
||||
mod session;
|
||||
mod tls;
|
||||
mod tok;
|
||||
mod util;
|
||||
|
||||
#[cfg(test)]
|
||||
mod generate;
|
||||
#[cfg(test)]
|
||||
mod test_util;
|
||||
|
||||
pub use api::process_root;
|
||||
pub use api::process_root_unconditionally;
|
||||
pub use api::Configuration;
|
||||
use ascii_canvas::style;
|
||||
|
@ -51,6 +51,10 @@ fn main1() -> io::Result<()> {
|
||||
config.emit_comments(true);
|
||||
}
|
||||
|
||||
if args.flag_no_whitespace {
|
||||
config.emit_whitespace(false);
|
||||
}
|
||||
|
||||
if args.flag_report {
|
||||
config.emit_report(true);
|
||||
}
|
||||
@ -101,6 +105,7 @@ Options:
|
||||
--features FEATURES Comma separated list of features for conditional compilation.
|
||||
-f, --force Force execution, even if the .lalrpop file is older than the .rs file.
|
||||
-c, --color Force colorful output, even if this is not a TTY.
|
||||
--no-whitespace Removes redundant whitespace from the generated file. (Default: false)
|
||||
--comments Enable comments in the generated code.
|
||||
--report Generate report files.
|
||||
";
|
||||
@ -114,6 +119,7 @@ struct Args {
|
||||
flag_force: bool,
|
||||
flag_color: bool,
|
||||
flag_comments: bool,
|
||||
flag_no_whitespace: bool,
|
||||
flag_report: bool,
|
||||
flag_version: bool,
|
||||
}
|
||||
@ -181,4 +187,13 @@ mod test {
|
||||
.unwrap();
|
||||
assert_eq!(args.flag_features, Some("test,abc".to_string()));
|
||||
}
|
||||
|
||||
/// Passing `--no-whitespace` on the command line must set
/// `flag_no_whitespace` when the args are deserialized.
#[test]
fn emit_whitespace() {
    let argv = || vec!["lalrpop", "--no-whitespace", "file.lalrpop"];
    let args: Args = Docopt::new(USAGE)
        .and_then(|d| d.argv(argv().into_iter()).deserialize())
        .unwrap();
    // Fixed: `assert!(args.flag_no_whitespace, true)` passed `true` as the
    // panic *message*, not as an expected value; assert the flag directly.
    assert!(args.flag_no_whitespace);
}
|
||||
}
|
||||
|
@ -6,9 +6,19 @@ use tok::{self, Tok};
|
||||
use util::strip;
|
||||
use lalrpop_util::ParseError;
|
||||
|
||||
use super::Top;
|
||||
|
||||
grammar<'input>(text: &'input str);
|
||||
|
||||
pub Grammar: Grammar =
|
||||
pub Top: Top = {
|
||||
"StartGrammar" <Grammar> => Top::Grammar(<>),
|
||||
"StartPattern" <Pattern> => Top::Pattern(<>),
|
||||
"StartMatchMapping" <MatchMapping> => Top::MatchMapping(<>),
|
||||
"StartTypeRef" <TypeRef> => Top::TypeRef(<>),
|
||||
"StartGrammarWhereClauses" <GrammarWhereClauses> => Top::GrammarWhereClauses(<>),
|
||||
};
|
||||
|
||||
Grammar: Grammar =
|
||||
<module_attributes:ShebangAttribute*>
|
||||
<uses:Use*>
|
||||
<annotations:Annotation*>
|
||||
@ -36,7 +46,7 @@ TypeParameter: TypeParameter = {
|
||||
<l:Id> => TypeParameter::Id(l)
|
||||
};
|
||||
|
||||
pub GrammarWhereClauses: Vec<WhereClause<TypeRef>> =
|
||||
GrammarWhereClauses: Vec<WhereClause<TypeRef>> =
|
||||
"where" <Comma<GrammarWhereClause>>;
|
||||
|
||||
GrammarWhereClause: WhereClause<TypeRef> = {
|
||||
@ -226,7 +236,7 @@ SymbolKind1: SymbolKind = {
|
||||
SymbolKind::Error,
|
||||
};
|
||||
|
||||
pub TypeRef: TypeRef = {
|
||||
TypeRef: TypeRef = {
|
||||
"(" <Comma<TypeRef>> ")" =>
|
||||
TypeRef::Tuple(<>),
|
||||
|
||||
@ -299,7 +309,7 @@ MatchItem: MatchItem = {
|
||||
|
||||
MatchSymbol = QuotedLiteral;
|
||||
|
||||
pub MatchMapping = Terminal;
|
||||
MatchMapping = Terminal;
|
||||
|
||||
EnumToken: EnumToken =
|
||||
"enum" <lo:@L> <t:TypeRef> <hi:@R> "{"
|
||||
@ -327,7 +337,7 @@ Conversion: Conversion =
|
||||
to: pattern })
|
||||
};
|
||||
|
||||
pub Pattern: Pattern<TypeRef> =
|
||||
Pattern: Pattern<TypeRef> =
|
||||
<lo:@L> <k:PatternKind> <hi:@R> => Pattern { span: Span(lo, hi), kind: k };
|
||||
|
||||
PatternKind: PatternKind<TypeRef> = {
|
||||
@ -472,5 +482,11 @@ extern {
|
||||
"*" => Tok::Star,
|
||||
"~~" => Tok::TildeTilde,
|
||||
"_" => Tok::Underscore,
|
||||
|
||||
"StartGrammar" => Tok::StartGrammar,
|
||||
"StartPattern" => Tok::StartPattern,
|
||||
"StartMatchMapping" => Tok::StartMatchMapping,
|
||||
"StartTypeRef" => Tok::StartTypeRef,
|
||||
"StartGrammarWhereClauses" => Tok::StartGrammarWhereClauses,
|
||||
}
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,54 +1,84 @@
|
||||
use grammar::parse_tree::*;
|
||||
use grammar::pattern::*;
|
||||
use lalrpop_util;
|
||||
use tok;
|
||||
|
||||
#[allow(dead_code)]
|
||||
mod lrgrammar;
|
||||
|
||||
#[cfg(test)]
|
||||
mod test;
|
||||
|
||||
pub type ParseError<'input> = lalrpop_util::ParseError<usize, tok::Tok<'input>, tok::Error>;
|
||||
|
||||
pub fn parse_grammar<'input>(input: &'input str) -> Result<Grammar, ParseError<'input>> {
|
||||
let tokenizer = tok::Tokenizer::new(input, 0);
|
||||
let mut grammar = try!(lrgrammar::GrammarParser::new().parse(input, tokenizer));
|
||||
|
||||
// find a unique prefix that does not appear anywhere in the input
|
||||
while input.contains(&grammar.prefix) {
|
||||
grammar.prefix.push('_');
|
||||
}
|
||||
|
||||
Ok(grammar)
|
||||
}
|
||||
|
||||
fn parse_pattern<'input>(
|
||||
input: &'input str,
|
||||
offset: usize,
|
||||
) -> Result<Pattern<TypeRef>, ParseError<'input>> {
|
||||
let tokenizer = tok::Tokenizer::new(input, offset);
|
||||
lrgrammar::PatternParser::new().parse(input, tokenizer)
|
||||
}
|
||||
|
||||
fn parse_match_mapping<'input>(
|
||||
input: &'input str,
|
||||
offset: usize,
|
||||
) -> Result<MatchMapping, ParseError<'input>> {
|
||||
let tokenizer = tok::Tokenizer::new(input, offset);
|
||||
lrgrammar::MatchMappingParser::new().parse(input, tokenizer)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn parse_type_ref<'input>(input: &'input str) -> Result<TypeRef, ParseError<'input>> {
|
||||
let tokenizer = tok::Tokenizer::new(input, 0);
|
||||
lrgrammar::TypeRefParser::new().parse(input, tokenizer)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn parse_where_clauses<'input>(
|
||||
input: &'input str,
|
||||
) -> Result<Vec<WhereClause<TypeRef>>, ParseError<'input>> {
|
||||
let tokenizer = tok::Tokenizer::new(input, 0);
|
||||
lrgrammar::GrammarWhereClausesParser::new().parse(input, tokenizer)
|
||||
}
|
||||
use std::iter;
|
||||
|
||||
use grammar::parse_tree::*;
|
||||
use grammar::pattern::*;
|
||||
use lalrpop_util;
|
||||
use tok;
|
||||
|
||||
#[cfg(not(any(feature = "test", test)))]
|
||||
#[allow(dead_code)]
|
||||
mod lrgrammar;
|
||||
|
||||
#[cfg(any(feature = "test", test))]
|
||||
lalrpop_mod!(
|
||||
// ---------------------------------------------------------------------------------------
|
||||
// NOTE: Run `cargo build -p lalrpop` once before running `cargo test` to create this file
|
||||
// ---------------------------------------------------------------------------------------
|
||||
#[allow(dead_code)]
|
||||
lrgrammar,
|
||||
"/src/parser/lrgrammar.rs"
|
||||
);
|
||||
|
||||
#[cfg(test)]
|
||||
mod test;
|
||||
|
||||
/// Result of the shared `Top` entry point of the generated parser: one
/// variant per synthetic start token (`StartGrammar`, `StartPattern`, ...),
/// so a single generated parser can serve every public entry point.
pub enum Top {
    Grammar(Grammar),
    Pattern(Pattern<TypeRef>),
    // NOTE(review): the grammar maps `MatchMapping` through `Terminal`, which
    // apparently yields a `TerminalString` here — confirm against parse_tree.
    MatchMapping(TerminalString),
    TypeRef(TypeRef),
    GrammarWhereClauses(Vec<WhereClause<TypeRef>>),
}
|
||||
|
||||
pub type ParseError<'input> = lalrpop_util::ParseError<usize, tok::Tok<'input>, tok::Error>;
|
||||
|
||||
// Runs the single shared `Top` parser with a synthetic start token ($tok)
// prepended to the real token stream, then unwraps the resulting `Top`
// variant ($pat). Sharing one parser for all entry points keeps the
// generated parser small.
macro_rules! parser {
    ($input: expr, $offset: expr, $pat: ident, $tok: ident) => {{
        let input = $input;
        // The dummy start token is given a zero-width (0, 0) span; the real
        // tokens follow from the tokenizer, starting at $offset.
        let tokenizer =
            iter::once(Ok((0, tok::Tok::$tok, 0))).chain(tok::Tokenizer::new(input, $offset));
        lrgrammar::TopParser::new()
            .parse(input, tokenizer)
            .map(|top| match top {
                Top::$pat(x) => x,
                // The start token uniquely selects the production, so any
                // other variant is impossible here.
                _ => unreachable!(),
            })
    }};
}
|
||||
|
||||
/// Parses a complete `.lalrpop` source file into a `Grammar`.
///
/// After parsing, `grammar.prefix` is extended with underscores until it no
/// longer occurs anywhere in `input`, so names derived from the prefix
/// cannot collide with identifiers appearing in the grammar source.
pub fn parse_grammar<'input>(input: &'input str) -> Result<Grammar, ParseError<'input>> {
    let mut grammar = try!(parser!(input, 0, Grammar, StartGrammar));

    // find a unique prefix that does not appear anywhere in the input
    while input.contains(&grammar.prefix) {
        grammar.prefix.push('_');
    }

    Ok(grammar)
}
|
||||
|
||||
/// Parses a `Pattern<TypeRef>` fragment from `input`.
///
/// `offset` is forwarded to the tokenizer — presumably so that token spans
/// (and thus error positions) line up with the fragment's position in the
/// enclosing file; confirm against `tok::Tokenizer::new`.
fn parse_pattern<'input>(
    input: &'input str,
    offset: usize,
) -> Result<Pattern<TypeRef>, ParseError<'input>> {
    parser!(input, offset, Pattern, StartPattern)
}
|
||||
|
||||
/// Parses a `MatchMapping` fragment from `input`, with token spans offset
/// by `offset` (same convention as `parse_pattern`).
fn parse_match_mapping<'input>(
    input: &'input str,
    offset: usize,
) -> Result<MatchMapping, ParseError<'input>> {
    parser!(input, offset, MatchMapping, StartMatchMapping)
}
|
||||
|
||||
/// Test-only helper: parses a `TypeRef` from the whole of `input`.
#[cfg(test)]
pub fn parse_type_ref<'input>(input: &'input str) -> Result<TypeRef, ParseError<'input>> {
    parser!(input, 0, TypeRef, StartTypeRef)
}
|
||||
|
||||
/// Test-only helper: parses a grammar-level `where` clause list from the
/// whole of `input`.
#[cfg(test)]
pub fn parse_where_clauses<'input>(
    input: &'input str,
) -> Result<Vec<WhereClause<TypeRef>>, ParseError<'input>> {
    parser!(input, 0, GrammarWhereClauses, StartGrammarWhereClauses)
}
|
||||
|
@ -53,11 +53,15 @@ impl<W: Write> RustWrite<W> {
|
||||
}
|
||||
|
||||
fn write_indentation(&mut self) -> io::Result<()> {
|
||||
write!(self.write, "{0:1$}", "", self.indent)
|
||||
if Tls::session().emit_whitespace {
|
||||
write!(self.write, "{0:1$}", "", self.indent)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_indented(&mut self, out: &str) -> io::Result<()> {
|
||||
writeln!(self.write, "{0:1$}{2}", "", self.indent, out)
|
||||
self.write_indentation()?;
|
||||
writeln!(self.write, "{}", out)
|
||||
}
|
||||
|
||||
pub fn write_table_row<I, C>(&mut self, iterable: I) -> io::Result<()>
|
||||
@ -65,7 +69,8 @@ impl<W: Write> RustWrite<W> {
|
||||
I: IntoIterator<Item = (i32, C)>,
|
||||
C: fmt::Display,
|
||||
{
|
||||
if Tls::session().emit_comments {
|
||||
let session = Tls::session();
|
||||
if session.emit_comments {
|
||||
for (i, comment) in iterable {
|
||||
try!(self.write_indentation());
|
||||
try!(writeln!(self.write, "{}, {}", i, comment));
|
||||
@ -74,7 +79,7 @@ impl<W: Write> RustWrite<W> {
|
||||
try!(self.write_indentation());
|
||||
let mut first = true;
|
||||
for (i, _comment) in iterable {
|
||||
if !first {
|
||||
if !first && session.emit_whitespace {
|
||||
try!(write!(self.write, " "));
|
||||
}
|
||||
try!(write!(self.write, "{},", i));
|
||||
|
@ -41,6 +41,9 @@ pub struct Session {
|
||||
/// forth.
|
||||
pub emit_comments: bool,
|
||||
|
||||
/// Emit whitespace in the generated code to improve readability.
|
||||
pub emit_whitespace: bool,
|
||||
|
||||
/// Emit report file about generated code
|
||||
pub emit_report: bool,
|
||||
|
||||
@ -92,6 +95,7 @@ impl Session {
|
||||
out_dir: None,
|
||||
force_build: false,
|
||||
emit_comments: false,
|
||||
emit_whitespace: true,
|
||||
emit_report: false,
|
||||
color_config: ColorConfig::default(),
|
||||
max_errors: 1,
|
||||
@ -117,6 +121,7 @@ impl Session {
|
||||
out_dir: None,
|
||||
force_build: false,
|
||||
emit_comments: false,
|
||||
emit_whitespace: true,
|
||||
emit_report: false,
|
||||
color_config: ColorConfig::IfTty,
|
||||
max_errors: 1,
|
||||
|
@ -97,6 +97,15 @@ pub enum Tok<'input> {
|
||||
Underscore,
|
||||
Bang,
|
||||
ShebangAttribute(&'input str), // #![...]
|
||||
|
||||
// Dummy tokens for parser sharing
|
||||
StartGrammar,
|
||||
StartPattern,
|
||||
StartMatchMapping,
|
||||
#[allow(dead_code)]
|
||||
StartGrammarWhereClauses,
|
||||
#[allow(dead_code)]
|
||||
StartTypeRef,
|
||||
}
|
||||
|
||||
pub struct Tokenizer<'input> {
|
||||
|
Loading…
x
Reference in New Issue
Block a user