mirror of
https://github.com/fluencelabs/lalrpop
synced 2025-03-16 17:00:53 +00:00
Merge the public parsers in lrgrammar to one
This shrinks the generated code to 1Mb (from 2Mb WITH the cfg approach in #413) while also removing the cfg'ing itself as that barely reduces the size. (Thus this supersedes #413) While this is a bit boilerplatey, I'd say it is worthwhile. We could generate this in LALRPOP itself with some work https://github.com/lalrpop/lalrpop/issues/304#issuecomment-431654594
This commit is contained in:
parent
af12168773
commit
e5f2897aee
58
lalrpop/build.rs
Normal file
58
lalrpop/build.rs
Normal file
@ -0,0 +1,58 @@
|
||||
use std::env;
|
||||
use std::error::Error;
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::{exit, Command};
|
||||
|
||||
fn main() {
|
||||
if let Err(err) = main_() {
|
||||
eprintln!("{}", err);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
fn main_() -> Result<(), Box<Error>> {
|
||||
let grammar_file = "src/parser/lrgrammar.lalrpop";
|
||||
println!(r#"cargo:rerun-if-changed={}"#, grammar_file);
|
||||
|
||||
let out_dir = PathBuf::from(env::var("OUT_DIR").expect("cargo did not set OUT_DIR"));
|
||||
|
||||
fs::create_dir_all(out_dir.join("src/parser"))?;
|
||||
|
||||
let target_dir = if Path::new("target").exists() {
|
||||
Path::new("target")
|
||||
} else {
|
||||
Path::new("../target")
|
||||
};
|
||||
|
||||
let lalrpop_path = target_dir
|
||||
.join("debug/lalrpop")
|
||||
.with_extension(env::consts::EXE_EXTENSION);
|
||||
println!(r#"cargo:rerun-if-changed={}"#, lalrpop_path.display());
|
||||
|
||||
if lalrpop_path.exists() {
|
||||
// If compiling lalrpop itself, enable test parsers
|
||||
if target_dir.exists() {
|
||||
env::set_var("CARGO_FEATURE_TEST", "1");
|
||||
println!(r#"cargo:rustc-cfg=feature="test""#);
|
||||
}
|
||||
|
||||
let copied_grammar = out_dir.join("src/parser/lrgrammar.lalrpop");
|
||||
fs::copy(grammar_file, &copied_grammar)
|
||||
.map_err(|err| format!("Unable to grammar to OUT_DIR: {}", err))?;
|
||||
let status = Command::new(lalrpop_path)
|
||||
.args(&[
|
||||
"--force",
|
||||
"--features",
|
||||
"test",
|
||||
copied_grammar
|
||||
.to_str()
|
||||
.expect("grammar path is not valid UTF-8"),
|
||||
])
|
||||
.status()?;
|
||||
if !status.success() {
|
||||
return Err("Compiling the .lalrpop file failed".into());
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
@ -1,57 +1,58 @@
|
||||
// Need this for rusty_peg
|
||||
#![recursion_limit = "256"]
|
||||
// I hate this lint.
|
||||
#![allow(unused_parens)]
|
||||
// The builtin tests don't cover the CLI and so forth, and it's just
|
||||
// too darn annoying to try and make them do so.
|
||||
#![cfg_attr(test, allow(dead_code))]
|
||||
|
||||
extern crate ascii_canvas;
|
||||
extern crate atty;
|
||||
extern crate bit_set;
|
||||
extern crate diff;
|
||||
extern crate ena;
|
||||
extern crate itertools;
|
||||
extern crate lalrpop_util;
|
||||
extern crate petgraph;
|
||||
extern crate regex;
|
||||
extern crate regex_syntax;
|
||||
extern crate sha2;
|
||||
extern crate string_cache;
|
||||
extern crate term;
|
||||
extern crate unicode_xid;
|
||||
|
||||
#[cfg(test)]
|
||||
extern crate rand;
|
||||
|
||||
// hoist the modules that define macros up earlier
|
||||
#[macro_use]
|
||||
mod rust;
|
||||
#[macro_use]
|
||||
mod log;
|
||||
|
||||
mod api;
|
||||
mod build;
|
||||
mod collections;
|
||||
mod file_text;
|
||||
mod grammar;
|
||||
mod kernel_set;
|
||||
mod lexer;
|
||||
mod lr1;
|
||||
mod message;
|
||||
mod normalize;
|
||||
mod parser;
|
||||
mod session;
|
||||
mod tls;
|
||||
mod tok;
|
||||
mod util;
|
||||
|
||||
#[cfg(test)]
|
||||
mod generate;
|
||||
#[cfg(test)]
|
||||
mod test_util;
|
||||
|
||||
pub use api::process_root;
|
||||
pub use api::process_root_unconditionally;
|
||||
pub use api::Configuration;
|
||||
use ascii_canvas::style;
|
||||
// Need this for rusty_peg
|
||||
#![recursion_limit = "256"]
|
||||
// I hate this lint.
|
||||
#![allow(unused_parens)]
|
||||
// The builtin tests don't cover the CLI and so forth, and it's just
|
||||
// too darn annoying to try and make them do so.
|
||||
#![cfg_attr(test, allow(dead_code))]
|
||||
|
||||
extern crate ascii_canvas;
|
||||
extern crate atty;
|
||||
extern crate bit_set;
|
||||
extern crate diff;
|
||||
extern crate ena;
|
||||
extern crate itertools;
|
||||
#[cfg_attr(any(feature = "test", test), macro_use)]
|
||||
extern crate lalrpop_util;
|
||||
extern crate petgraph;
|
||||
extern crate regex;
|
||||
extern crate regex_syntax;
|
||||
extern crate sha2;
|
||||
extern crate string_cache;
|
||||
extern crate term;
|
||||
extern crate unicode_xid;
|
||||
|
||||
#[cfg(test)]
|
||||
extern crate rand;
|
||||
|
||||
// hoist the modules that define macros up earlier
|
||||
#[macro_use]
|
||||
mod rust;
|
||||
#[macro_use]
|
||||
mod log;
|
||||
|
||||
mod api;
|
||||
mod build;
|
||||
mod collections;
|
||||
mod file_text;
|
||||
mod grammar;
|
||||
mod kernel_set;
|
||||
mod lexer;
|
||||
mod lr1;
|
||||
mod message;
|
||||
mod normalize;
|
||||
mod parser;
|
||||
mod session;
|
||||
mod tls;
|
||||
mod tok;
|
||||
mod util;
|
||||
|
||||
#[cfg(test)]
|
||||
mod generate;
|
||||
#[cfg(test)]
|
||||
mod test_util;
|
||||
|
||||
pub use api::process_root;
|
||||
pub use api::process_root_unconditionally;
|
||||
pub use api::Configuration;
|
||||
use ascii_canvas::style;
|
||||
|
@ -6,9 +6,19 @@ use tok::{self, Tok};
|
||||
use util::strip;
|
||||
use lalrpop_util::ParseError;
|
||||
|
||||
use super::Top;
|
||||
|
||||
grammar<'input>(text: &'input str);
|
||||
|
||||
pub Grammar: Grammar =
|
||||
pub Top: Top = {
|
||||
"StartGrammar" <Grammar> => Top::Grammar(<>),
|
||||
"StartPattern" <Pattern> => Top::Pattern(<>),
|
||||
"StartMatchMapping" <MatchMapping> => Top::MatchMapping(<>),
|
||||
"StartTypeRef" <TypeRef> => Top::TypeRef(<>),
|
||||
"StartGrammarWhereClauses" <GrammarWhereClauses> => Top::GrammarWhereClauses(<>),
|
||||
};
|
||||
|
||||
Grammar: Grammar =
|
||||
<module_attributes:ShebangAttribute*>
|
||||
<uses:Use*>
|
||||
<annotations:Annotation*>
|
||||
@ -36,7 +46,7 @@ TypeParameter: TypeParameter = {
|
||||
<l:Id> => TypeParameter::Id(l)
|
||||
};
|
||||
|
||||
pub GrammarWhereClauses: Vec<WhereClause<TypeRef>> =
|
||||
GrammarWhereClauses: Vec<WhereClause<TypeRef>> =
|
||||
"where" <Comma<GrammarWhereClause>>;
|
||||
|
||||
GrammarWhereClause: WhereClause<TypeRef> = {
|
||||
@ -226,7 +236,7 @@ SymbolKind1: SymbolKind = {
|
||||
SymbolKind::Error,
|
||||
};
|
||||
|
||||
pub TypeRef: TypeRef = {
|
||||
TypeRef: TypeRef = {
|
||||
"(" <Comma<TypeRef>> ")" =>
|
||||
TypeRef::Tuple(<>),
|
||||
|
||||
@ -299,7 +309,7 @@ MatchItem: MatchItem = {
|
||||
|
||||
MatchSymbol = QuotedLiteral;
|
||||
|
||||
pub MatchMapping = Terminal;
|
||||
MatchMapping = Terminal;
|
||||
|
||||
EnumToken: EnumToken =
|
||||
"enum" <lo:@L> <t:TypeRef> <hi:@R> "{"
|
||||
@ -327,7 +337,7 @@ Conversion: Conversion =
|
||||
to: pattern })
|
||||
};
|
||||
|
||||
pub Pattern: Pattern<TypeRef> =
|
||||
Pattern: Pattern<TypeRef> =
|
||||
<lo:@L> <k:PatternKind> <hi:@R> => Pattern { span: Span(lo, hi), kind: k };
|
||||
|
||||
PatternKind: PatternKind<TypeRef> = {
|
||||
@ -472,5 +482,11 @@ extern {
|
||||
"*" => Tok::Star,
|
||||
"~~" => Tok::TildeTilde,
|
||||
"_" => Tok::Underscore,
|
||||
|
||||
"StartGrammar" => Tok::StartGrammar,
|
||||
"StartPattern" => Tok::StartPattern,
|
||||
"StartMatchMapping" => Tok::StartMatchMapping,
|
||||
"StartTypeRef" => Tok::StartTypeRef,
|
||||
"StartGrammarWhereClauses" => Tok::StartGrammarWhereClauses,
|
||||
}
|
||||
}
|
||||
|
@ -1,54 +1,84 @@
|
||||
use grammar::parse_tree::*;
|
||||
use grammar::pattern::*;
|
||||
use lalrpop_util;
|
||||
use tok;
|
||||
|
||||
#[allow(dead_code)]
|
||||
mod lrgrammar;
|
||||
|
||||
#[cfg(test)]
|
||||
mod test;
|
||||
|
||||
pub type ParseError<'input> = lalrpop_util::ParseError<usize, tok::Tok<'input>, tok::Error>;
|
||||
|
||||
pub fn parse_grammar<'input>(input: &'input str) -> Result<Grammar, ParseError<'input>> {
|
||||
let tokenizer = tok::Tokenizer::new(input, 0);
|
||||
let mut grammar = try!(lrgrammar::GrammarParser::new().parse(input, tokenizer));
|
||||
|
||||
// find a unique prefix that does not appear anywhere in the input
|
||||
while input.contains(&grammar.prefix) {
|
||||
grammar.prefix.push('_');
|
||||
}
|
||||
|
||||
Ok(grammar)
|
||||
}
|
||||
|
||||
fn parse_pattern<'input>(
|
||||
input: &'input str,
|
||||
offset: usize,
|
||||
) -> Result<Pattern<TypeRef>, ParseError<'input>> {
|
||||
let tokenizer = tok::Tokenizer::new(input, offset);
|
||||
lrgrammar::PatternParser::new().parse(input, tokenizer)
|
||||
}
|
||||
|
||||
fn parse_match_mapping<'input>(
|
||||
input: &'input str,
|
||||
offset: usize,
|
||||
) -> Result<MatchMapping, ParseError<'input>> {
|
||||
let tokenizer = tok::Tokenizer::new(input, offset);
|
||||
lrgrammar::MatchMappingParser::new().parse(input, tokenizer)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn parse_type_ref<'input>(input: &'input str) -> Result<TypeRef, ParseError<'input>> {
|
||||
let tokenizer = tok::Tokenizer::new(input, 0);
|
||||
lrgrammar::TypeRefParser::new().parse(input, tokenizer)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn parse_where_clauses<'input>(
|
||||
input: &'input str,
|
||||
) -> Result<Vec<WhereClause<TypeRef>>, ParseError<'input>> {
|
||||
let tokenizer = tok::Tokenizer::new(input, 0);
|
||||
lrgrammar::GrammarWhereClausesParser::new().parse(input, tokenizer)
|
||||
}
|
||||
use std::iter;
|
||||
|
||||
use grammar::parse_tree::*;
|
||||
use grammar::pattern::*;
|
||||
use lalrpop_util;
|
||||
use tok;
|
||||
|
||||
#[cfg(not(any(feature = "test", test)))]
|
||||
#[allow(dead_code)]
|
||||
mod lrgrammar;
|
||||
|
||||
#[cfg(any(feature = "test", test))]
|
||||
lalrpop_mod!(
|
||||
// ---------------------------------------------------------------------------------------
|
||||
// NOTE: Run `cargo build -p lalrpop` once before running `cargo test` to create this file
|
||||
// ---------------------------------------------------------------------------------------
|
||||
#[allow(dead_code)]
|
||||
lrgrammar,
|
||||
"/src/parser/lrgrammar.rs"
|
||||
);
|
||||
|
||||
#[cfg(test)]
|
||||
mod test;
|
||||
|
||||
/// Single start symbol of the merged `lrgrammar` parser.
///
/// Each variant corresponds to one of the formerly separate public
/// nonterminals. The tokenizer stream is seeded with a dummy `Start*`
/// token so the grammar dispatches to exactly one alternative; merging
/// the parsers this way shrinks the generated code considerably.
pub enum Top {
    Grammar(Grammar),
    Pattern(Pattern<TypeRef>),
    // NOTE(review): holds a `TerminalString` — in the grammar,
    // `MatchMapping = Terminal`.
    MatchMapping(TerminalString),
    TypeRef(TypeRef),
    GrammarWhereClauses(Vec<WhereClause<TypeRef>>),
}
|
||||
|
||||
pub type ParseError<'input> = lalrpop_util::ParseError<usize, tok::Tok<'input>, tok::Error>;
|
||||
|
||||
// Runs the merged `TopParser` and extracts one specific `Top` variant.
//
// `$tok` is the dummy start token prepended to the real token stream; the
// grammar uses it to select the matching `Top` alternative, so on success
// the result is guaranteed to be `Top::$pat` (hence the `unreachable!`).
macro_rules! parser {
    ($input: expr, $offset: expr, $pat: ident, $tok: ident) => {{
        let input = $input;
        // Prepend the dummy token with a zero-width span (0..0), then lex
        // the actual input starting at `$offset`.
        let tokenizer =
            iter::once(Ok((0, tok::Tok::$tok, 0))).chain(tok::Tokenizer::new(input, $offset));
        lrgrammar::TopParser::new()
            .parse(input, tokenizer)
            .map(|top| match top {
                Top::$pat(x) => x,
                _ => unreachable!(),
            })
    }};
}
|
||||
|
||||
pub fn parse_grammar<'input>(input: &'input str) -> Result<Grammar, ParseError<'input>> {
|
||||
let mut grammar = try!(parser!(input, 0, Grammar, StartGrammar));
|
||||
|
||||
// find a unique prefix that does not appear anywhere in the input
|
||||
while input.contains(&grammar.prefix) {
|
||||
grammar.prefix.push('_');
|
||||
}
|
||||
|
||||
Ok(grammar)
|
||||
}
|
||||
|
||||
/// Parses a single `Pattern` from `input`.
///
/// `offset` is forwarded to the tokenizer — presumably the byte offset of
/// `input` within the enclosing source file, so spans line up with the
/// full text; TODO confirm against callers.
fn parse_pattern<'input>(
    input: &'input str,
    offset: usize,
) -> Result<Pattern<TypeRef>, ParseError<'input>> {
    parser!(input, offset, Pattern, StartPattern)
}
|
||||
|
||||
/// Parses a match mapping (a terminal — the grammar defines
/// `MatchMapping = Terminal`) from `input`.
///
/// `offset` is forwarded to the tokenizer — presumably the byte offset of
/// `input` within the enclosing source file; TODO confirm against callers.
fn parse_match_mapping<'input>(
    input: &'input str,
    offset: usize,
) -> Result<MatchMapping, ParseError<'input>> {
    parser!(input, offset, MatchMapping, StartMatchMapping)
}
|
||||
|
||||
/// Test-only helper: parses a `TypeRef` from `input` (offset 0).
#[cfg(test)]
pub fn parse_type_ref<'input>(input: &'input str) -> Result<TypeRef, ParseError<'input>> {
    parser!(input, 0, TypeRef, StartTypeRef)
}
|
||||
|
||||
/// Test-only helper: parses a grammar `where`-clause list from `input`
/// (offset 0).
#[cfg(test)]
pub fn parse_where_clauses<'input>(
    input: &'input str,
) -> Result<Vec<WhereClause<TypeRef>>, ParseError<'input>> {
    parser!(input, 0, GrammarWhereClauses, StartGrammarWhereClauses)
}
|
||||
|
@ -97,6 +97,15 @@ pub enum Tok<'input> {
|
||||
Underscore,
|
||||
Bang,
|
||||
ShebangAttribute(&'input str), // #![...]
|
||||
|
||||
// Dummy tokens for parser sharing
|
||||
StartGrammar,
|
||||
StartPattern,
|
||||
StartMatchMapping,
|
||||
#[allow(dead_code)]
|
||||
StartGrammarWhereClauses,
|
||||
#[allow(dead_code)]
|
||||
StartTypeRef,
|
||||
}
|
||||
|
||||
pub struct Tokenizer<'input> {
|
||||
|
Loading…
x
Reference in New Issue
Block a user