Implement backtrace

This commit is contained in:
Niko Matsakis 2016-02-02 06:31:35 -05:00
parent 52e7208239
commit 22d3c904a9
10 changed files with 421 additions and 113 deletions

View File

@ -1,94 +0,0 @@
use petgraph::graph::{Graph, NodeIndex};
/// The result type of `backtraces`: a list of items.
// NOTE(review): the order and meaning of the items is not established by
// anything visible here -- confirm before relying on it.
struct Backtrace<'grammar> {
items: Vec<Item<'grammar>>
}
/// Given that the state `state` can reduce `production` when seeing
/// `lookahead` --- in other words, that it contains
///
/// NT = ... (*) [L]
///
/// where `production = NT = ...` --- then this function traverses the
/// state graph to give a backtrace explaining *why* this state exists
/// with that production and lookahead in the first place.
///
/// NOTE(review): the body is empty, so no value is produced for the
/// declared `Vec<Backtrace>` return type, and the `'grammar` lifetime is
/// used without being declared on the function.
pub fn backtraces(states: &[State<'grammar>],
state: StateIndex,
production: &'grammar Production,
lookahead: Lookaround)
-> Vec<Backtrace<'grammar>> {
}
// Each state `s` corresponds to the node in the graph with index
// `s`. The edges are the shift transitions.
type StateGraph = Graph<(), TerminalString>;
/// Builds the state graph for `states`: adds one node per state, then one
/// edge per shift action, labelled with the lookahead that triggers it.
///
/// NOTE(review): `graph` is never returned although the signature promises
/// a `StateGraph`, and `'grammar` is not declared on the function.
fn state_graph(states: &[State<'grammar>]) -> StateGraph {
let mut graph = Graph::new();
// First, create the nodes.
for i in 0..states.len() {
let j = graph.add_node(());
// NOTE(review): presumably checks that node indices line up with state
// indices; `i` is a `usize`, confirm that `j` can be compared against it.
assert_eq!(i, j);
}
// Add in the edges.
for (i, state) in states.iter().enumerate() {
// Gather every action of the state: the conflicting ones, each paired
// with its lookahead, followed by the regular ones.
let all_actions =
state.conflicts.iter()
.flat_map(|(lookahead, conflicts)| {
conflicts.iter()
.map(|c| (lookahead, &c.action))
})
.chain(state.actions.iter());
for (&lookahead, action) in all_actions {
match action {
// Only shifts become edges; reductions are ignored.
Action::Shift(target) => { graph.add_edge(i, target.0 as usize, lookahead); }
Action::Reduce(_) => { }
}
}
}
}
{
if item.index == 0 {
// The item that caused a conflict looked like:
//
// X := (*) ...y [K]
//
// This could arise in two scenarios. Either there
// is another item in the same state like:
//
// Z := ... (*) X ...
//
// and hence we added the conflicting item due to an epsilon
// move, or else in some other item we have:
//
//
//
}
for state in states {
for item in &state.items {
// Look for an item in some state like:
//
// X := ...x (*) Y ...z [K]
//
// or
//
// Y := ...x (*) [K]
//
// where `Y` is the nonterminal that we are looking for, and
// lookahead in `FIRST(...z, K)`.
// Note that these items might appear in the *same state*
}
}
}

View File

@ -0,0 +1,95 @@
use lr1::first::FirstSets;
use lr1::{BacktraceNode, Item, State, StateIndex};
use grammar::repr::*;
use session::Session;
use self::state_graph::StateGraph;
mod state_graph;
mod test;
/// Computes backtraces over a set of LR(1) states: explanations of how a
/// given state came to contain a given item.
pub struct Tracer<'trace, 'grammar: 'trace> {
// Session handed to the `log!` calls made while tracing.
session: &'trace Session,
// The states being traced, indexed by the value inside a `StateIndex`.
states: &'trace [State<'grammar>],
// FIRST sets built from the grammar passed to `new`.
first_sets: FirstSets,
// Graph built from `states`, used to walk back to predecessor states.
state_graph: StateGraph,
}
impl<'trace, 'grammar> Tracer<'trace, 'grammar> {
pub fn new(session: &'trace Session,
grammar: &'grammar Grammar,
states: &'trace [State<'grammar>])
-> Self {
Tracer {
session: session,
states: states,
first_sets: FirstSets::new(grammar),
state_graph: StateGraph::new(states),
}
}
/// Returns a backtrace explaining how the state `item_state` came
/// to contain the item `item`:
///
/// NT = ... (*) ... [L]
///
/// In particular, how we came to be able to reduce `NT` with
/// lookahead `L`.
pub fn backtrace(&self, item_state: StateIndex, item: Item<'grammar>)
-> BacktraceNode<'grammar> {
log!(self.session, Debug, "backtrace(item_state={:?} item={:?})", item_state, item);
let mut result_node = BacktraceNode::new(item);
// The nonterminal NT and lookahead L we are looking for
let nt_sym = Symbol::Nonterminal(item.production.nonterminal);
let lookahead = item.lookahead;
// We will have arrived at the current state after pushing N
// symbols, where N is the number of items pushed so far in
// `item`. So walk backwards N states to find the state(s)
// where we had something like
//
// A := ... (*) NT ... [L1]
let pred_states = self.state_graph.predecessors_at_distance(item_state, item.index);
log!(self.session, Debug, "backtrace: pred_states={:?}", pred_states);
// For each such predecessor state P...
for pred_state in pred_states {
// ...scan the items in P, looking for one like:
//
// A := ... (*) NT ...x [L1]
//
// where the lookahead L is in FIRST(...x, L1).
for item in self.states[pred_state.0].items.vec.iter() {
log!(self.session, Debug, "backtrace: pred_state {:?} has item {:?}",
pred_state, item);
if let Some((shifted, remainder)) = item.shift_symbol() {
if shifted == nt_sym {
let (first, maybe_empty) = self.first_sets.first(remainder, item.lookahead);
log!(self.session, Debug, "backtrace: first={:?} maybe_empty={:?}",
first, maybe_empty);
if first.contains(&lookahead) {
// Found such a state. Now, continue
// tracing back so long as the lookahead
// may still have come from the
// surrounding context. This can occur if
// `...x` may be empty *and* the lookahead
// matches (if the lookahead doesn't
// match, then the only source for L is
// `...x`).
if maybe_empty && item.lookahead == lookahead {
let parent_node = self.backtrace(pred_state, *item);
result_node.parents.push(parent_node);
} else {
result_node.parents.push(BacktraceNode::new(*item));
}
}
}
}
}
}
result_node
}
}

View File

@ -0,0 +1,69 @@
use lr1::{Action, State, StateIndex};
use petgraph::{EdgeDirection, Graph};
use petgraph::graph::NodeIndex;
// Each state `s` corresponds to the node in the graph with index
// `s`. An edge from `p` to `s` records that `s` is a successor of `p`,
// reached either by a shift or by a goto. Neither nodes nor edges
// carry a weight.
pub struct StateGraph {
graph: Graph<(), ()>
}
impl StateGraph {
    /// Builds the graph for `states`: one node per state, in order, and
    /// one edge from a state to each of its successors.
    pub fn new<'grammar>(states: &[State<'grammar>]) -> StateGraph {
        let mut graph = Graph::new();

        // Nodes first, so that node `i` stands for state `i`.
        for expected in 0..states.len() {
            let node = graph.add_node(());
            assert_eq!(expected, node.index());
        }

        // Then the edges, one source state at a time.
        for (source, state) in states.iter().enumerate() {
            // Actions that lost out in a conflict still describe
            // transitions, so they are considered before the regular
            // `tokens` actions.
            let conflict_actions = state.conflicts.values()
                                        .flat_map(|conflicts| conflicts)
                                        .map(|conflict| &conflict.action);

            // Of all those actions, only shifts lead to another state.
            let shift_targets = conflict_actions.chain(state.tokens.values())
                                                .filter_map(|action| match *action {
                                                    Action::Shift(ref target) => Some(target),
                                                    Action::Reduce(_) => None,
                                                });

            // Gotos are successors as well.
            let successors = shift_targets.chain(state.gotos.values());

            graph.extend_with_edges(
                successors.map(|&successor| {
                    (NodeIndex::new(source), NodeIndex::new(successor.0))
                }));
        }

        StateGraph { graph: graph }
    }

    /// Returns the states from which `state_index` is reached by
    /// following exactly `distance` edges. A state is listed once per
    /// distinct path, and a `distance` of zero yields `state_index`
    /// itself.
    pub fn predecessors_at_distance(&self,
                                    state_index: StateIndex,
                                    distance: usize)
                                    -> Vec<StateIndex> {
        let mut found = Vec::new();

        // Depth-first walk over incoming edges; every entry remembers
        // how many edges separate it from `state_index`.
        let mut pending = vec![(state_index, 0)];
        loop {
            let (state, depth) = match pending.pop() {
                Some(entry) => entry,
                None => break,
            };

            if depth == distance {
                found.push(state);
                continue;
            }

            let node = NodeIndex::new(state.0);
            for pred in self.graph.neighbors_directed(node, EdgeDirection::Incoming) {
                pending.push((StateIndex(pred.index()), depth + 1));
            }
        }

        found
    }

    /// Given a state `s`, returns all states `p` where either `p ==
    /// s` or `p` is an immediate predecessor of `s`. The predecessors
    /// come first and `s` itself is last.
    pub fn predecessors_or_self(&self, state_index: StateIndex) -> Vec<StateIndex> {
        let node = NodeIndex::new(state_index.0);
        let mut states: Vec<StateIndex> =
            self.graph.neighbors_directed(node, EdgeDirection::Incoming)
                      .map(|pred| StateIndex(pred.index()))
                      .collect();
        states.push(state_index);
        states
    }
}

View File

@ -0,0 +1,162 @@
use intern::intern;
use grammar::parse_tree::TerminalLiteral;
use grammar::repr::*;
use lr1::build_states;
use lr1::interpret::interpret_partial;
use lr1::Lookahead;
use session::Session;
use test_util::{expect_debug, normalized_grammar};
use super::Tracer;
/// Builds the nonterminal whose name is `t`.
fn nt(t: &str) -> NonterminalString {
    let name = intern(t);
    NonterminalString(name)
}
/// Builds the quoted-literal terminal whose text is `t`.
fn term(t: &str) -> TerminalString {
    let literal = TerminalLiteral::Quoted(intern(t));
    TerminalString::Literal(literal)
}
// Expands to a `vec![...]` holding `term(arg)` for each comma-separated
// argument, in order; with no arguments the vector is empty.
macro_rules! terms {
($($t:expr),*) => {
vec![$(term($t)),*]
}
}
/// The grammar shared by the backtrace tests: a statement is a
/// comma-separated list of expressions with an optional trailing `;`,
/// and an expression is a `+`-separated list of `"Int"`.
fn test_grammar1() -> Grammar {
    let source = r#"
grammar;
pub Start: () = Stmt;
pub Stmt: () = {
Exprs ";",
Exprs
};
Exprs: () = {
Expr,
Exprs "," Expr
};
Expr: () = {
"Int",
Expr "+" "Int",
};
"#;
    normalized_grammar(source)
}
#[test]
fn backtrace1() {
    let grammar = test_grammar1();
    let session = Session::test();
    let states = build_states(&session, &grammar, nt("Start")).unwrap();
    let tracer = Tracer::new(&session, &grammar, &states);

    // Shift a single "Int" and look at the state that ends up on top.
    let state_stack = interpret_partial(&states, terms!["Int"]).unwrap();
    let top_state = *state_stack.last().unwrap();

    // Top state will have items like:
    //
    // Expr = "Int" (*) [EOF],
    // Expr = "Int" (*) ["+"],
    // Expr = "Int" (*) [","],
    // Expr = "Int" (*) [";"]
    //
    // Trace the one whose lookahead is ";".
    let semi = Lookahead::Terminal(term(";"));
    let semi_item = states[top_state.0].items.vec.iter()
                                       .find(|item| item.lookahead == semi)
                                       .unwrap();

    let expected = r#"BacktraceNode {
item: Expr = "Int" (*) [";"],
parents: [
BacktraceNode {
item: Exprs = (*) Expr [";"],
parents: [
BacktraceNode {
item: Stmt = (*) Exprs ";" [EOF],
parents: []
}
]
},
BacktraceNode {
item: Exprs = Exprs "," (*) Expr [";"],
parents: [
BacktraceNode {
item: Stmt = (*) Exprs ";" [EOF],
parents: []
}
]
}
]
}"#;

    let backtrace = tracer.backtrace(top_state, *semi_item);
    expect_debug(&backtrace, expected);
}
#[test]
fn backtrace2() {
    let grammar = test_grammar1();
    let session = Session::test();
    let states = build_states(&session, &grammar, nt("Start")).unwrap();
    let tracer = Tracer::new(&session, &grammar, &states);
    let state_stack = interpret_partial(&states, terms!["Int"]).unwrap();
    let top_state = *state_stack.last().unwrap();

    // Top state will have items like:
    //
    // Expr = "Int" (*) [EOF],
    // Expr = "Int" (*) ["+"],
    // Expr = "Int" (*) [","],
    // Expr = "Int" (*) [";"]
    //
    // Select the one with lookahead "+". That lookahead can only come
    // from the `"+"` following `Expr` in `Expr = (*) Expr "+" "Int"`,
    // so every parent in the expected trace is a leaf.
    let plus = Lookahead::Terminal(term("+"));
    let plus_item = states[top_state.0].items.vec.iter()
                                       .find(|item| item.lookahead == plus)
                                       .unwrap();

    let backtrace = tracer.backtrace(top_state, *plus_item);
    expect_debug(&backtrace, r#"BacktraceNode {
item: Expr = "Int" (*) ["+"],
parents: [
BacktraceNode {
item: Expr = (*) Expr "+" "Int" [EOF],
parents: []
},
BacktraceNode {
item: Expr = (*) Expr "+" "Int" ["+"],
parents: []
},
BacktraceNode {
item: Expr = (*) Expr "+" "Int" [","],
parents: []
},
BacktraceNode {
item: Expr = (*) Expr "+" "Int" [";"],
parents: []
},
BacktraceNode {
item: Expr = (*) Expr "+" "Int" [EOF],
parents: []
},
BacktraceNode {
item: Expr = (*) Expr "+" "Int" ["+"],
parents: []
},
BacktraceNode {
item: Expr = (*) Expr "+" "Int" [","],
parents: []
},
BacktraceNode {
item: Expr = (*) Expr "+" "Int" [";"],
parents: []
}
]
}"#);
}

View File

@ -96,11 +96,16 @@ impl<'session, 'grammar> LR1<'session, 'grammar> {
production: item.production,
action: conflict,
});
errors += 1;
}
}
// extract a new state
states.push(this_state);
if self.session.stop_after(errors) {
break;
}
}
if states.iter().any(|s| !s.conflicts.is_empty()) {
@ -151,7 +156,7 @@ impl<'session, 'grammar> LR1<'session, 'grammar> {
}
})
.flat_map(|(nt, remainder, lookahead)| {
let first_set = self.first_sets.first(remainder, lookahead);
let (first_set, _) = self.first_sets.first(remainder, lookahead);
first_set.into_iter()
.flat_map(move |l| self.items(nt, 0, l))
})

View File

@ -22,7 +22,7 @@ impl FirstSets {
changed = false;
for production in grammar.nonterminals.values().flat_map(|p| &p.productions) {
let nt = production.nonterminal;
let lookahead = this.first(&production.symbols, Lookahead::EOF);
let (lookahead, _) = this.first(&production.symbols, Lookahead::EOF);
let first_set = this.map.entry(nt).or_insert_with(|| set());
let cardinality = first_set.len();
first_set.extend(
@ -37,14 +37,14 @@ impl FirstSets {
this
}
pub fn first(&self, symbols: &[Symbol], lookahead: Lookahead) -> Vec<Lookahead> {
pub fn first(&self, symbols: &[Symbol], lookahead: Lookahead) -> (Vec<Lookahead>, bool) {
let mut result = vec![];
for symbol in symbols {
match *symbol {
Symbol::Terminal(t) => {
result.push(Lookahead::Terminal(t));
return result;
return (result, false);
}
Symbol::Nonterminal(nt) => {
@ -70,14 +70,14 @@ impl FirstSets {
}
}
if !empty_prod {
return result;
return (result, false);
}
}
}
}
result.push(lookahead);
result
(result, true)
}
}

View File

@ -18,7 +18,7 @@ fn la(t: &str) -> Lookahead {
}
fn first(first: &FirstSets, symbols: &[Symbol], lookahead: Lookahead) -> Vec<Lookahead> {
let mut v = first.first(symbols, lookahead);
let (mut v, _) = first.first(symbols, lookahead);
v.sort();
v
}

View File

@ -1,6 +1,6 @@
//! LR(1) interpreter. Just builds up parse trees. Intended for testing.
use lr1::{Action, State, Lookahead};
use lr1::{Action, State, StateIndex, Lookahead};
use generate::ParseTree;
use grammar::repr::*;
use std::iter::IntoIterator;
@ -9,6 +9,7 @@ use util::Sep;
pub type InterpretError<'grammar> = (&'grammar State<'grammar>, Lookahead);
/// Feed in the given tokens and then EOF, returning the final parse tree that is reduced.
pub fn interpret<'grammar,TOKENS>(states: &'grammar [State<'grammar>], tokens: TOKENS)
-> Result<ParseTree, InterpretError<'grammar>>
where TOKENS: IntoIterator<Item=TerminalString>
@ -17,9 +18,19 @@ pub fn interpret<'grammar,TOKENS>(states: &'grammar [State<'grammar>], tokens: T
m.execute(tokens.into_iter())
}
/// Feeds the given tokens -- without a trailing EOF -- into a fresh
/// machine and returns the indices of the states left on its stack.
/// An error raised while processing the tokens is returned unchanged.
pub fn interpret_partial<'grammar,TOKENS>(states: &'grammar [State<'grammar>], tokens: TOKENS)
                                          -> Result<Vec<StateIndex>, InterpretError<'grammar>>
    where TOKENS: IntoIterator<Item=TerminalString>
{
    let mut machine = Machine::new(states);
    match machine.execute_partial(tokens.into_iter()) {
        Ok(()) => Ok(machine.state_stack),
        Err(error) => Err(error),
    }
}
struct Machine<'grammar> {
states: &'grammar [State<'grammar>],
state_stack: Vec<&'grammar State<'grammar>>,
state_stack: Vec<StateIndex>,
data_stack: Vec<ParseTree>,
}
@ -30,18 +41,23 @@ impl<'grammar> Machine<'grammar> {
data_stack: vec![] }
}
fn execute<TOKENS>(&mut self, mut tokens: TOKENS)
-> Result<ParseTree, InterpretError<'grammar>>
fn top_state(&self) -> &'grammar State<'grammar> {
let index = self.state_stack.last().unwrap();
&self.states[index.0]
}
fn execute_partial<TOKENS>(&mut self, mut tokens: TOKENS)
-> Result<(), InterpretError<'grammar>>
where TOKENS: Iterator<Item=TerminalString>
{
assert!(self.state_stack.is_empty());
assert!(self.data_stack.is_empty());
self.state_stack.push(&self.states[0]);
self.state_stack.push(StateIndex(0));
let mut token = tokens.next();
while let Some(terminal) = token {
let state = *self.state_stack.last().unwrap();
let state = self.top_state();
// check whether we can shift this token
match state.tokens.get(&Lookahead::Terminal(terminal)) {
@ -49,7 +65,7 @@ impl<'grammar> Machine<'grammar> {
Some(&Action::Shift(next_index)) => {
self.data_stack.push(ParseTree::Terminal(terminal));
self.state_stack.push(&self.states[next_index.0]);
self.state_stack.push(next_index);
token = tokens.next();
}
@ -60,9 +76,18 @@ impl<'grammar> Machine<'grammar> {
}
}
Ok(())
}
fn execute<TOKENS>(&mut self, tokens: TOKENS)
-> Result<ParseTree, InterpretError<'grammar>>
where TOKENS: Iterator<Item=TerminalString>
{
try!(self.execute_partial(tokens));
// drain now for EOF
loop {
let state = *self.state_stack.last().unwrap();
let state = self.top_state();
match state.tokens.get(&Lookahead::EOF) {
None => { return Err((state, Lookahead::EOF)); }
Some(&Action::Shift(_)) => { unreachable!("cannot shift EOF") }
@ -96,10 +121,10 @@ impl<'grammar> Machine<'grammar> {
self.data_stack.push(tree);
// recover the state and extract the "Goto" action
let receiving_state = *self.state_stack.last().unwrap();
let receiving_state = self.top_state();
match receiving_state.gotos.get(&production.nonterminal) {
Some(goto_state) => {
self.state_stack.push(&self.states[goto_state.0]);
Some(&goto_state) => {
self.state_stack.push(goto_state);
true // keep going
}
None => {

View File

@ -9,6 +9,7 @@ use util::{Map, Prefix};
pub mod ascent;
mod backtrace;
mod core;
mod error;
mod first;
@ -51,7 +52,7 @@ struct Items<'grammar> {
}
#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
struct StateIndex(usize);
pub struct StateIndex(usize);
#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum Lookahead {
@ -66,6 +67,28 @@ struct Item<'grammar> {
lookahead: Lookahead,
}
/// Stores a backtrace tree used in error reporting. Consider a simple
/// example where we want the backtrace of EXPR with lookahead `,`,
/// given this grammar:
///
///     START = EXPRS ";"
///           | EXPRS
///     EXPRS = EXPR
///           | EXPRS "," EXPR
///     EXPR = ...
///
/// The result is a sort of inverted tree, in which the parents of a
/// node are the items that explain why that node's item exists:
///
///     EXPR = ... (*) [","]
///         EXPRS = (*) EXPR [","]
///             EXPRS = (*) EXPRS "," EXPR [";"]
///             EXPRS = (*) EXPRS "," EXPR [EOF]
#[derive(Debug)]
struct BacktraceNode<'grammar> {
// The item that this node explains.
item: Item<'grammar>,
// The items from which `item` arose; empty when the trace stops here.
parents: Vec<BacktraceNode<'grammar>>,
}
#[derive(Debug)]
pub struct TableConstructionError<'grammar> {
// LR(1) state set. Some of these states are in error.
@ -180,3 +203,9 @@ impl<'grammar> Action<'grammar> {
}
}
}
impl<'grammar> BacktraceNode<'grammar> {
    /// Creates a node for `item` with an empty list of parents.
    fn new(item: Item<'grammar>) -> Self {
        let parents = Vec::new();
        BacktraceNode { item: item, parents: parents }
    }
}

View File

@ -4,6 +4,11 @@ use log::{Log, Level};
pub struct Session {
// Logger for this session; its level is changed by `set_log_level`.
log: Log,
// Flag reported by `force_build()`.
force_build: bool,
/// Stop after you find `max_errors` errors. If this value is 0,
/// report *all* errors. Note that we MAY always report more than
/// this value if we so choose.
max_errors: usize,
}
impl Session {
@ -11,6 +16,7 @@ impl Session {
Session {
log: Log::new(Level::Informative),
force_build: false,
max_errors: 1,
}
}
@ -20,6 +26,7 @@ impl Session {
Session {
log: Log::new(Level::Debug),
force_build: false,
max_errors: 1,
}
}
@ -27,10 +34,20 @@ impl Session {
self.force_build = true;
}
/// Sets the error count at which `stop_after` starts returning true.
/// Passing 0 removes the limit: `stop_after` then never returns true.
pub fn set_max_errors(&mut self, errors: usize) {
self.max_errors = errors;
}
/// Sets the level of this session's log to `level`.
pub fn set_log_level(&mut self, level: Level) {
self.log.set_level(level);
}
/// Tells whether processing should stop once `actual_errors` errors
/// have been reported. A limit of 0 means "no limit", in which case
/// the answer is always `false`; otherwise the answer is `true` as
/// soon as the limit has been reached.
pub fn stop_after(&self, actual_errors: usize) -> bool {
    match self.max_errors {
        0 => false,
        limit => actual_errors >= limit,
    }
}
/// Returns the current value of the `force_build` flag.
pub fn force_build(&self) -> bool {
self.force_build
}