Implement backtrace

2025-03-16 17:00:53 +00:00 · 2016-02-02 06:31:35 -05:00 · 2016-02-02 06:31:35 -05:00 · 22d3c904a9
commit 22d3c904a9
parent 52e7208239
10 changed files with 421 additions and 113 deletions
--- a/lalrpop/src/lr1/backtrace.rs
+++ b/lalrpop/src/lr1/backtrace.rs
@ -1,94 +0,0 @@
-use petgraph::graph::{Graph, NodeIndex};
-
-struct Backtrace<'grammar> {
-    items: Vec<Item<'grammar>>
-}
-
-/// Given that the state `state` can reduce `production` when seeing
-/// `lookahead` --- in other words, that it contains
-///
-///    NT = ... (*) [L]
-///
-/// where `production = NT = ...` --- then this function traverses the
-/// state graph to give a backtrace explaining *why* this state exists
-/// with that production and lookahead in the first place.
-pub fn backtraces(states: &[State<'grammar>],
-                  state: StateIndex,
-                  production: &'grammar Production,
-                  lookahead: Lookaround)
-                  -> Vec<Backtrace<'grammar>> {
-}
-
-// Each state `s` corresponds to the node in the graph with index
-// `s`. The edges are the shift transitions.
-type StateGraph = Graph<(), TerminalString>;
-
-fn state_graph(states: &[State<'grammar>]) -> StateGraph {
-    let mut graph = Graph::new();
-
-    // First, create the nodes.
-    for i in 0..states.len() {
-        let j = graph.add_node(());
-        assert_eq!(i, j);
-    }
-
-    // Add in the edges.
-    for (i, state) in states.iter().enumerate() {
-        let all_actions =
-            state.conflicts.iter()
-                           .flat_map(|(lookahead, conflicts)| {
-                               conflicts.iter()
-                                        .map(|c| (lookahead, &c.action))
-                           })
-                           .chain(state.actions.iter());
-        for (&lookahead, action) in all_actions {
-            match action {
-                Action::Shift(target) => { graph.add_edge(i, target.0 as usize, lookahead); }
-                Action::Reduce(_) => { }
-            }
-        }
-    }
-}
-
-{
-    if item.index == 0 {
-        // The item that caused a conflict looked like:
-        //
-        //     X := (*) ...y [K]
-        //
-        // This could arise in two scenarios. Either there
-        // is another item in the same state like:
-        //
-        //     Z := ... (*) X ...
-        //
-        // and hence we added the conflicting item due to an epsilon
-        // move, or else in some other item we have:
-        
-        
-
-        //     
-        
-        //
-        //     
-    }
-
-    
-    for state in states {
-        for item in &state.items {
-            // Look for an item in some state like:
-            //
-            //     X := ...x (*) Y ...z [K]
-            //
-            // or
-            //
-            //     Y := ...x (*) [K]
-            //
-            // where `Y` is the nonterminal that we are looking for, and
-            // lookahead in `FIRST(...z, K)`.
-            // Note that these items might appear in the *same state*
-            
-            
-        }
-    }
-}
-
--- a/lalrpop/src/lr1/backtrace/mod.rs
+++ b/lalrpop/src/lr1/backtrace/mod.rs
@ -0,0 +1,95 @@
+use lr1::first::FirstSets;
+use lr1::{BacktraceNode, Item, State, StateIndex};
+use grammar::repr::*;
+use session::Session;
+use self::state_graph::StateGraph;
+
+mod state_graph;
+mod test;
+
+pub struct Tracer<'trace, 'grammar: 'trace> {
+    session: &'trace Session,
+    states: &'trace [State<'grammar>],
+    first_sets: FirstSets,
+    state_graph: StateGraph,
+}
+
+impl<'trace, 'grammar> Tracer<'trace, 'grammar> {
+    pub fn new(session: &'trace Session,
+               grammar: &'grammar Grammar,
+               states: &'trace [State<'grammar>])
+               -> Self {
+        Tracer {
+            session: session,
+            states: states,
+            first_sets: FirstSets::new(grammar),
+            state_graph: StateGraph::new(states),
+        }
+    }
+
+    /// Returns a backtrace explaining how the state `item_state` came
+    /// to contain the item `item`:
+    ///
+    ///    NT = ... (*) ... [L]
+    ///
+    /// In particular, how we came to be able to reduce `NT` with
+    /// lookahead `L`.
+    pub fn backtrace(&self, item_state: StateIndex, item: Item<'grammar>)
+                     -> BacktraceNode<'grammar> {
+        log!(self.session, Debug, "backtrace(item_state={:?} item={:?})", item_state, item);
+
+        let mut result_node = BacktraceNode::new(item);
+
+        // The nonterminal NT and lookahead L we are looking for
+        let nt_sym = Symbol::Nonterminal(item.production.nonterminal);
+        let lookahead = item.lookahead;
+
+        // We will have arrived at the current state after pushing N
+        // symbols, where N is the number of symbols pushed so far in
+        // `item`. So walk backwards N states to find the state(s)
+        // where we had something like
+        //
+        //     A := ... (*) NT ... [L1]
+        let pred_states = self.state_graph.predecessors_at_distance(item_state, item.index);
+        log!(self.session, Debug, "backtrace: pred_states={:?}", pred_states);
+
+        // For each such predecessor state P...
+        for pred_state in pred_states {
+            // ...scan the items in P, looking for one like:
+            //
+            //     A := ... (*) NT ...x [L1]
+            //
+            // where the lookahead L is in FIRST(...x, L1).
+            for item in self.states[pred_state.0].items.vec.iter() {
+                log!(self.session, Debug, "backtrace: pred_state {:?} has item {:?}",
+                     pred_state, item);
+                if let Some((shifted, remainder)) = item.shift_symbol() {
+                    if shifted == nt_sym {
+                        let (first, maybe_empty) = self.first_sets.first(remainder, item.lookahead);
+                        log!(self.session, Debug, "backtrace: first={:?} maybe_empty={:?}",
+                             first, maybe_empty);
+                        if first.contains(&lookahead) {
+                            // Found such a state. Now, continue
+                            // tracing back so long as the lookahead
+                            // may still have come from the
+                            // surrounding context. This can occur if
+                            // `...x` may be empty *and* the lookahead
+                            // matches (if the lookahead doesn't
+                            // match, then the only source for L is
+                            // `...x`).
+                            if maybe_empty && item.lookahead == lookahead {
+                                let parent_node = self.backtrace(pred_state, *item);
+                                result_node.parents.push(parent_node);
+                            } else {
+                                result_node.parents.push(BacktraceNode::new(*item));
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        result_node
+    }
+}
+
--- a/lalrpop/src/lr1/backtrace/state_graph.rs
+++ b/lalrpop/src/lr1/backtrace/state_graph.rs
@ -0,0 +1,69 @@
+use lr1::{Action, State, StateIndex};
+use petgraph::{EdgeDirection, Graph};
+use petgraph::graph::NodeIndex;
+
+// Each state `s` corresponds to the node in the graph with index
+// `s`. The edges are the shift and goto transitions.
+pub struct StateGraph {
+    graph: Graph<(), ()>
+}
+
+impl StateGraph {
+    pub fn new<'grammar>(states: &[State<'grammar>]) -> StateGraph {
+        let mut graph = Graph::new();
+
+        // First, create the nodes.
+        for i in 0..states.len() {
+            let j = graph.add_node(());
+            assert_eq!(i, j.index());
+        }
+
+        // Add in the edges.
+        for (i, state) in states.iter().enumerate() {
+            // Successors of a node arise from:
+            // - shifts (found in the `conflicts` and `tokens` maps)
+            // - gotos (found in the `gotos` map)
+            graph.extend_with_edges(
+                state.conflicts.values()
+                               .flat_map(|conflicts| conflicts)
+                               .map(|conflict| &conflict.action)
+                               .chain(state.tokens.values())
+                               .filter_map(|action| match *action {
+                                   Action::Shift(ref target) => Some(target),
+                                   Action::Reduce(_) => None,
+                               })
+                               .chain(state.gotos.values())
+                               .map(|&successor| (NodeIndex::new(i), NodeIndex::new(successor.0))));
+        }
+
+        StateGraph { graph: graph }
+    }
+
+    pub fn predecessors_at_distance(&self,
+                                    state_index: StateIndex,
+                                    distance: usize)
+                                    -> Vec<StateIndex> {
+        let mut result = vec![];
+        let mut stack = Vec::new();
+        stack.push((state_index, 0));
+        while let Some((n, d)) = stack.pop() {
+            if d == distance {
+                result.push(n);
+            } else {
+                stack.extend(
+                    self.graph.neighbors_directed(NodeIndex::new(n.0), EdgeDirection::Incoming)
+                              .map(|pred| (StateIndex(pred.index()), d + 1)));
+            }
+        }
+        result
+    }
+
+    /// Given a state `s`, returns all states `p` where either `p ==
+    /// s` or `p` is an immediate predecessor of `s`.
+    pub fn predecessors_or_self(&self, state_index: StateIndex) -> Vec<StateIndex> {
+        self.graph.neighbors_directed(NodeIndex::new(state_index.0), EdgeDirection::Incoming)
+                  .map(|n| StateIndex(n.index()))
+                  .chain(Some(state_index))
+                  .collect()
+    }
+}
--- a/lalrpop/src/lr1/backtrace/test.rs
+++ b/lalrpop/src/lr1/backtrace/test.rs
@ -0,0 +1,162 @@
+use intern::intern;
+use grammar::parse_tree::TerminalLiteral;
+use grammar::repr::*;
+use lr1::build_states;
+use lr1::interpret::interpret_partial;
+use lr1::Lookahead;
+use session::Session;
+use test_util::{expect_debug, normalized_grammar};
+
+use super::Tracer;
+
+fn nt(t: &str) -> NonterminalString {
+    NonterminalString(intern(t))
+}
+
+fn term(t: &str) -> TerminalString {
+    TerminalString::Literal(TerminalLiteral::Quoted(intern(t)))
+}
+
+macro_rules! terms {
+    ($($t:expr),*) => {
+        vec![$(term($t)),*]
+    }
+}
+
+fn test_grammar1() -> Grammar {
+    normalized_grammar(r#"
+    grammar;
+
+    pub Start: () = Stmt;
+
+    pub Stmt: () = {
+        Exprs ";",
+        Exprs
+    };
+
+    Exprs: () = {
+        Expr,
+        Exprs "," Expr
+    };
+
+    Expr: () = {
+        "Int",
+        Expr "+" "Int",
+    };
+"#)
+}
+
+#[test]
+fn backtrace1() {
+    let grammar = test_grammar1();
+    let session = Session::test();
+    let states = build_states(&session, &grammar, nt("Start")).unwrap();
+    let tracer = Tracer::new(&session, &grammar, &states);
+    let state_stack = interpret_partial(&states, terms!["Int"]).unwrap();
+    let top_state = *state_stack.last().unwrap();
+
+    // Top state will have items like:
+    //
+    // Expr = "Int" (*) [EOF],
+    // Expr = "Int" (*) ["+"],
+    // Expr = "Int" (*) [","],
+    // Expr = "Int" (*) [";"]
+    //
+    // Select the last one.
+    let semi = Lookahead::Terminal(term(";"));
+    let semi_item = states[top_state.0].items.vec.iter()
+                                                 .filter(|item| item.lookahead == semi)
+                                                 .next()
+                                                 .unwrap();
+
+    let backtrace = tracer.backtrace(top_state, *semi_item);
+
+    expect_debug(&backtrace, r#"BacktraceNode {
+    item: Expr = "Int" (*) [";"],
+    parents: [
+        BacktraceNode {
+            item: Exprs = (*) Expr [";"],
+            parents: [
+                BacktraceNode {
+                    item: Stmt = (*) Exprs ";" [EOF],
+                    parents: []
+                }
+            ]
+        },
+        BacktraceNode {
+            item: Exprs = Exprs "," (*) Expr [";"],
+            parents: [
+                BacktraceNode {
+                    item: Stmt = (*) Exprs ";" [EOF],
+                    parents: []
+                }
+            ]
+        }
+    ]
+}"#);
+}
+
+#[test]
+fn backtrace2() {
+    let grammar = test_grammar1();
+    let session = Session::test();
+    let states = build_states(&session, &grammar, nt("Start")).unwrap();
+    let tracer = Tracer::new(&session, &grammar, &states);
+    let state_stack = interpret_partial(&states, terms!["Int"]).unwrap();
+    let top_state = *state_stack.last().unwrap();
+
+    // Top state will have items like:
+    //
+    // Expr = "Int" (*) [EOF],
+    // Expr = "Int" (*) ["+"],
+    // Expr = "Int" (*) [","],
+    // Expr = "Int" (*) [";"]
+    //
+    // Select the one with lookahead "+".
+    let plus = Lookahead::Terminal(term("+"));
+    let plus_item = states[top_state.0].items.vec.iter()
+                                                 .filter(|item| item.lookahead == plus)
+                                                 .next()
+                                                 .unwrap();
+
+    let backtrace = tracer.backtrace(top_state, *plus_item);
+
+    println!("{:#?}", backtrace);
+    expect_debug(&backtrace, r#"BacktraceNode {
+    item: Expr = "Int" (*) ["+"],
+    parents: [
+        BacktraceNode {
+            item: Expr = (*) Expr "+" "Int" [EOF],
+            parents: []
+        },
+        BacktraceNode {
+            item: Expr = (*) Expr "+" "Int" ["+"],
+            parents: []
+        },
+        BacktraceNode {
+            item: Expr = (*) Expr "+" "Int" [","],
+            parents: []
+        },
+        BacktraceNode {
+            item: Expr = (*) Expr "+" "Int" [";"],
+            parents: []
+        },
+        BacktraceNode {
+            item: Expr = (*) Expr "+" "Int" [EOF],
+            parents: []
+        },
+        BacktraceNode {
+            item: Expr = (*) Expr "+" "Int" ["+"],
+            parents: []
+        },
+        BacktraceNode {
+            item: Expr = (*) Expr "+" "Int" [","],
+            parents: []
+        },
+        BacktraceNode {
+            item: Expr = (*) Expr "+" "Int" [";"],
+            parents: []
+        }
+    ]
+}"#);
+}
--- a/lalrpop/src/lr1/core/mod.rs
+++ b/lalrpop/src/lr1/core/mod.rs
@ -96,11 +96,16 @@ impl<'session, 'grammar> LR1<'session, 'grammar> {
                                            production: item.production,
                                            action: conflict,
                                        });
+                    errors += 1;
                }
            }

            // extract a new state
            states.push(this_state);
+
+            if self.session.stop_after(errors) {
+                break;
+            }
        }

        if states.iter().any(|s| !s.conflicts.is_empty()) {
@ -151,7 +156,7 @@ impl<'session, 'grammar> LR1<'session, 'grammar> {
                    }
                })
                .flat_map(|(nt, remainder, lookahead)| {
-                    let first_set = self.first_sets.first(remainder, lookahead);
+                    let (first_set, _) = self.first_sets.first(remainder, lookahead);
                    first_set.into_iter()
                             .flat_map(move |l| self.items(nt, 0, l))
                })
--- a/lalrpop/src/lr1/first/mod.rs
+++ b/lalrpop/src/lr1/first/mod.rs
@ -22,7 +22,7 @@ impl FirstSets {
            changed = false;
            for production in grammar.nonterminals.values().flat_map(|p| &p.productions) {
                let nt = production.nonterminal;
-                let lookahead = this.first(&production.symbols, Lookahead::EOF);
+                let (lookahead, _) = this.first(&production.symbols, Lookahead::EOF);
                let first_set = this.map.entry(nt).or_insert_with(|| set());
                let cardinality = first_set.len();
                first_set.extend(
@ -37,14 +37,14 @@ impl FirstSets {
        this
    }

-    pub fn first(&self, symbols: &[Symbol], lookahead: Lookahead) -> Vec<Lookahead> {
+    pub fn first(&self, symbols: &[Symbol], lookahead: Lookahead) -> (Vec<Lookahead>, bool) {
        let mut result = vec![];

        for symbol in symbols {
            match *symbol {
                Symbol::Terminal(t) => {
                    result.push(Lookahead::Terminal(t));
-                    return result;
+                    return (result, false);
                }

                Symbol::Nonterminal(nt) => {
@ -70,14 +70,14 @@ impl FirstSets {
                        }
                    }
                    if !empty_prod {
-                        return result;
+                        return (result, false);
                    }
                }
            }
        }

        result.push(lookahead);
-        result
+        (result, true)
    }
 }

--- a/lalrpop/src/lr1/first/test.rs
+++ b/lalrpop/src/lr1/first/test.rs
@ -18,7 +18,7 @@ fn la(t: &str) -> Lookahead {
 }

 fn first(first: &FirstSets, symbols: &[Symbol], lookahead: Lookahead) -> Vec<Lookahead> {
-    let mut v = first.first(symbols, lookahead);
+    let (mut v, _) = first.first(symbols, lookahead);
    v.sort();
    v
 }
--- a/lalrpop/src/lr1/interpret.rs
+++ b/lalrpop/src/lr1/interpret.rs
@ -1,6 +1,6 @@
 //! LR(1) interpeter. Just builds up parse trees. Intended for testing.

-use lr1::{Action, State, Lookahead};
+use lr1::{Action, State, StateIndex, Lookahead};
 use generate::ParseTree;
 use grammar::repr::*;
 use std::iter::IntoIterator;
@ -9,6 +9,7 @@ use util::Sep;

 pub type InterpretError<'grammar> = (&'grammar State<'grammar>, Lookahead);

+/// Feed in the given tokens and then EOF, returning the final parse tree that is reduced.
 pub fn interpret<'grammar,TOKENS>(states: &'grammar [State<'grammar>], tokens: TOKENS)
                         -> Result<ParseTree, InterpretError<'grammar>>
    where TOKENS: IntoIterator<Item=TerminalString>
@ -17,9 +18,19 @@ pub fn interpret<'grammar,TOKENS>(states: &'grammar [State<'grammar>], tokens: T
    m.execute(tokens.into_iter())
 }

+/// Feed in the given tokens (but not EOF) and return the states on the stack.
+pub fn interpret_partial<'grammar,TOKENS>(states: &'grammar [State<'grammar>], tokens: TOKENS)
+                                          -> Result<Vec<StateIndex>, InterpretError<'grammar>>
+    where TOKENS: IntoIterator<Item=TerminalString>
+{
+    let mut m = Machine::new(states);
+    try!(m.execute_partial(tokens.into_iter()));
+    Ok(m.state_stack)
+}
+
 struct Machine<'grammar> {
    states: &'grammar [State<'grammar>],
-    state_stack: Vec<&'grammar State<'grammar>>,
+    state_stack: Vec<StateIndex>,
    data_stack: Vec<ParseTree>,
 }

@ -30,18 +41,23 @@ impl<'grammar> Machine<'grammar> {
                  data_stack: vec![] }
    }

-    fn execute<TOKENS>(&mut self, mut tokens: TOKENS)
-                       -> Result<ParseTree, InterpretError<'grammar>>
+    fn top_state(&self) -> &'grammar State<'grammar> {
+        let index = self.state_stack.last().unwrap();
+        &self.states[index.0]
+    }
+
+    fn execute_partial<TOKENS>(&mut self, mut tokens: TOKENS)
+                               -> Result<(), InterpretError<'grammar>>
        where TOKENS: Iterator<Item=TerminalString>
    {
        assert!(self.state_stack.is_empty());
        assert!(self.data_stack.is_empty());

-        self.state_stack.push(&self.states[0]);
+        self.state_stack.push(StateIndex(0));

        let mut token = tokens.next();
        while let Some(terminal) = token {
-            let state = *self.state_stack.last().unwrap();
+            let state = self.top_state();

            // check whether we can shift this token
            match state.tokens.get(&Lookahead::Terminal(terminal)) {
@ -49,7 +65,7 @@ impl<'grammar> Machine<'grammar> {

                Some(&Action::Shift(next_index)) => {
                    self.data_stack.push(ParseTree::Terminal(terminal));
-                    self.state_stack.push(&self.states[next_index.0]);
+                    self.state_stack.push(next_index);
                    token = tokens.next();
                }

@ -60,9 +76,18 @@ impl<'grammar> Machine<'grammar> {
            }
        }

+        Ok(())
+    }
+
+    fn execute<TOKENS>(&mut self, tokens: TOKENS)
+                           -> Result<ParseTree, InterpretError<'grammar>>
+        where TOKENS: Iterator<Item=TerminalString>
+    {
+        try!(self.execute_partial(tokens));
+
        // drain now for EOF
        loop {
-            let state = *self.state_stack.last().unwrap();
+            let state = self.top_state();
            match state.tokens.get(&Lookahead::EOF) {
                None => { return Err((state, Lookahead::EOF)); }
                Some(&Action::Shift(_)) => { unreachable!("cannot shift EOF") }
@ -96,10 +121,10 @@ impl<'grammar> Machine<'grammar> {
        self.data_stack.push(tree);

        // recover the state and extract the "Goto" action
-        let receiving_state = *self.state_stack.last().unwrap();
+        let receiving_state = self.top_state();
        match receiving_state.gotos.get(&production.nonterminal) {
-            Some(goto_state) => {
-                self.state_stack.push(&self.states[goto_state.0]);
+            Some(&goto_state) => {
+                self.state_stack.push(goto_state);
                true // keep going
            }
            None => {
--- a/lalrpop/src/lr1/mod.rs
+++ b/lalrpop/src/lr1/mod.rs
@ -9,6 +9,7 @@ use util::{Map, Prefix};

 pub mod ascent;

+mod backtrace;
 mod core;
 mod error;
 mod first;
@ -51,7 +52,7 @@ struct Items<'grammar> {
 }

 #[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
-struct StateIndex(usize);
+pub struct StateIndex(usize);

 #[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
 pub enum Lookahead {
@ -66,6 +67,28 @@ struct Item<'grammar> {
    lookahead: Lookahead,
 }

+/// Stores a backtrace tree used in error reporting. Consider a simple
+/// example where we want the backtrace of EXPR with lookahead `,`,
+/// given this grammar:
+///
+///     START = EXPRS ";"
+///           | EXPRS
+///     EXPRS = EXPR
+///           | EXPRS "," EXPR
+///     EXPR = ...
+///
+/// The result would be a sort of inverted tree like:
+///
+///     EXPR = ... (*) [","]
+///         EXPRS = (*) EXPR [","]
+///             EXPRS = (*) EXPRS "," EXPR [";"]
+///             EXPRS = (*) EXPRS "," EXPR [EOF]
+#[derive(Debug)]
+struct BacktraceNode<'grammar> {
+    item: Item<'grammar>,
+    parents: Vec<BacktraceNode<'grammar>>,
+}
+
 #[derive(Debug)]
 pub struct TableConstructionError<'grammar> {
    // LR(1) state set. Some of these states are in error.
@ -180,3 +203,9 @@ impl<'grammar> Action<'grammar> {
        }
    }
 }
+
+impl<'grammar> BacktraceNode<'grammar> {
+    fn new(item: Item<'grammar>) -> Self {
+        BacktraceNode { item: item, parents: vec![] }
+    }
+}
--- a/lalrpop/src/session.rs
+++ b/lalrpop/src/session.rs
@ -4,6 +4,11 @@ use log::{Log, Level};
 pub struct Session {
    log: Log,
    force_build: bool,
+
+    /// Stop after you find `max_errors` errors. If this value is 0,
+    /// report *all* errors. Note that we MAY always report more than
+    /// this value if we so choose.
+    max_errors: usize,
 }

 impl Session {
@ -11,6 +16,7 @@ impl Session {
        Session {
            log: Log::new(Level::Informative),
            force_build: false,
+            max_errors: 1,
        }
    }

@ -20,6 +26,7 @@ impl Session {
        Session {
            log: Log::new(Level::Debug),
            force_build: false,
+            max_errors: 1,
        }
    }

@ -27,10 +34,20 @@ impl Session {
        self.force_build = true;
    }

+    pub fn set_max_errors(&mut self, errors: usize) {
+        self.max_errors = errors;
+    }
+
    pub fn set_log_level(&mut self, level: Level) {
        self.log.set_level(level);
    }

+    /// Indicates whether we should stop after `actual_errors` number
+    /// of errors have been reported.
+    pub fn stop_after(&self, actual_errors: usize) -> bool {
+        self.max_errors != 0 && actual_errors >= self.max_errors
+    }
+
    pub fn force_build(&self) -> bool {
        self.force_build
    }