WIP

2025-03-16 17:00:53 +00:00 · 2016-02-02 04:52:33 -05:00 · 2016-02-02 04:52:33 -05:00 · 52e7208239
commit 52e7208239
parent e45d27bc3d
5 changed files with 171 additions and 62 deletions
--- a/lalrpop/src/lr1/backtrace.rs
+++ b/lalrpop/src/lr1/backtrace.rs
@ -0,0 +1,94 @@
+use petgraph::graph::{Graph, NodeIndex};
+
+/// One explanation, expressed as a list of LR(1) items, of why a
+/// state ended up containing a particular item.
+///
+/// NOTE(review): nothing in this file populates `items` yet, so the
+/// ordering and exact meaning of the entries are still to be defined.
+struct Backtrace<'grammar> {
+    items: Vec<Item<'grammar>>
+}
+
+/// Given that the state `state` can reduce `production` when seeing
+/// `lookahead` --- in other words, that it contains
+///
+///    NT = ... (*) [L]
+///
+/// where `production = NT = ...` --- then this function traverses the
+/// state graph to give a backtrace explaining *why* this state exists
+/// with that production and lookahead in the first place.
+///
+/// Not implemented yet: calling this panics via `unimplemented!()`.
+///
+/// NOTE(review): `Lookaround` is not a type that appears anywhere else
+/// in this change (the rest of `lr1` uses `Lookahead`) --- confirm the
+/// intended type.
+pub fn backtraces<'grammar>(states: &[State<'grammar>],
+                            state: StateIndex,
+                            production: &'grammar Production,
+                            lookahead: Lookaround)
+                            -> Vec<Backtrace<'grammar>> {
+    // Stub: fail loudly rather than return an empty list, which a
+    // caller would read as "no backtraces exist".
+    unimplemented!()
+}
+
+// Each state `s` corresponds to the node in the graph with index
+// `s`. The edges are the shift transitions.
+type StateGraph = Graph<(), TerminalString>;
+
+/// Builds the graph of shift transitions between `states`.
+///
+/// Node `i` of the result corresponds to `states[i]`. For every
+/// `Action::Shift(target)` found in a state --- either in its `tokens`
+/// table or as the competing action of one of its recorded
+/// `conflicts` --- an edge is added from that state to `target`,
+/// weighted by the lookahead. Reduce actions add no edges.
+///
+/// NOTE(review): the weight passed to `add_edge` is the key of the
+/// `tokens`/`conflicts` maps (a `Lookahead`), while `StateGraph`
+/// declares `TerminalString` edge weights --- one of the two still
+/// needs to be adjusted.
+fn state_graph<'grammar>(states: &[State<'grammar>]) -> StateGraph {
+    let mut graph = Graph::new();
+
+    // First, create the nodes. Nodes are numbered in insertion order,
+    // so node `i` lines up with `states[i]`.
+    for i in 0..states.len() {
+        let j = graph.add_node(());
+        assert_eq!(i, j.index());
+    }
+
+    // Add in the edges.
+    for (i, state) in states.iter().enumerate() {
+        // Actions displaced by a conflict are no longer in `tokens`, so
+        // visit them as well as the actions that are still registered.
+        let all_actions =
+            state.conflicts.iter()
+                           .flat_map(|(lookahead, conflicts)| {
+                               conflicts.iter()
+                                        .map(move |c| (lookahead, &c.action))
+                           })
+                           .chain(state.tokens.iter());
+        for (&lookahead, action) in all_actions {
+            match *action {
+                Action::Shift(target) => {
+                    graph.add_edge(NodeIndex::new(i),
+                                   NodeIndex::new(target.0 as usize),
+                                   lookahead);
+                }
+                Action::Reduce(_) => { }
+            }
+        }
+    }
+
+    graph
+}
+
+{
+    if item.index == 0 {
+        // The item that caused a conflict looked like:
+        //
+        //     X := (*) ...y [K]
+        //
+        // This could arise in two scenarios. Either there
+        // is another item in the same state like:
+        //
+        //     Z := ... (*) X ...
+        //
+        // and hence we added the conflicting item due to an epsilon
+        // move, or else in some other item we have:
+        
+        
+
+        //     
+        
+        //
+        //     
+    }
+
+    
+    for state in states {
+        for item in &state.items {
+            // Look for an item in some state like:
+            //
+            //     X := ...x (*) Y ...z [K]
+            //
+            // or
+            //
+            //     Y := ...x (*) [K]
+            //
+            // where `Y` is the nonterminal that we are looking for, and
+            // the lookahead is in `FIRST(...z, K)`.
+            // Note that these items might appear in the *same state*.
+            
+            
+        }
+    }
+}
+
--- a/lalrpop/src/lr1/core/mod.rs
+++ b/lalrpop/src/lr1/core/mod.rs
@ -4,7 +4,7 @@ use kernel_set;
 use session::Session;
 use grammar::repr::*;
 use lr1::first;
-use lr1::{Action, Lookahead, Item, Items, State, StateIndex, TableConstructionError};
+use lr1::{Action, Conflict, Lookahead, Item, Items, State, StateIndex, TableConstructionError};
 use std::rc::Rc;
 use util::{map, Multimap, Set};

@ -40,6 +40,7 @@ impl<'session, 'grammar> LR1<'session, 'grammar> {
    {
        let mut kernel_set = kernel_set::KernelSet::new();
        let mut states = vec![];
+        let mut errors = 0;

        // create the starting state
        kernel_set.add_state(
@ -52,7 +53,8 @@ impl<'session, 'grammar> LR1<'session, 'grammar> {
                 index, items.vec.len());

            let mut this_state = State { index: index, items: items.clone(),
-                                         tokens: map(), gotos: map() };
+                                         tokens: map(), gotos: map(),
+                                         conflicts: map() };

            // group the items that we can transition into by shifting
            // over a term or nonterm
@ -87,14 +89,13 @@ impl<'session, 'grammar> LR1<'session, 'grammar> {
                let action = Action::Reduce(item.production);
                let prev = this_state.tokens.insert(item.lookahead, action);
                if let Some(conflict) = prev {
-                    return Err(TableConstructionError {
-                        states: Some(states),
-                        index: index,
-                        items: items.clone(),
-                        lookahead: item.lookahead,
-                        production: item.production,
-                        conflict: conflict,
-                    });
+                    this_state.conflicts.entry(item.lookahead)
+                                        .or_insert(vec![])
+                                        .push(Conflict {
+                                            state: index,
+                                            production: item.production,
+                                            action: conflict,
+                                        });
                }
            }

@ -102,7 +103,11 @@ impl<'session, 'grammar> LR1<'session, 'grammar> {
            states.push(this_state);
        }

-        Ok(states)
+        if states.iter().any(|s| !s.conflicts.is_empty()) {
+            Err(TableConstructionError { states: states })
+        } else {
+            Ok(states)
+        }
    }

    fn items(&self,
--- a/lalrpop/src/lr1/error.rs
+++ b/lalrpop/src/lr1/error.rs
@ -2,21 +2,38 @@

 use grammar::repr::Grammar;
 use std::io::{self, Write};
-
-use super::{Action, TableConstructionError};
+use super::{Action, Conflict, Lookahead, State, TableConstructionError};

+/// Writes a report to `out` for every conflict recorded in
+/// `error.states`: each state is visited, and for each lookahead in
+/// its `conflicts` map every conflict is passed to
+/// `report_error_naive`. Returns the first I/O error hit while
+/// writing, or `Ok(())` once all conflicts have been reported.
+/// `_grammar` is currently unused.
 pub fn report_error<'grammar>(out: &mut Write,
                              _grammar: &'grammar Grammar,
                              error: &TableConstructionError<'grammar>)
                              -> io::Result<()>
+{
+    for state in &error.states {
+        for (&lookahead, conflicts) in &state.conflicts {
+            for conflict in conflicts {
+                try!(report_error_naive(out, &error.states, lookahead, conflict));
+            }
+        }
+    }
+    Ok(())
+}
+
+/// Naive error reporting. This is still used for LALR(1) reduction
+/// errors but ought to be phased out completely, I imagine.
+fn report_error_naive<'grammar>(out: &mut Write,
+                                states: &[State<'grammar>],
+                                lookahead: Lookahead,
+                                conflict: &Conflict<'grammar>)
+                                -> io::Result<()>
 {
    try!(writeln!(out, "when in this state:"));
-    for item in error.items.vec.iter() {
+    for item in states[conflict.state.0].items.vec.iter() {
        try!(writeln!(out, "  {:?}", item));
    }
-    try!(writeln!(out, "and looking at a token `{:?}`,", error.lookahead));
-    try!(writeln!(out, "we can reduce to a `{}`", error.production.nonterminal));
-    match error.conflict {
+    try!(writeln!(out, "and looking at a token `{:?}`,", lookahead));
+    try!(writeln!(out, "we can reduce to a `{}`", conflict.production.nonterminal));
+    match conflict.action {
        Action::Shift(_) =>
            try!(writeln!(out, "but we can also shift")),
        Action::Reduce(prod) =>
--- a/lalrpop/src/lr1/la0/mod.rs
+++ b/lalrpop/src/lr1/la0/mod.rs
@ -7,7 +7,7 @@ use session::Session;
 use std::rc::Rc;
 use util::{map, Map};
 use util::map::Entry;
-use super::{Action, State, StateIndex, Item, Items, Lookahead, TableConstructionError};
+use super::{Action, Conflict, Item, Items, Lookahead, State, StateIndex, TableConstructionError};
 use super::Action::{Reduce, Shift};

 #[cfg(test)]
@ -27,6 +27,7 @@ struct LALR1State<'grammar> {
    items: Vec<Item<'grammar>>,
    tokens: Map<Lookahead, Action<'grammar>>,
    gotos: Map<NonterminalString, StateIndex>,
+    conflicts: Map<Lookahead, Vec<Conflict<'grammar>>>,
 }

 pub fn lalr_states<'grammar>(session: &Session,
@ -67,7 +68,8 @@ pub fn collapse_to_lalr_states<'grammar>(lr_states: &[State<'grammar>])
                              index: index,
                              items: vec![],
                              tokens: map(),
-                              gotos: map()
+                              gotos: map(),
+                              conflicts: map(),
                          });
                          index
                      });
@ -93,8 +95,10 @@ pub fn collapse_to_lalr_states<'grammar>(lr_states: &[State<'grammar>])
                Entry::Occupied(slot) => {
                    let old_action = *slot.get();
                    if old_action != lalr1_action {
-                        return Err(conflict(lalr1_index, &lalr1_state.items, lookahead,
-                                            old_action, lalr1_action));
+                        lalr1_state.conflicts
+                                   .entry(lookahead)
+                                   .or_insert(vec![])
+                                   .push(conflict(lalr1_index, old_action, lalr1_action));
                    }
                }
                Entry::Vacant(slot) => {
@ -119,38 +123,38 @@ pub fn collapse_to_lalr_states<'grammar>(lr_states: &[State<'grammar>])
    }

    // Finally, create the new states
-    Ok(
+    let lr1_states: Vec<_> =
        lalr1_states.into_iter()
                    .map(|lr| State {
                        index: lr.index,
                        items: Items { vec: Rc::new(lr.items) },
                        tokens: lr.tokens,
-                        gotos: lr.gotos
+                        gotos: lr.gotos,
+                        conflicts: lr.conflicts,
                    })
-                    .collect())
+                    .collect();
+
+    if lr1_states.iter().any(|s| !s.conflicts.is_empty()) {
+        Err(TableConstructionError { states: lr1_states })
+    } else {
+        Ok(lr1_states)
+    }
 }

 fn conflict<'grammar>(index: StateIndex,
-                      items: &[Item<'grammar>],
-                      lookahead: Lookahead,
                      action1: Action<'grammar>,
                      action2: Action<'grammar>)
-                      -> TableConstructionError<'grammar> {
+                      -> Conflict<'grammar> {
    let (production, conflict) = match (action1, action2) {
        (c @ Shift(_), Reduce(p)) |
        (Reduce(p), c @ Shift(_)) |
        (Reduce(p), c @ Reduce(_)) => { (p, c) }
-        _ => {
-            panic!("conflict between {:?} and {:?}", action1, action2) 
-        }
+        _ => panic!("conflict between {:?} and {:?}", action1, action2)
    };

-    TableConstructionError {
-        states: None,
-        index: index,
-        items: Items { vec: Rc::new(items.to_vec()) },
-        lookahead: lookahead,
+    Conflict {
+        state: index,
        production: production,
-        conflict: conflict,
+        action: conflict,
    }
 }
--- a/lalrpop/src/lr1/mod.rs
+++ b/lalrpop/src/lr1/mod.rs
@ -23,6 +23,7 @@ pub struct State<'grammar> {
    index: StateIndex,
    items: Items<'grammar>,
    tokens: Map<Lookahead, Action<'grammar>>,
+    conflicts: Map<Lookahead, Vec<Conflict<'grammar>>>,
    gotos: Map<NonterminalString, StateIndex>,
 }

@ -32,6 +33,18 @@ enum Action<'grammar> {
    Reduce(&'grammar Production),
 }

+/// A conflict recorded on a state for some lookahead: the state can
+/// reduce `production`, but `action` was also registered for that same
+/// lookahead. The lookahead itself is not stored here; it is the key
+/// of the `conflicts` map that holds this value.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+struct Conflict<'grammar> {
+    // when in this state...
+    state: StateIndex,
+
+    // we can reduce...
+    production: &'grammar Production,
+
+    // but we can also...
+    action: Action<'grammar>,
+}
+
 #[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
 struct Items<'grammar> {
    vec: Rc<Vec<Item<'grammar>>>
@ -55,32 +68,8 @@ struct Item<'grammar> {

 #[derive(Debug)]
 pub struct TableConstructionError<'grammar> {
-    // completed states: note that these may reference states that
-    // were never processed, so you can't follow outgoing edges with
-    // impunity.
-    //
-    // This is optional because, in the current LALR(1) code, we don't
-    // have a notion of "complete" states to supply. Really we should
-    // special case the error reporting there. Or, better yet, make
-    // the LR(1) -> LALR(1) compression infallible, by just detecting
-    // when it's going to work. (Or, the other way, only use the full
-    // LR(1) when needed.)
-    states: Option<Vec<State<'grammar>>>,
-
-    // state index where we encountered a failure
-    index: StateIndex,
-
-    // when in this state:
-    items: Items<'grammar>,
-
-    // and looking at this token:
-    lookahead: Lookahead,
-
-    // we can reduce using this production:
-    production: &'grammar Production,
-
-    // but we can also:
-    conflict: Action<'grammar>,
+    // LR(1) state set. Some of these states are in error.
+    states: Vec<State<'grammar>>,
 }

 pub fn build_states<'grammar>(session: &Session,