From 077ed9060d636f8630a8b965790e3222f0df964f Mon Sep 17 00:00:00 2001
From: Niko Matsakis <niko@alum.mit.edu>
Date: Fri, 19 Jun 2015 19:03:21 -0400
Subject: [PATCH] build up action tables

---
 src/lr1/mod.rs  |  65 ++++++++++++---
 src/lr1/test.rs | 211 +-----------------------------------------------
 2 files changed, 57 insertions(+), 219 deletions(-)

diff --git a/src/lr1/mod.rs b/src/lr1/mod.rs
index 47858a3..6b7cbfd 100644
--- a/src/lr1/mod.rs
+++ b/src/lr1/mod.rs
@@ -18,8 +18,14 @@ struct LR1<'grammar> {
 #[derive(Debug)]
 struct State<'grammar> {
     items: Items<'grammar>,
-    shifts: Vec<(TerminalString, StateIndex)>,
-    gotos: Vec<(NonterminalString, StateIndex)>,
+    tokens: Map<Lookahead, Action<'grammar>>,
+    gotos: Map<NonterminalString, StateIndex>,
+}
+
+#[derive(Debug)]
+enum Action<'grammar> {
+    Shift(StateIndex),
+    Reduce(&'grammar Production),
 }
 
 type Items<'grammar> = Rc<Vec<Item<'grammar>>>;
@@ -40,11 +46,27 @@ struct Item<'grammar> {
     lookahead: Lookahead,
 }
 
+#[derive(Debug)]
 struct StateSet<'grammar> {
     states: Vec<State<'grammar>>,
     state_map: Map<Items<'grammar>, StateIndex>,
 }
 
+#[derive(Debug)]
+pub struct TableConstructionError<'grammar> {
+    // when in this state:
+    items: Items<'grammar>,
+
+    // and looking at this token:
+    lookahead: Lookahead,
+
+    // we can reduce using this production:
+    production: &'grammar Production,
+
+    // but we can also:
+    conflict: Action<'grammar>,
+}
+
 impl<'grammar> LR1<'grammar> {
     fn new(grammar: &'grammar Grammar) -> LR1 {
         LR1 {
@@ -53,7 +75,9 @@ impl<'grammar> LR1<'grammar> {
         }
     }
 
-    fn build_states(&self, start_nt: NonterminalString) -> Vec<State<'grammar>> {
+    fn build_states(&self, start_nt: NonterminalString)
+                    -> Result<Vec<State<'grammar>>, TableConstructionError<'grammar>>
+    {
         let mut state_set = StateSet::new();
 
         // create the starting state
@@ -76,22 +100,41 @@ impl<'grammar> LR1<'grammar> {
                 let items = self.transitive_closure(items);
                 let next_state = state_set.add_state(items);
 
-                // FIXME check for conflicts
+                let this_state = &mut state_set.states[counter];
                 match symbol {
-                    Symbol::Terminal(t) => {
-                        state_set.states[counter].shifts.push((t, next_state));
+                    Symbol::Terminal(s) => {
+                        let action = Action::Shift(next_state);
+                        let prev = this_state.tokens.insert(Lookahead::Terminal(s), action);
+                        assert!(prev.is_none()); // cannot have a shift/shift conflict
                     }
-                    Symbol::Nonterminal(t) => {
-                        state_set.states[counter].gotos.push((t, next_state));
+
+                    Symbol::Nonterminal(s) => {
+                        let prev = this_state.gotos.insert(s, next_state);
+                        assert!(prev.is_none());
                     }
                 }
             }
 
+            // finally, consider the reductions
+            let this_state = &mut state_set.states[counter];
+            for item in items.iter().filter(|i| i.can_reduce()) {
+                let action = Action::Reduce(item.production);
+                let prev = this_state.tokens.insert(item.lookahead, action);
+                if let Some(conflict) = prev {
+                    return Err(TableConstructionError {
+                        items: items.clone(),
+                        lookahead: item.lookahead,
+                        production: item.production,
+                        conflict: conflict,
+                    });
+                }
+            }
+
             // extract a new state
             counter += 1;
         }
 
-        state_set.states
+        Ok(state_set.states)
     }
 
     fn items(&self,
@@ -191,9 +234,7 @@ impl<'grammar> StateSet<'grammar> {
         let states = &mut self.states;
         *self.state_map.entry(items.clone()).or_insert_with(|| {
             let index = StateIndex(states.len());
-            states.push(State { items: items,
-                                shifts: Vec::new(),
-                                gotos: Vec::new() });
+            states.push(State { items: items, tokens: map(), gotos: map() });
             index
         })
     }
diff --git a/src/lr1/test.rs b/src/lr1/test.rs
index d11b40b..6f66b57 100644
--- a/src/lr1/test.rs
+++ b/src/lr1/test.rs
@@ -92,212 +92,9 @@ grammar Foo {
 "#);
 
     let lr1 = LR1::new(&grammar);
-    let mut states = lr1.build_states(nt("S"));
-    for state in &mut states {
-        state.shifts.sort();
-        state.gotos.sort();
-    }
-    expect_debug(&states, r#"[
-    State {
-        items: [
-            S = (*) E [EOF],
-            E = (*) E "-" T [EOF],
-            E = (*) T [EOF],
-            E = (*) E "-" T ["-"],
-            E = (*) T ["-"],
-            T = (*) "N" [EOF],
-            T = (*) "(" E ")" [EOF],
-            T = (*) "N" ["-"],
-            T = (*) "(" E ")" ["-"]
-        ],
-        shifts: [
-            ("(", S4),
-            ("N", S3)
-        ],
-        gotos: [
-            (E, S2),
-            (T, S1)
-        ]
-    },
-    State {
-        items: [
-            E = T (*) [EOF],
-            E = T (*) ["-"]
-        ],
-        shifts: [],
-        gotos: []
-    },
-    State {
-        items: [
-            S = E (*) [EOF],
-            E = E (*) "-" T [EOF],
-            E = E (*) "-" T ["-"]
-        ],
-        shifts: [
-            ("-", S5)
-        ],
-        gotos: []
-    },
-    State {
-        items: [
-            T = "N" (*) [EOF],
-            T = "N" (*) ["-"]
-        ],
-        shifts: [],
-        gotos: []
-    },
-    State {
-        items: [
-            T = "(" (*) E ")" [EOF],
-            T = "(" (*) E ")" ["-"],
-            E = (*) E "-" T [")"],
-            E = (*) T [")"],
-            E = (*) E "-" T ["-"],
-            E = (*) T ["-"],
-            T = (*) "N" [")"],
-            T = (*) "(" E ")" [")"],
-            T = (*) "N" ["-"],
-            T = (*) "(" E ")" ["-"]
-        ],
-        shifts: [
-            ("(", S8),
-            ("N", S9)
-        ],
-        gotos: [
-            (E, S7),
-            (T, S6)
-        ]
-    },
-    State {
-        items: [
-            E = E "-" (*) T [EOF],
-            E = E "-" (*) T ["-"],
-            T = (*) "N" [EOF],
-            T = (*) "(" E ")" [EOF],
-            T = (*) "N" ["-"],
-            T = (*) "(" E ")" ["-"]
-        ],
-        shifts: [
-            ("(", S4),
-            ("N", S3)
-        ],
-        gotos: [
-            (T, S10)
-        ]
-    },
-    State {
-        items: [
-            E = T (*) [")"],
-            E = T (*) ["-"]
-        ],
-        shifts: [],
-        gotos: []
-    },
-    State {
-        items: [
-            T = "(" E (*) ")" [EOF],
-            T = "(" E (*) ")" ["-"],
-            E = E (*) "-" T [")"],
-            E = E (*) "-" T ["-"]
-        ],
-        shifts: [
-            (")", S12),
-            ("-", S11)
-        ],
-        gotos: []
-    },
-    State {
-        items: [
-            T = "(" (*) E ")" [")"],
-            T = "(" (*) E ")" ["-"],
-            E = (*) E "-" T [")"],
-            E = (*) T [")"],
-            E = (*) E "-" T ["-"],
-            E = (*) T ["-"],
-            T = (*) "N" [")"],
-            T = (*) "(" E ")" [")"],
-            T = (*) "N" ["-"],
-            T = (*) "(" E ")" ["-"]
-        ],
-        shifts: [
-            ("(", S8),
-            ("N", S9)
-        ],
-        gotos: [
-            (E, S13),
-            (T, S6)
-        ]
-    },
-    State {
-        items: [
-            T = "N" (*) [")"],
-            T = "N" (*) ["-"]
-        ],
-        shifts: [],
-        gotos: []
-    },
-    State {
-        items: [
-            E = E "-" T (*) [EOF],
-            E = E "-" T (*) ["-"]
-        ],
-        shifts: [],
-        gotos: []
-    },
-    State {
-        items: [
-            E = E "-" (*) T [")"],
-            E = E "-" (*) T ["-"],
-            T = (*) "N" [")"],
-            T = (*) "(" E ")" [")"],
-            T = (*) "N" ["-"],
-            T = (*) "(" E ")" ["-"]
-        ],
-        shifts: [
-            ("(", S8),
-            ("N", S9)
-        ],
-        gotos: [
-            (T, S14)
-        ]
-    },
-    State {
-        items: [
-            T = "(" E ")" (*) [EOF],
-            T = "(" E ")" (*) ["-"]
-        ],
-        shifts: [],
-        gotos: []
-    },
-    State {
-        items: [
-            T = "(" E (*) ")" [")"],
-            T = "(" E (*) ")" ["-"],
-            E = E (*) "-" T [")"],
-            E = E (*) "-" T ["-"]
-        ],
-        shifts: [
-            (")", S15),
-            ("-", S11)
-        ],
-        gotos: []
-    },
-    State {
-        items: [
-            E = E "-" T (*) [")"],
-            E = E "-" T (*) ["-"]
-        ],
-        shifts: [],
-        gotos: []
-    },
-    State {
-        items: [
-            T = "(" E ")" (*) [")"],
-            T = "(" E ")" (*) ["-"]
-        ],
-        shifts: [],
-        gotos: []
-    }
-]"#);
 
+    // For now, just test that the process does not result in an error
+    // and yields the expected number of states.
+    let states = lr1.build_states(nt("S")).unwrap();
+    assert_eq!(states.len(), 16);
 }