Mirror of https://github.com/fluencelabs/lalrpop, synced 2025-03-16 17:00:53 +00:00

Merge pull request #198 from nikomatsakis/lane-table-algorithm

Further progress towards lane table algorithm

Commit: b78b2aaaa1
@ -10,7 +10,7 @@
extern crate atty;
extern crate bit_set;
#[macro_use] extern crate bitflags;
extern crate bitflags;
extern crate diff;
extern crate itertools;
extern crate lalrpop_intern as intern;

@ -17,9 +17,9 @@ doctest = false
ascii-canvas = "1.0"
atty = "0.1.2"
bit-set = "0.4.0"
bitflags = "0.8.0"
diff = "0.1.9"
docopt = "0.7"
ena = "0.5"
itertools = "0.5.9"
regex = "0.2.1"
regex-syntax = "0.2"

@ -12,6 +12,7 @@ extern crate ascii_canvas;
extern crate atty;
extern crate bit_set;
extern crate diff;
extern crate ena;
extern crate itertools;
extern crate lalrpop_intern as intern;
extern crate lalrpop_util;

@ -118,7 +118,7 @@ pub type LR0Items<'grammar> = Items<'grammar, Nil>;
#[allow(dead_code)]
pub type LR1Items<'grammar> = Items<'grammar, TokenSet>;

#[derive(Debug)]
#[derive(Clone, Debug)]
pub struct State<'grammar, L: Lookahead> {
    pub index: StateIndex,
    pub items: Items<'grammar, L>,

@ -130,7 +130,7 @@ pub struct State<'grammar, L: Lookahead> {
pub type LR0State<'grammar> = State<'grammar, Nil>;
pub type LR1State<'grammar> = State<'grammar, TokenSet>;

#[derive(Copy, Clone, Debug, PartialEq, Eq)]
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Action<'grammar> {
    Shift(TerminalString, StateIndex),
    Reduce(&'grammar Production),
lalrpop/src/lr1/lane_table/README.md (new file, 417 lines)
@ -0,0 +1,417 @@

This module contains code for LR(1) construction based on a paper by
Pager and Chen, "The Lane Table Method Of Constructing LR(1) Parsers",
published in APPLC '12. Unfortunately, that paper is quite compact --
only 8 pages! -- which doesn't leave much room for examples and
explanation. This README is my attempt to explain the idea, or at
least how I chose to implement it in LALRPOP, which may or may not be
faithful to the original algorithm. Naturally it also serves as a
guide to the code.

### First example grammar: G0

We will be working through two example grammars. The first I call G0
-- it is a reduced version of what the paper calls G1. It is
interesting because it does not require splitting any states, and so
we wind up with the same number of states as in LR0. Put another way,
it is an LALR(1) grammar.

#### Grammar G0

```
G0 = X "c"
   | Y "d"
X = "e" X
  | "e"
Y = "e" Y
  | "e"
```
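
For reference, this is essentially the grammar used by the tests later in
this PR (`paper_example_g0`), written in LALRPOP syntax (there the start
nonterminal is called `G`):

```
grammar;

pub G: () = {
    X "c",
    Y "d",
};

X: () = {
    "e" X,
    "e",
};

Y: () = {
    "e" Y,
    "e",
};
```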

#### Step 1: Construct an LR(0) state machine

We begin by constructing an LR(0) state machine. The LR(0) states for
G0 are as follows:

```
S0 = G0 = (*) X "c"
   | G0 = (*) Y "d"
   | X = (*) "e" X
   | X = (*) "e"
   | Y = (*) "e" Y
   | Y = (*) "e"

S1 = X = "e" (*) X
   | X = "e" (*)
   | X = (*) "e"
   | X = (*) "e" X
   | Y = "e" (*) Y
   | Y = "e" (*)
   | Y = (*) "e"
   | Y = (*) "e" Y

S2 = X = "e" X (*)

S3 = G0 = X (*) "c"

S4 = Y = "e" Y (*)

S5 = G0 = Y (*) "d"

S6 = G0 = X "c" (*)

S7 = G0 = Y "d" (*)
```

We can also consider *edges* between the states as follows,
with the label being the symbol that is pushed onto the stack:

```
S0 -"e"-> S1
S1 -"e"-> S1
S1 --X--> S2
S0 --X--> S3
S1 --Y--> S4
S0 --Y--> S5
S3 -"c"-> S6
S5 -"d"-> S7
```

Note that state S1 is "inconsistent", in that it has conflicting
actions.
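
To make the item notation concrete, here is a small self-contained sketch
(the `Sym`/`Item` types are illustrative stand-ins, not LALRPOP's real
`lr1::core` definitions) that models state S1 and checks that it is
inconsistent, i.e. that it can both shift `"e"` and reduce in two different
ways:

```rust
// Illustrative stand-ins: a symbol is a terminal or a nonterminal, and an
// item is a production with a dot (`(*)`) position.
#[derive(Clone, Copy, Debug, PartialEq)]
enum Sym { T(&'static str), N(&'static str) }

#[derive(Clone, Debug)]
struct Item { lhs: &'static str, rhs: Vec<Sym>, dot: usize }

impl Item {
    fn can_reduce(&self) -> bool { self.dot == self.rhs.len() }
    fn shifts_terminal(&self) -> bool { matches!(self.rhs.get(self.dot), Some(Sym::T(_))) }
}

fn main() {
    use Sym::*;
    // State S1 from the text.
    let s1 = vec![
        Item { lhs: "X", rhs: vec![T("e"), N("X")], dot: 1 }, // X = "e" (*) X
        Item { lhs: "X", rhs: vec![T("e")], dot: 1 },         // X = "e" (*)
        Item { lhs: "X", rhs: vec![T("e")], dot: 0 },         // X = (*) "e"
        Item { lhs: "X", rhs: vec![T("e"), N("X")], dot: 0 }, // X = (*) "e" X
        Item { lhs: "Y", rhs: vec![T("e"), N("Y")], dot: 1 }, // Y = "e" (*) Y
        Item { lhs: "Y", rhs: vec![T("e")], dot: 1 },         // Y = "e" (*)
        Item { lhs: "Y", rhs: vec![T("e")], dot: 0 },         // Y = (*) "e"
        Item { lhs: "Y", rhs: vec![T("e"), N("Y")], dot: 0 }, // Y = (*) "e" Y
    ];
    let can_shift = s1.iter().any(Item::shifts_terminal);
    let reductions: Vec<&str> = s1.iter().filter(|i| i.can_reduce()).map(|i| i.lhs).collect();
    // S1 can shift "e" *and* reduce to either X or Y: it is inconsistent.
    println!("can shift: {can_shift}, reductions: {reductions:?}");
    assert!(can_shift && reductions.len() > 1);
}
```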

#### Step 2: Convert LR(0) states into LR(0-1) states.

The term LR(0-1) is a bit unusual, but basically the idea is that the
lookahead in an LR(0-1) state can be either a set of terminals (as in
LR(1)) or *none* (as in LR(0)). You can alternatively think of it as
adding a special "wildcard" symbol `_` to the grammar; in our actual code,
we represent this with `TokenSet::all()`. We will thus denote the
inconsistent state after transformation as follows, where each line
has the "wildcard" lookahead:

```
S1 = X = "e" (*) X [_]
   | X = "e" (*) [_]
   | X = (*) "e" [_]
   | X = (*) "e" X [_]
   | Y = "e" (*) Y [_]
   | Y = "e" (*) [_]
   | Y = (*) "e" [_]
   | Y = (*) "e" Y [_]
```

Naturally, the state is still inconsistent.
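
A minimal sketch of this promotion step, assuming a lookahead is just a set
of terminal names; the real code does the same thing with `TokenSet::all()`
in `promote_lr0_states`, which appears later in this PR:

```rust
// A sketch of the LR(0) -> LR(0-1) promotion: every LR(0) item simply gets
// the full "wildcard" token set as its lookahead.
use std::collections::BTreeSet;

type TokenSet = BTreeSet<&'static str>;

#[derive(Clone, Debug)]
struct Lr0Item { production: &'static str, dot: usize }

#[derive(Clone, Debug)]
struct Lr01Item { production: &'static str, dot: usize, lookahead: TokenSet }

/// The "wildcard" lookahead `[_]`: every terminal of G0, plus end-of-file.
fn all_tokens() -> TokenSet {
    ["c", "d", "e", "EOF"].into_iter().collect()
}

fn promote(items: &[Lr0Item]) -> Vec<Lr01Item> {
    items.iter()
         .map(|i| Lr01Item { production: i.production, dot: i.dot, lookahead: all_tokens() })
         .collect()
}

fn main() {
    // Two of S1's LR(0) items; after promotion each carries the full token set.
    let s1 = [
        Lr0Item { production: r#"X = "e" X"#, dot: 1 },
        Lr0Item { production: r#"X = "e""#, dot: 1 },
    ];
    for item in promote(&s1) {
        println!("{:?}", item);
    }
}
```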

#### Step 3: Resolve inconsistencies.

In the next step, we iterate over all of our LR(0-1) states. In this
example, we will not need to create new states, but in future examples
we will. The iteration thus consists of a queue and some code like
this:

```rust
let mut queue = Queue::new();
queue.extend(/* all states */);
while let Some(s) = queue.pop_front() {
    if /* s is an inconsistent state */ {
        resolve_inconsistencies(s, &mut queue);
    }
}
```

##### Step 3a: Build the lane table.

To resolve an inconsistent state, we first construct a **lane
table**. This is done by the code in the `lane` module (the `table`
module maintains the data structure). It works by starting at each
conflict and tracing **backwards**. Let's start with the final table
we will get for the state S1 and then we will work our way back to how
it is constructed. First, let's identify the conflicting actions from
S1 and give them indices:

```
S1 = X = (*) "e" [_]     // C0 -- shift "e"
   | X = "e" (*) [_]     // C1 -- reduce `X = "e" (*)`
   | X = (*) "e" X [_]   // C0 -- shift "e"
   | X = "e" (*) X [_]
   | Y = (*) "e" [_]     // C0 -- shift "e"
   | Y = "e" (*) [_]     // C2 -- reduce `Y = "e" (*)`
   | Y = (*) "e" Y [_]   // C0 -- shift "e"
   | Y = "e" (*) Y [_]
```

Several of the items can cause "Conflicting Action 0" (C0), which is
to shift an `"e"`. These are all mutually compatible. However, there
are also two incompatible actions: C1 and C2, both reductions. In
fact, we'll find that if we look back at state S0, these 'conflicting'
actions all occur with distinct lookahead. The purpose of the lane
table is to summarize that information. The lane table we will wind up
constructing for these conflicting actions is as follows:

```
| State | C0    | C1    | C2    | Successors |
| S0    |       | ["c"] | ["d"] | {S1}       |
| S1    | ["e"] | []    | []    | {S1}       |
```

Here the idea is that the lane table summarizes the lookahead
information contributed by each state. Note that for the *shift* the
state S1 already has enough lookahead information: we only shift when
we see the terminal we need next ("e"). But for C1 and C2, the lookahead
actually came from S0, which is a predecessor state.
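
Here is a sketch of the lane table as a data structure, assuming roughly the
shape of the real `LaneTable` further down in this PR (a map from
`(state, conflict)` to a token set, plus a successor relation); the example
fills in the G0 table shown above:

```rust
use std::collections::{BTreeMap, BTreeSet};

type StateIndex = usize;
type ConflictIndex = usize;
type TokenSet = BTreeSet<&'static str>;

#[derive(Default, Debug)]
struct LaneTable {
    // (state, conflict) -> lookahead contributed by that state to that conflict
    lookaheads: BTreeMap<(StateIndex, ConflictIndex), TokenSet>,
    // state -> states in the table reachable from it in one step
    successors: BTreeMap<StateIndex, BTreeSet<StateIndex>>,
}

impl LaneTable {
    fn add_lookahead(&mut self, state: StateIndex, conflict: ConflictIndex, tokens: &[&'static str]) {
        self.lookaheads.entry((state, conflict)).or_default().extend(tokens);
    }
    fn add_successor(&mut self, state: StateIndex, succ: StateIndex) {
        self.successors.entry(state).or_default().insert(succ);
    }
}

fn main() {
    // The table for G0's state S1, as shown above.
    let mut table = LaneTable::default();
    table.add_lookahead(1, 0, &["e"]); // S1 contributes "e" to C0 (the shift)
    table.add_lookahead(0, 1, &["c"]); // S0 contributes "c" to C1
    table.add_lookahead(0, 2, &["d"]); // S0 contributes "d" to C2
    table.add_successor(0, 1);         // S0 -> S1
    table.add_successor(1, 1);         // S1 -> S1
    println!("{table:#?}");
}
```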

As I said earlier, the algorithm for constructing the table works by
looking at the conflicting item and walking backwards. So let's
illustrate with conflict C1. We have the conflicting item `X = "e"
(*)`, and we are basically looking to find its lookahead. We know
that somewhere in the distant past of our state machine there must be
an item like

    Foo = ...a (*) X ...b

that led us here. We want to find that item, so we can derive the
lookahead from `...b` (whatever symbols come after `X`).

To do this, we will walk the graph. Our state at any point in time
will be the pair of a state and an item in that state. To start out,
then, we have `(S1, X = "e" (*))`, which is the conflict C1. Because
the `(*)` is not at the "front" of this item, we have to figure out
where this `"e"` came from on our stack, so we look for predecessors
of the state S1 which have an item like `X = (*) "e"`. This leads us to
S0 and also S1. So we can push two states in our search: `(S0, X = (*)
"e")` and `(S1, X = (*) "e")`. Let's consider each in turn.

The next state is then `(S0, X = (*) "e")`. Here the `(*)` lies at the
front of the item, so we search **the same state** S0 for items that
would have led to this state via an *epsilon move*. This basically
means an item like `Foo = ... (*) X ...` -- i.e., where the `(*)`
appears directly before the nonterminal `X`. In our case, we will find
`G0 = (*) X "c"`. This is great, because it tells us some lookahead
("c", in particular), and hence we can stop our search. We add to the
table the entry that the state S0 contributes lookahead "c" to the
conflict C1. In some cases, we might find something like `Foo =
... (*) X` instead, where the `X` we are looking for appears at the
end. In that case, we have to restart our search, but looking for the
lookahead for `Foo`.

The next state in our case is `(S1, X = (*) "e")`. Again the `(*)` lies
at the beginning and hence we search for things in the state S1 where
`X` is the next symbol. We find `X = "e" (*) X`. This is not as good
as last time, because there are no symbols appearing after X in this
item, so it does not contribute any lookahead. We therefore can't stop
our search yet, but we push the state `(S1, X = "e" (*) X)` -- this
corresponds to the `Foo` state I mentioned at the end of the last
paragraph, except that in this case `Foo` is the same nonterminal `X`
we started with.

Looking at `(S1, X = "e" (*) X)`, we again have the `(*)` in the
middle of the item, so we move it left, searching for predecessors
with the item `X = (*) "e" X`. We will (again) find that S0 and S1 have such
items. In the case of S0, we will (again) find the context "c", which
we dutifully add to the table (this has no effect, since it is already
present). In the case of S1, we will (again) wind up at the state
`(S1, X = "e" (*) X)`. Since we've already visited this state, we
stop our search; it will not lead to new context.

At this point, our table column for C1 is complete. We can repeat the
process for C2, which plays out in an analogous way.
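
The following self-contained sketch walks through exactly this trace for
conflict C1 of G0. It is not the real `LaneTracer`: the states and the
predecessor relation are hard-coded, and only the two cases described above
(dot in the middle vs. dot at the front) are handled, with "FIRST of the
suffix" simplified to a single terminal or nothing, which is all G0 needs:

```rust
use std::collections::BTreeSet;

#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
enum Sym { T(&'static str), N(&'static str) }
use Sym::*;

#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
struct Item { lhs: &'static str, rhs: Vec<Sym>, dot: usize }

fn item(lhs: &'static str, rhs: &[Sym], dot: usize) -> Item {
    Item { lhs, rhs: rhs.to_vec(), dot }
}

// Trace backwards from `(state, it)`, recording which states contribute
// which lookahead tokens to the conflict.
fn trace(state: usize,
         it: &Item,
         states: &[Vec<Item>],
         predecessors: &[Vec<usize>],
         visited: &mut BTreeSet<(usize, Item)>,
         contributed: &mut BTreeSet<(usize, &'static str)>) {
    if !visited.insert((state, it.clone())) {
        return; // already traced this (state, item) pair
    }
    if it.dot > 0 {
        // `(*)` is not at the front: the same item, with the dot one step to
        // the left, must appear in our predecessors (including ourselves,
        // because of the self-loop S1 -"e"-> S1).
        let prev = Item { dot: it.dot - 1, ..it.clone() };
        for &pred in &predecessors[state] {
            if states[pred].contains(&prev) {
                trace(pred, &prev, states, predecessors, visited, contributed);
            }
        }
    } else {
        // `(*)` is at the front: within the *same* state, look for items whose
        // dot sits directly before `it.lhs` (the epsilon moves that created us).
        for other in &states[state] {
            if other.rhs.get(other.dot) == Some(&N(it.lhs)) {
                match other.rhs.get(other.dot + 1) {
                    // A terminal follows: that is the lookahead this state contributes.
                    Some(&T(t)) => { contributed.insert((state, t)); }
                    // Nothing follows: keep tracing, now looking for `other.lhs`'s lookahead.
                    None => trace(state, other, states, predecessors, visited, contributed),
                    // A nonterminal suffix would need FIRST(); G0 never hits this case.
                    Some(&N(_)) => unimplemented!("FIRST of a nonterminal suffix"),
                }
            }
        }
    }
}

fn main() {
    // Only the items that the C1 trace actually touches (Y items omitted).
    let s0 = vec![
        item("G0", &[N("X"), T("c")], 0), // G0 = (*) X "c"
        item("X", &[T("e"), N("X")], 0),  // X = (*) "e" X
        item("X", &[T("e")], 0),          // X = (*) "e"
    ];
    let s1 = vec![
        item("X", &[T("e"), N("X")], 1),  // X = "e" (*) X
        item("X", &[T("e")], 1),          // X = "e" (*)   <-- conflict C1
        item("X", &[T("e")], 0),          // X = (*) "e"
        item("X", &[T("e"), N("X")], 0),  // X = (*) "e" X
    ];
    let states = vec![s0, s1];
    let predecessors = vec![vec![], vec![0, 1]]; // S1 is entered from S0 and S1 on "e"
    let c1 = item("X", &[T("e")], 1);
    let mut contributed = BTreeSet::new();
    trace(1, &c1, &states, &predecessors, &mut BTreeSet::new(), &mut contributed);
    println!("{contributed:?}"); // {(0, "c")} -- state S0 contributes "c" to C1
}
```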

##### Step 3b: Update the lookahead

Looking at the lane table we built, we can union the context sets in
any particular column. We see that the context sets for each
conflicting action are pairwise disjoint. Therefore, we can simply
update each reduce action in our state with those lookaheads in mind,
and hence render it consistent:

```
S1 = X = (*) "e" [_]
   | X = "e" (*) ["c"] // lookahead from C1
   | X = (*) "e" X [_]
   | X = "e" (*) X [_]
   | Y = (*) "e" [_]
   | Y = "e" (*) ["d"] // lookahead from C2
   | Y = (*) "e" Y [_]
   | Y = "e" (*) Y [_]
```

This is of course also what the LALR(1) state would look like (though
it would also include context for the other items, which doesn't play
into the final machine execution).
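
A sketch of this "update the lookahead" step, with simplified stand-in types;
the real code is `ContextSet::apply`, which appears later in this PR and
rewrites the lookahead of each reduction from its conflict's column:

```rust
use std::collections::{BTreeMap, BTreeSet};

type TokenSet = BTreeSet<&'static str>;

#[derive(Debug)]
struct Reduction { production: &'static str, lookahead: TokenSet }

fn apply_columns(reductions: &mut [Reduction], columns: &BTreeMap<&'static str, TokenSet>) {
    for r in reductions {
        // Each reduce action takes exactly the lookahead its conflict column collected.
        r.lookahead = columns[r.production].clone();
    }
}

fn main() {
    // The two reductions of S1, with the "wildcard" lookahead erased for brevity.
    let mut reductions = vec![
        Reduction { production: r#"X = "e""#, lookahead: TokenSet::new() }, // C1
        Reduction { production: r#"Y = "e""#, lookahead: TokenSet::new() }, // C2
    ];
    // The unioned columns from the lane table above.
    let columns: BTreeMap<_, _> = [
        (r#"X = "e""#, ["c"].into_iter().collect::<TokenSet>()), // column C1
        (r#"Y = "e""#, ["d"].into_iter().collect::<TokenSet>()), // column C2
    ].into_iter().collect();
    apply_columns(&mut reductions, &columns);
    println!("{reductions:#?}"); // X = "e" now reduces only on "c", Y = "e" only on "d"
}
```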

At this point we've covered enough to handle the grammar G0. Let's
turn to a more complex grammar, grammar G1, and then we'll come back
to cover the remaining steps.

### Second example: the grammar G1

G1 is a (typo corrected) version of the grammar from the paper. This
grammar is not LALR(1) and hence it is more interesting, because it
requires splitting states.

#### Grammar G1

```
G1 = "a" X "d"
   | "a" Y "c"
   | "b" X "c"
   | "b" Y "d"
X = "e" X
  | "e"
Y = "e" Y
  | "e"
```

The key point of this grammar is that when we see `... "e" "c"` and we
wish to know whether to reduce to `X` or `Y`, we don't have enough
information. We need to know what is in the `...`, because `"a" "e"
"c"` means we reduce `"e"` to `Y` and `"b" "e" "c"` means we reduce to
`X`. In terms of our *state machine*, this corresponds to *splitting*
the states responsible for X and Y based on earlier context.

Let's look at a subset of the LR(0) states for G1:

```
S0 = G1 = (*) "a" X "d"
   | G1 = (*) "a" Y "c"
   | G1 = (*) "b" X "c"
   | G1 = (*) "b" Y "d"

S1 = G1 = "a" (*) X "d"
   | G1 = "a" (*) Y "c"
   | X = (*) "e" X
   | X = (*) "e"
   | Y = (*) "e" Y
   | Y = (*) "e"

S2 = G1 = "b" (*) X "c"
   | G1 = "b" (*) Y "d"
   | X = (*) "e" X
   | X = (*) "e"
   | Y = (*) "e" Y
   | Y = (*) "e"

S3 = X = "e" (*) X
   | X = "e" (*)      // C1 -- can reduce
   | X = (*) "e"      // C0 -- can shift "e"
   | X = (*) "e" X    // C0 -- can shift "e"
   | Y = "e" (*) Y
   | Y = "e" (*)      // C2 -- can reduce
   | Y = (*) "e"      // C0 -- can shift "e"
   | Y = (*) "e" Y    // C0 -- can shift "e"
```

Here we can see the problem. The state S3 is inconsistent. But it is
reachable from both S1 and S2. If we come from S1, then we can have (e.g.)
`X "d"`, but if we come from S2, we expect `X "c"`.

Let's walk through our algorithm again. I'll start with step 3a.

### Step 3a: Build the lane table.

The lane table for state S3 will look like this:

```
| State | C0    | C1    | C2    | Successors |
| S1    |       | ["d"] | ["c"] | {S3}       |
| S2    |       | ["c"] | ["d"] | {S3}       |
| S3    | ["e"] | []    | []    | {S3}       |
```

Now if we union each column, we see that both C1 and C2 wind up with
lookahead `{"c", "d"}`. This is our problem. We have to isolate things
better. Therefore, step 3b ("update lookahead") does not apply. Instead
we attempt step 3c.

### Step 3c: Isolate lanes

This part of the algorithm is only loosely described in the paper, but
I think it works as follows. We will employ a union-find data
structure. With each set, we will record a "context set", which
records for each conflict the set of lookahead tokens (e.g.,
`{C1:{"d"}}`).

A context set tells us how to map the lookahead to an action;
therefore, to be self-consistent, the lookaheads for each conflict
must be mutually disjoint. In other words, `{C1:{"d"}, C2:{"c"}}` is
valid, and says to do C1 if we see a "d" and C2 if we see a "c". But
`{C1:{"d"}, C2:{"d"}}` is not, because a lookahead of "d" would then
map to two distinct actions.
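
A sketch of such a context set, assuming a conflict is identified by a plain
name rather than the real `ConflictIndex`; the `insert`/`union` operations
fail exactly when some token would be claimed by two different conflicts,
mirroring the `OverlappingLookahead` error in the real `ContextSet`:

```rust
use std::collections::{BTreeMap, BTreeSet};

type TokenSet = BTreeSet<&'static str>;

#[derive(Clone, Debug, Default)]
struct ContextSet { lookaheads: BTreeMap<&'static str, TokenSet> }

#[derive(Debug)]
struct OverlappingLookahead;

impl ContextSet {
    fn insert(&mut self, conflict: &'static str, tokens: &TokenSet) -> Result<(), OverlappingLookahead> {
        // Reject if any *other* conflict already owns one of these tokens.
        for (&c, set) in &self.lookaheads {
            if c != conflict && !set.is_disjoint(tokens) {
                return Err(OverlappingLookahead);
            }
        }
        self.lookaheads.entry(conflict).or_default().extend(tokens);
        Ok(())
    }
    fn union(mut self, other: &ContextSet) -> Result<ContextSet, OverlappingLookahead> {
        for (&c, set) in &other.lookaheads {
            self.insert(c, set)?;
        }
        Ok(self)
    }
}

fn main() {
    let d: TokenSet = ["d"].into_iter().collect();
    let c: TokenSet = ["c"].into_iter().collect();

    let mut s1 = ContextSet::default();
    s1.insert("C1", &d).unwrap();
    s1.insert("C2", &c).unwrap(); // {C1:{"d"}, C2:{"c"}} -- consistent

    let mut s2 = ContextSet::default();
    s2.insert("C1", &c).unwrap();
    s2.insert("C2", &d).unwrap(); // {C1:{"c"}, C2:{"d"}} -- also consistent

    // Merging the two would make both C1 and C2 respond to "c" and "d":
    assert!(s1.union(&s2).is_err());
}
```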

Initially, each state in the lane table is mapped to itself, and the
context set is derived from its row in the lane table:

```
S1 = {C1:d, C2:c}
S2 = {C1:c, C2:d}
S3 = {C0:e}
```

We designate "beachhead" states as those states in the table that are
not reachable from another state in the table (i.e., using the
successors). In this case, those are the states S1 and S2. We will be
doing a DFS through the table and we want to use those as the starting
points.

(Question: is there always at least one beachhead state? Seems like
there must be.)
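
A sketch of how the beachhead states can be computed from the successor
relation, mirroring `LaneTable::beachhead_states` later in this PR but using
plain collections:

```rust
use std::collections::{BTreeMap, BTreeSet};

fn beachheads(table_states: &BTreeSet<usize>,
              successors: &BTreeMap<usize, BTreeSet<usize>>) -> BTreeSet<usize> {
    // Every state that is some other state's successor is "reachable";
    // beachheads are the table states that are not.
    let reachable: BTreeSet<usize> = successors.values().flatten().copied().collect();
    table_states.iter().copied().filter(|s| !reachable.contains(s)).collect()
}

fn main() {
    // Lane table for G1's inconsistent state: S1 -> S3, S2 -> S3, S3 -> S3.
    let table_states: BTreeSet<usize> = [1, 2, 3].into_iter().collect();
    let successors: BTreeMap<usize, BTreeSet<usize>> = [
        (1, [3].into_iter().collect()),
        (2, [3].into_iter().collect()),
        (3, [3].into_iter().collect()),
    ].into_iter().collect();
    let expected: BTreeSet<usize> = [1, 2].into_iter().collect();
    assert_eq!(beachheads(&table_states, &successors), expected); // S1 and S2
}
```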

So we begin by iterating over the beachhead states.

```rust
for beachhead in beachheads { ... }
```

When we visit a state X, we will examine each of its successors Y. We
consider whether the context set for Y can be merged with the context
set for X. So, in our case, X will be S1 to start and Y will be S3.
In this case, the context sets can be merged, and hence we union S1 and S3
and wind up with the following union-find state:

```
S1,S3 = {C0:e, C1:d, C2:c}
S2 = {C1:c, C2:d}
```

(Note that this union is just for the purpose of tracking context; it
doesn't imply that S1 and S3 are the 'same states' or anything like
that.)

Next we examine the edge S3 -> S3. Here the contexts are already
merged and everything is happy, so we stop. (We already visited S3,
after all.)

This finishes our first beachhead, so we proceed to the next edge, S2
-> S3. Here we find that we **cannot** union the context: it would
produce an inconsistent state. So what we do is we **clone** S3 to
make a new state, S3', with the initial setup corresponding to the row
for S3 from the lane table:

```
S1,S3 = {C0:e, C1:d, C2:c}
S2 = {C1:c, C2:d}
S3' = {C0:e}
```

This also involves updating our LR(0-1) state set to have a new state
S3'. All edges from S2 that led to S3 now lead to S3'; the outgoing
edges from S3' remain unchanged. (At least to start.)

Therefore, the edge `S2 -> S3` is now `S2 -> S3'`. We can now merge
the contexts:

```
S1,S3 = {C0:e, C1:d, C2:c}
S2,S3' = {C0:e, C1:c, C2:d}
```

Now we examine the outgoing edge S3' -> S3. We cannot merge these
contexts, so we search (greedily, I guess) for a clone of S3 with which we
can merge the contexts. We find one in S3', and hence we redirect the
S3 edge to S3' and we are done. (I think the actual search we want is
to first look for a clone of S3 that is using literally the same
context as us (i.e., same root node), as in this case. If that is not
found, *then* we search for one with a mergeable context. If *that*
fails, then we clone a new state.)

The final state machine thus has two copies of S3, one for the path from S1,
and one for the path from S2, which gives us enough context to
proceed.
lalrpop/src/lr1/lane_table/construct/merge.rs (new file, 203 lines)
@ -0,0 +1,203 @@
|
||||
use super::*;
|
||||
|
||||
use collections::Multimap;
|
||||
use lr1::lane_table::table::context_set::ContextSet;
|
||||
|
||||
/// The "merge" phase of the algorithm is described in "Step 3c" of
|
||||
/// [the README][r]. It consists of walking through the various
|
||||
/// states in the lane table and merging them into sets of states that
|
||||
/// have compatible context sets; if we encounter a state S that has a
|
||||
/// successor T but where the context set of S is not compatible with
|
||||
/// T, then we will clone T into a new T2 (and hopefully the context
|
||||
/// set of S will be compatible with the reduced context of T2).
|
||||
///
|
||||
/// [r]: ../README.md
|
||||
pub struct Merge<'m, 'grammar: 'm> {
|
||||
table: &'m LaneTable<'grammar>,
|
||||
states: &'m mut Vec<LR1State<'grammar>>,
|
||||
visited: Set<StateIndex>,
|
||||
original_indices: Map<StateIndex, StateIndex>,
|
||||
clones: Multimap<StateIndex, Vec<StateIndex>>,
|
||||
target_states: Vec<StateIndex>,
|
||||
context_sets: ContextSets<'m>,
|
||||
}
|
||||
|
||||
impl<'m, 'grammar> Merge<'m, 'grammar> {
|
||||
pub fn new(table: &'m LaneTable<'grammar>,
|
||||
unify: &'m mut UnificationTable<StateSet>,
|
||||
states: &'m mut Vec<LR1State<'grammar>>,
|
||||
state_sets: &'m mut Map<StateIndex, StateSet>,
|
||||
inconsistent_state: StateIndex)
|
||||
-> Self {
|
||||
Merge {
|
||||
table: table,
|
||||
states: states,
|
||||
visited: Set::new(),
|
||||
original_indices: Map::new(),
|
||||
clones: Multimap::new(),
|
||||
target_states: vec![inconsistent_state],
|
||||
context_sets: ContextSets {
|
||||
unify: unify,
|
||||
state_sets: state_sets,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn start(&mut self, beachhead_state: StateIndex) -> Result<(), (StateIndex, StateIndex)> {
|
||||
debug!("Merge::start(beachhead_state={:?})", beachhead_state);
|
||||
|
||||
// Since we always start walks from beachhead states, and they
|
||||
// are not reachable from anyone else, this state should not
|
||||
// have been unioned with anything else yet.
|
||||
self.walk(beachhead_state)
|
||||
}
|
||||
|
||||
pub fn patch_target_starts(mut self, actions: &Set<Action<'grammar>>) {
|
||||
debug!("Merge::patch_target_starts(actions={:?})", actions);
|
||||
|
||||
for &target_state in &self.target_states {
|
||||
debug!("Merge::patch_target_starts: target_state={:?}", target_state);
|
||||
let context_set = self.context_sets.context_set(target_state);
|
||||
debug!("Merge::patch_target_starts: context_set={:?}", context_set);
|
||||
context_set.apply(&mut self.states[target_state.0], actions);
|
||||
}
|
||||
}
|
||||
|
||||
/// If `state` is a cloned state, find its original index. Useful
|
||||
/// for indexing into the lane table and so forth.
|
||||
fn original_index(&self, state: StateIndex) -> StateIndex {
|
||||
*self.original_indices.get(&state).unwrap_or(&state)
|
||||
}
|
||||
|
||||
fn successors(&self, state: StateIndex) -> Option<&'m Set<StateIndex>> {
|
||||
self.table.successors(self.original_index(state))
|
||||
}
|
||||
|
||||
fn walk(&mut self, state: StateIndex) -> Result<(), (StateIndex, StateIndex)> {
|
||||
debug!("Merge::walk(state={:?})", state);
|
||||
|
||||
if !self.visited.insert(state) {
|
||||
debug!("Merge::walk: visited already");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
for &successor in self.successors(state).iter().flat_map(|&s| s) {
|
||||
debug!("Merge::walk: state={:?} successor={:?}",
|
||||
state, successor);
|
||||
|
||||
if self.context_sets.union(state, successor) {
|
||||
debug!("Merge::walk: successful union, context-set = {:?}",
|
||||
self.context_sets.context_set(state));
|
||||
self.walk(successor)?;
|
||||
} else {
|
||||
// search for an existing clone with which we can merge
|
||||
debug!("Merge::walk: union failed, seek existing clone");
|
||||
let existing_clone = {
|
||||
let context_sets = &mut self.context_sets;
|
||||
self.clones.get(&successor)
|
||||
.into_iter()
|
||||
.flat_map(|clones| clones) // get() returns an Option<Set>
|
||||
.cloned()
|
||||
.filter(|&successor1| context_sets.union(state, successor1))
|
||||
.next()
|
||||
};
|
||||
|
||||
if let Some(successor1) = existing_clone {
|
||||
debug!("Merge::walk: found existing clone {:?}", successor1);
|
||||
self.patch_links(state, successor, successor1);
|
||||
self.walk(successor1)?;
|
||||
} else {
|
||||
// if we don't find one, we have to make a new clone
|
||||
debug!("Merge::walk: creating new clone of {:?}", successor);
|
||||
let successor1 = self.clone(successor);
|
||||
if self.context_sets.union(state, successor1) {
|
||||
self.patch_links(state, successor, successor1);
|
||||
self.walk(successor1)?;
|
||||
} else {
|
||||
debug!("Merge::walk: failed to union {:?} with {:?}",
|
||||
state, successor1);
|
||||
debug!("Merge::walk: state context = {:?}",
|
||||
self.context_sets.context_set(state));
|
||||
debug!("Merge::walk: successor context = {:?}",
|
||||
self.context_sets.context_set(successor1));
|
||||
|
||||
return Err((self.original_index(state),
|
||||
self.original_index(successor1)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn clone(&mut self, state: StateIndex) -> StateIndex {
|
||||
// create a new state with same contents as the old one
|
||||
let new_index = StateIndex(self.states.len());
|
||||
let new_state = self.states[state.0].clone();
|
||||
self.states.push(new_state);
|
||||
|
||||
// track the original index and clones
|
||||
let original_index = self.original_index(state);
|
||||
self.original_indices.insert(new_index, original_index);
|
||||
self.clones.push(original_index, new_index);
|
||||
|
||||
// create a new unify key for this new state
|
||||
let context_set = self.table.context_set(original_index).unwrap();
|
||||
self.context_sets.new_state(new_index, context_set);
|
||||
|
||||
// keep track of the clones of the target state
|
||||
if original_index == self.target_states[0] {
|
||||
self.target_states.push(new_index);
|
||||
}
|
||||
|
||||
debug!("Merge::clone: cloned {:?} to {:?}", state, new_index);
|
||||
new_index
|
||||
}
|
||||
|
||||
fn patch_links(&mut self,
|
||||
predecessor: StateIndex,
|
||||
original_successor: StateIndex,
|
||||
cloned_successor: StateIndex)
|
||||
{
|
||||
let replace = |target_state: &mut StateIndex| {
|
||||
if *target_state == original_successor {
|
||||
*target_state = cloned_successor;
|
||||
}
|
||||
};
|
||||
|
||||
let state = &mut self.states[predecessor.0];
|
||||
for (_, target_state) in &mut state.shifts {
|
||||
replace(target_state);
|
||||
}
|
||||
for (_, target_state) in &mut state.gotos {
|
||||
replace(target_state);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct ContextSets<'m> {
|
||||
state_sets: &'m mut Map<StateIndex, StateSet>,
|
||||
unify: &'m mut UnificationTable<StateSet>,
|
||||
}
|
||||
|
||||
impl<'m> ContextSets<'m> {
|
||||
fn context_set(&mut self, state: StateIndex) -> ContextSet {
|
||||
let state_set = self.state_sets[&state];
|
||||
self.unify.probe_value(state_set)
|
||||
}
|
||||
|
||||
fn union(&mut self, source: StateIndex, target: StateIndex) -> bool {
|
||||
let set1 = self.state_sets[&source];
|
||||
let set2 = self.state_sets[&target];
|
||||
let result = self.unify.unify_var_var(set1, set2).is_ok();
|
||||
debug!("ContextSets::union: source={:?} target={:?} result={:?}",
|
||||
source, target, result);
|
||||
result
|
||||
}
|
||||
|
||||
fn new_state(&mut self, new_index: StateIndex, context_set: ContextSet) {
|
||||
let state_set = self.unify.new_key(context_set);
|
||||
self.state_sets.insert(new_index, state_set);
|
||||
}
|
||||
}
|
lalrpop/src/lr1/lane_table/construct/mod.rs (new file, 190 lines)
@ -0,0 +1,190 @@
|
||||
//!
|
||||
|
||||
use collections::{Map, Set};
|
||||
use ena::unify::UnificationTable;
|
||||
use grammar::repr::*;
|
||||
use lr1::build;
|
||||
use lr1::core::*;
|
||||
use lr1::first::FirstSets;
|
||||
use lr1::lookahead::{Lookahead, TokenSet};
|
||||
use lr1::lane_table::lane::LaneTracer;
|
||||
use lr1::lane_table::table::{ConflictIndex, LaneTable};
|
||||
use lr1::lane_table::table::context_set::OverlappingLookahead;
|
||||
use lr1::state_graph::StateGraph;
|
||||
use std::rc::Rc;
|
||||
|
||||
mod merge;
|
||||
use self::merge::Merge;
|
||||
|
||||
mod state_set;
|
||||
use self::state_set::StateSet;
|
||||
|
||||
pub struct LaneTableConstruct<'grammar> {
|
||||
grammar: &'grammar Grammar,
|
||||
first_sets: FirstSets,
|
||||
start: NonterminalString,
|
||||
}
|
||||
|
||||
impl<'grammar> LaneTableConstruct<'grammar> {
|
||||
pub fn new(grammar: &'grammar Grammar, start: NonterminalString) -> Self {
|
||||
let first_sets = FirstSets::new(grammar);
|
||||
Self {
|
||||
grammar: grammar,
|
||||
start: start,
|
||||
first_sets: first_sets,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn construct(self) -> Result<Vec<LR1State<'grammar>>, LR1TableConstructionError<'grammar>> {
|
||||
let TableConstructionError { states, conflicts: _ } = {
|
||||
match build::build_lr0_states(self.grammar, self.start) {
|
||||
// This is the easy (and very rare...) case.
|
||||
Ok(lr0) => return Ok(self.promote_lr0_states(lr0)),
|
||||
Err(err) => err,
|
||||
}
|
||||
};
|
||||
|
||||
// Convert the LR(0) states into LR(0-1) states.
|
||||
let mut states = self.promote_lr0_states(states);
|
||||
|
||||
// For each inconsistent state, apply the lane-table algorithm to
|
||||
// resolve it.
|
||||
for i in 0.. {
|
||||
if i >= states.len() {
|
||||
break;
|
||||
}
|
||||
|
||||
match self.resolve_inconsistencies(&mut states, StateIndex(i)) {
|
||||
Ok(()) => { }
|
||||
Err(_) => {
|
||||
// We failed because of irreconcilable conflicts
|
||||
// somewhere. Just compute the conflicts from the final set of
|
||||
// states.
|
||||
let conflicts: Vec<Conflict<'grammar, TokenSet>> =
|
||||
states.iter()
|
||||
.flat_map(|s| Lookahead::conflicts(&s))
|
||||
.collect();
|
||||
return Err(TableConstructionError { states: states,
|
||||
conflicts: conflicts });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(states)
|
||||
}
|
||||
|
||||
/// Given a set of LR0 states, returns LR1 states where the lookahead
|
||||
/// is always `TokenSet::all()`. We refer to these states as LR(0-1)
|
||||
/// states in the README.
|
||||
fn promote_lr0_states(&self, lr0: Vec<LR0State<'grammar>>) -> Vec<LR1State<'grammar>> {
|
||||
let all = TokenSet::all();
|
||||
lr0.into_iter()
|
||||
.map(|s| {
|
||||
let items = s.items
|
||||
.vec
|
||||
.iter()
|
||||
.map(|item| {
|
||||
Item {
|
||||
production: item.production,
|
||||
index: item.index,
|
||||
lookahead: all.clone(),
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
let reductions = s.reductions
|
||||
.into_iter()
|
||||
.map(|(_, p)| (all.clone(), p))
|
||||
.collect();
|
||||
State {
|
||||
index: s.index,
|
||||
items: Items { vec: Rc::new(items) },
|
||||
shifts: s.shifts,
|
||||
reductions: reductions,
|
||||
gotos: s.gotos,
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn resolve_inconsistencies(&self,
|
||||
states: &mut Vec<LR1State<'grammar>>,
|
||||
inconsistent_state: StateIndex)
|
||||
-> Result<(), StateIndex> {
|
||||
let actions = super::conflicting_actions(&states[inconsistent_state.0]);
|
||||
if actions.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let table = self.build_lane_table(states, inconsistent_state, &actions);
|
||||
|
||||
// Consider first the "LALR" case, where the lookaheads for each
|
||||
// action are completely disjoint.
|
||||
if self.attempt_lalr(&mut states[inconsistent_state.0], &table, &actions) {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Construct the initial states; each state will map to a
|
||||
// context-set derived from its row in the lane-table. This is
|
||||
// fallible, because a state may be internally inconsistent.
|
||||
//
|
||||
// (To handle unification, we also map each state to a
|
||||
// `StateSet` that is its entry in the `ena` table.)
|
||||
let rows = table.rows()?;
|
||||
let mut unify = UnificationTable::<StateSet>::new();
|
||||
let mut state_sets = Map::new();
|
||||
for (&state_index, context_set) in &rows {
|
||||
let state_set = unify.new_key(context_set.clone());
|
||||
state_sets.insert(state_index, state_set);
|
||||
debug!("resolve_inconsistencies: state_index={:?}, state_set={:?}",
|
||||
state_index, state_set);
|
||||
}
|
||||
|
||||
// Now merge state-sets, cloning states where needed.
|
||||
let mut merge = Merge::new(&table, &mut unify, states, &mut state_sets, inconsistent_state);
|
||||
let beachhead_states = table.beachhead_states();
|
||||
for beachhead_state in beachhead_states {
|
||||
match merge.start(beachhead_state) {
|
||||
Ok(()) => { }
|
||||
Err((source, _)) => return Err(source),
|
||||
}
|
||||
}
|
||||
merge.patch_target_starts(&actions);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn attempt_lalr(&self,
|
||||
state: &mut LR1State<'grammar>,
|
||||
table: &LaneTable<'grammar>,
|
||||
actions: &Set<Action<'grammar>>)
|
||||
-> bool {
|
||||
match table.columns() {
|
||||
Ok(columns) => {
|
||||
debug!("attempt_lalr, columns={:#?}", columns);
|
||||
columns.apply(state, actions);
|
||||
true
|
||||
}
|
||||
Err(OverlappingLookahead) => {
|
||||
debug!("attempt_lalr, OverlappingLookahead");
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn build_lane_table(&self,
|
||||
states: &[LR1State<'grammar>],
|
||||
inconsistent_state: StateIndex,
|
||||
actions: &Set<Action<'grammar>>)
|
||||
-> LaneTable<'grammar> {
|
||||
let state_graph = StateGraph::new(states);
|
||||
let mut tracer = LaneTracer::new(self.grammar,
|
||||
states,
|
||||
&self.first_sets,
|
||||
&state_graph,
|
||||
actions.len());
|
||||
for (i, &action) in actions.iter().enumerate() {
|
||||
tracer.start_trace(inconsistent_state, ConflictIndex::new(i), action);
|
||||
}
|
||||
tracer.into_table()
|
||||
}
|
||||
}
|
lalrpop/src/lr1/lane_table/construct/state_set.rs (new file, 44 lines)
@ -0,0 +1,44 @@
|
||||
use ena::unify::{UnifyKey, UnifyValue};
|
||||
use lr1::lane_table::table::context_set::{ContextSet, OverlappingLookahead};
|
||||
|
||||
/// The unification key for a set of states in the lane table
|
||||
/// algorithm. Each set of states is associated with a
|
||||
/// `ContextSet`. When two sets of states are merged, their conflict
|
||||
/// sets are merged as well; this will fail if that would produce an
|
||||
/// overlapping conflict set.
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
||||
pub struct StateSet {
|
||||
index: u32
|
||||
}
|
||||
|
||||
impl UnifyKey for StateSet {
|
||||
type Value = ContextSet;
|
||||
|
||||
fn index(&self) -> u32 {
|
||||
self.index
|
||||
}
|
||||
|
||||
fn from_index(u: u32) -> Self {
|
||||
StateSet { index: u }
|
||||
}
|
||||
|
||||
fn tag() -> &'static str {
|
||||
"StateSet"
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME: The `ena` interface is really designed around `UnifyValue`
|
||||
// being cheaply cloneable; we should either refactor `ena` a bit or
|
||||
// find some other way to associate a `ContextSet` with a state set
|
||||
// (for example, we could have each state set be associated with an
|
||||
// index that maps to a `ContextSet`), and do the merging ourselves.
|
||||
// But this is easier for now, and cloning a `ContextSet` isn't THAT
|
||||
// expensive, right? :)
|
||||
impl UnifyValue for ContextSet {
|
||||
fn unify_values(value1: &Self, value2: &Self) -> Result<Self, (Self, Self)> {
|
||||
match ContextSet::union(value1, value2) {
|
||||
Ok(v) => Ok(v),
|
||||
Err(OverlappingLookahead) => Err((value1.clone(), value2.clone())),
|
||||
}
|
||||
}
|
||||
}
|
@ -10,22 +10,24 @@ use lr1::state_graph::StateGraph;
|
||||
|
||||
use super::table::{ConflictIndex, LaneTable};
|
||||
|
||||
pub struct LaneTracer<'trace, 'grammar: 'trace> {
|
||||
states: &'trace [LR0State<'grammar>],
|
||||
first_sets: FirstSets,
|
||||
state_graph: StateGraph,
|
||||
pub struct LaneTracer<'trace, 'grammar: 'trace, L: Lookahead + 'trace> {
|
||||
states: &'trace [State<'grammar, L>],
|
||||
first_sets: &'trace FirstSets,
|
||||
state_graph: &'trace StateGraph,
|
||||
table: LaneTable<'grammar>,
|
||||
}
|
||||
|
||||
impl<'trace, 'grammar> LaneTracer<'trace, 'grammar> {
|
||||
impl<'trace, 'grammar, L: Lookahead> LaneTracer<'trace, 'grammar, L> {
|
||||
pub fn new(grammar: &'grammar Grammar,
|
||||
states: &'trace [LR0State<'grammar>],
|
||||
states: &'trace [State<'grammar, L>],
|
||||
first_sets: &'trace FirstSets,
|
||||
state_graph: &'trace StateGraph,
|
||||
conflicts: usize)
|
||||
-> Self {
|
||||
LaneTracer {
|
||||
states: states,
|
||||
first_sets: FirstSets::new(grammar),
|
||||
state_graph: StateGraph::new(states),
|
||||
first_sets: first_sets,
|
||||
state_graph: state_graph,
|
||||
table: LaneTable::new(grammar, conflicts),
|
||||
}
|
||||
}
|
||||
@ -37,25 +39,21 @@ impl<'trace, 'grammar> LaneTracer<'trace, 'grammar> {
|
||||
pub fn start_trace(&mut self,
|
||||
state: StateIndex,
|
||||
conflict: ConflictIndex,
|
||||
item: LR0Item<'grammar>) {
|
||||
action: Action<'grammar>) {
|
||||
let mut visited_set = Set::default();
|
||||
|
||||
// if the conflict item is a "shift" item, then the context
|
||||
// is always the terminal to shift (and conflicts only arise
|
||||
// around shifting a terminal, so it must be a terminal)
|
||||
match item.shift_symbol() {
|
||||
Some((Symbol::Terminal(term), _)) => {
|
||||
match action {
|
||||
Action::Shift(term, _) => {
|
||||
let mut token_set = TokenSet::new();
|
||||
token_set.insert(Token::Terminal(term));
|
||||
self.table.add_lookahead(state, conflict, &token_set);
|
||||
}
|
||||
|
||||
Some((Symbol::Nonterminal(_), _)) => {
|
||||
panic!("invalid conflict item `{:?}`: shifts nonterminal",
|
||||
item);
|
||||
}
|
||||
|
||||
None => {
|
||||
Action::Reduce(prod) => {
|
||||
let item = Item::lr0(prod, prod.symbols.len());
|
||||
self.continue_trace(state, conflict, item, &mut visited_set);
|
||||
}
|
||||
}
|
||||
@ -114,14 +112,14 @@ impl<'trace, 'grammar> LaneTracer<'trace, 'grammar> {
|
||||
|
||||
let state_items = &self.states[state.0].items.vec;
|
||||
let nonterminal = item.production.nonterminal;
|
||||
for &pred_item in state_items.iter()
|
||||
.filter(|i| i.can_shift_nonterminal(nonterminal)) {
|
||||
for pred_item in state_items.iter()
|
||||
.filter(|i| i.can_shift_nonterminal(nonterminal)) {
|
||||
let symbol_sets = pred_item.symbol_sets();
|
||||
let mut first = self.first_sets.first0(symbol_sets.suffix);
|
||||
let derives_epsilon = first.take_eof();
|
||||
self.table.add_lookahead(state, conflict, &first);
|
||||
if derives_epsilon {
|
||||
self.continue_trace(state, conflict, pred_item, visited);
|
||||
self.continue_trace(state, conflict, pred_item.to_lr0(), visited);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,72 +1,26 @@
|
||||
use collections::Set;
|
||||
use lr1::build;
|
||||
use lr1::core::*;
|
||||
use lr1::lookahead::{Lookahead, Nil};
|
||||
use lr1::lookahead::Lookahead;
|
||||
use grammar::repr::*;
|
||||
|
||||
mod construct;
|
||||
mod lane;
|
||||
mod table;
|
||||
|
||||
#[cfg(test)]
|
||||
mod test;
|
||||
|
||||
use self::lane::*;
|
||||
use self::table::*;
|
||||
|
||||
pub fn build_lane_table_states<'grammar>(grammar: &'grammar Grammar,
|
||||
start: NonterminalString)
|
||||
-> LR1Result<'grammar> {
|
||||
let (lr0_states, lr0_conflicts) = match build::build_lr0_states(grammar, start) {
|
||||
Ok(s) => (s, vec![]),
|
||||
Err(e) => (e.states, e.conflicts),
|
||||
};
|
||||
|
||||
// this is mostly just dummy code to ensure that things get used
|
||||
// and avoid dead-code warnings
|
||||
for conflict in lr0_conflicts {
|
||||
let inconsistent_state = &lr0_states[conflict.state.0];
|
||||
let conflicting_items = conflicting_items(inconsistent_state);
|
||||
println!("conflicting_items={:#?}", conflicting_items);
|
||||
let mut tracer = LaneTracer::new(&grammar, &lr0_states, conflicting_items.len());
|
||||
for (i, &conflicting_item) in conflicting_items.iter().enumerate() {
|
||||
tracer.start_trace(inconsistent_state.index,
|
||||
ConflictIndex::new(i),
|
||||
conflicting_item);
|
||||
}
|
||||
let _ = tracer.into_table();
|
||||
}
|
||||
|
||||
unimplemented!()
|
||||
construct::LaneTableConstruct::new(grammar, start).construct()
|
||||
}
|
||||
|
||||
fn conflicting_items<'grammar>(state: &LR0State<'grammar>) -> Set<LR0Item<'grammar>> {
|
||||
let conflicts = Nil::conflicts(state);
|
||||
|
||||
let reductions1 = conflicts.iter()
|
||||
.map(|c| Item::lr0(c.production, c.production.symbols.len()));
|
||||
|
||||
let reductions2 = conflicts.iter()
|
||||
.filter_map(|c| {
|
||||
match c.action {
|
||||
Action::Reduce(p) => Some(Item::lr0(p, p.symbols.len())),
|
||||
Action::Shift(..) => None,
|
||||
}
|
||||
});
|
||||
|
||||
let shifts = conflicts.iter()
|
||||
.filter_map(|c| {
|
||||
match c.action {
|
||||
Action::Shift(term, _) => Some(term),
|
||||
Action::Reduce(..) => None,
|
||||
}
|
||||
})
|
||||
.flat_map(|term| {
|
||||
state.items
|
||||
.vec
|
||||
.iter()
|
||||
.filter(move |item| item.can_shift_terminal(term))
|
||||
.cloned()
|
||||
});
|
||||
|
||||
reductions1.chain(reductions2).chain(shifts).collect()
|
||||
fn conflicting_actions<'grammar, L: Lookahead>(state: &State<'grammar, L>)
|
||||
-> Set<Action<'grammar>>
|
||||
{
|
||||
let conflicts = L::conflicts(state);
|
||||
let reductions = conflicts.iter().map(|c| Action::Reduce(c.production));
|
||||
let actions = conflicts.iter().map(|c| c.action);
|
||||
reductions.chain(actions).collect()
|
||||
}
|
||||
|
lalrpop/src/lr1/lane_table/table/context_set/mod.rs (new file, 98 lines)
@ -0,0 +1,98 @@
|
||||
//! A key part of the lane-table algorithm is the idea of a CONTEXT
|
||||
//! SET (my name, the paper has no name for this). Basically it
|
||||
//! represents the LR1 context under which a given conflicting action
|
||||
//! would take place.
|
||||
//!
|
||||
//! So, for example, imagine this grammar:
|
||||
//!
|
||||
//! ```notrust
|
||||
//! A = B x
|
||||
//! | C y
|
||||
//! B = z
|
||||
//! C = z
|
||||
//! ```
|
||||
//!
|
||||
//! This gives rise to states like:
|
||||
//!
|
||||
//! - `S0 = { * B x, * C y, B = * z, C = * z }`
|
||||
//! - `S1 = { B = z *, C = z * }`
|
||||
//!
|
||||
//! This second state has two conflicting items. Let's call them
|
||||
//! conflicts 0 and 1 respectively. The conflict set would then have
|
||||
//! two entries (one for each conflict) and it would map each of them
|
||||
//! to a TokenSet supplying context. So when we trace everything
|
||||
//! out we might get a ContextSet of:
|
||||
//!
|
||||
//! - `[ 0: x, 1: y ]`
|
||||
//!
|
||||
//! In general, you want to ensure that the token sets of all
|
||||
//! conflicting items are pairwise-disjoint, or else if you get to a
|
||||
//! state that has both of those items (which, by definition, does
|
||||
//! arise) you won't know which to take. In this case, we're all set,
|
||||
//! because item 0 occurs only with lookahead `x` and item 1 with
|
||||
//! lookahead `y`.
|
||||
|
||||
use collections::{Set, Map};
|
||||
use lr1::core::*;
|
||||
use lr1::lookahead::*;
|
||||
mod test;
|
||||
|
||||
use super::ConflictIndex;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct ContextSet {
|
||||
values: Vec<TokenSet>
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct OverlappingLookahead;
|
||||
|
||||
impl ContextSet {
|
||||
pub fn new(num_conflicts: usize) -> Self {
|
||||
ContextSet {
|
||||
values: (0..num_conflicts).map(|_| TokenSet::new()).collect()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn union(set1: &ContextSet, set2: &ContextSet) -> Result<Self, OverlappingLookahead> {
|
||||
let mut result = set1.clone();
|
||||
for (i, t) in set2.values.iter().enumerate() {
|
||||
result.insert(ConflictIndex::new(i), t)?;
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Attempts to merge the values `conflict: set` into this
|
||||
/// conflict set. If this would result in an invalid conflict set
|
||||
/// (where two conflicts have overlapping lookahead), then returns
|
||||
/// `Err(OverlappingLookahead)` and has no effect.
|
||||
///
|
||||
/// Assuming no errors, returns `Ok(true)` if this resulted in any
|
||||
/// modifications, and `Ok(false)` otherwise.
|
||||
pub fn insert(&mut self, conflict: ConflictIndex, set: &TokenSet) -> Result<bool, OverlappingLookahead> {
|
||||
for (value, index) in self.values.iter().zip((0..).map(ConflictIndex::new)) {
|
||||
if index != conflict {
|
||||
if value.is_intersecting(&set) {
|
||||
return Err(OverlappingLookahead);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(self.values[conflict.index].union_with(&set))
|
||||
}
|
||||
|
||||
pub fn apply<'grammar>(&self,
|
||||
state: &mut LR1State<'grammar>,
|
||||
actions: &Set<Action<'grammar>>) {
|
||||
// create a map from each action to its lookahead
|
||||
let lookaheads: Map<Action<'grammar>, &TokenSet> = actions.iter()
|
||||
.cloned()
|
||||
.zip(&self.values)
|
||||
.collect();
|
||||
|
||||
for &mut (ref mut lookahead, production) in &mut state.reductions {
|
||||
let action = Action::Reduce(production);
|
||||
*lookahead = lookaheads[&action].clone();
|
||||
}
|
||||
}
|
||||
}
|
lalrpop/src/lr1/lane_table/table/context_set/test.rs (new file, 1 line)
@ -0,0 +1 @@
|
||||
#![cfg(test)]
|
@ -21,6 +21,9 @@ use std::default::Default;
|
||||
use std::fmt::{Debug, Error, Formatter};
|
||||
use std::iter;
|
||||
|
||||
pub mod context_set;
|
||||
use self::context_set::{ContextSet, OverlappingLookahead};
|
||||
|
||||
#[derive(Copy, Clone, Debug, PartialOrd, Ord, PartialEq, Eq)]
|
||||
pub struct ConflictIndex {
|
||||
index: usize,
|
||||
@ -62,6 +65,79 @@ impl<'grammar> LaneTable<'grammar> {
|
||||
pub fn add_successor(&mut self, state: StateIndex, succ: StateIndex) {
|
||||
self.successors.push(state, succ);
|
||||
}
|
||||
|
||||
/// Unions together the lookaheads for each column and returns a
|
||||
/// context set containing all of them. For an LALR(1) grammar,
|
||||
/// these token sets will be mutually disjoint, as discussed in
|
||||
/// the [README]; otherwise `Err` will be returned.
|
||||
///
|
||||
/// [README]: ../README.md
|
||||
pub fn columns(&self) -> Result<ContextSet, OverlappingLookahead> {
|
||||
let mut columns = ContextSet::new(self.conflicts);
|
||||
for (&(_, conflict_index), set) in &self.lookaheads {
|
||||
columns.insert(conflict_index, set)?;
|
||||
}
|
||||
Ok(columns)
|
||||
}
|
||||
|
||||
pub fn successors(&self, state: StateIndex) -> Option<&Set<StateIndex>> {
|
||||
self.successors.get(&state)
|
||||
}
|
||||
|
||||
/// Returns the set of states in the table that are **not**
|
||||
/// reachable from another state in the table. These are called
|
||||
/// "beachhead states".
|
||||
pub fn beachhead_states(&self) -> Set<StateIndex> {
|
||||
// set of all states that are reachable from another state
|
||||
let reachable: Set<StateIndex> =
|
||||
self.successors.iter()
|
||||
.flat_map(|(_pred, succ)| succ)
|
||||
.cloned()
|
||||
.collect();
|
||||
|
||||
self.lookaheads.keys()
|
||||
.map(|&(state_index, _)| state_index)
|
||||
.filter(|s| !reachable.contains(s))
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn context_set(&self, state: StateIndex) -> Result<ContextSet, OverlappingLookahead> {
|
||||
let mut set = ContextSet::new(self.conflicts);
|
||||
for (&(state_index, conflict_index), token_set) in &self.lookaheads {
|
||||
if state_index == state {
|
||||
set.insert(conflict_index, token_set)?;
|
||||
}
|
||||
}
|
||||
Ok(set)
|
||||
}
|
||||
|
||||
/// Returns a map containing all states that appear in the table,
|
||||
/// along with the context set for each state (i.e., each row in
|
||||
/// the table, basically). Returns Err if any state has a conflict
|
||||
/// between the context sets even within its own row.
|
||||
pub fn rows(&self) -> Result<Map<StateIndex, ContextSet>, StateIndex> {
|
||||
let mut map = Map::new();
|
||||
for (&(state_index, conflict_index), token_set) in &self.lookaheads {
|
||||
match {
|
||||
map.entry(state_index)
|
||||
.or_insert_with(|| ContextSet::new(self.conflicts))
|
||||
.insert(conflict_index, token_set)
|
||||
} {
|
||||
Ok(_changed) => { }
|
||||
Err(OverlappingLookahead) => return Err(state_index)
|
||||
}
|
||||
}
|
||||
|
||||
// In some cases, there are states that have no context at
|
||||
// all, only successors. In that case, make sure to add an
|
||||
// empty row for them.
|
||||
for (&state_index, _) in &self.successors {
|
||||
map.entry(state_index)
|
||||
.or_insert_with(|| ContextSet::new(self.conflicts));
|
||||
}
|
||||
|
||||
Ok(map)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'grammar> Debug for LaneTable<'grammar> {
|
||||
|
@ -3,15 +3,27 @@ use grammar::repr::*;
|
||||
use test_util::{expect_debug, normalized_grammar};
|
||||
use lr1::build;
|
||||
use lr1::core::*;
|
||||
use lr1::first::FirstSets;
|
||||
use lr1::interpret;
|
||||
use lr1::state_graph::StateGraph;
|
||||
use lr1::tls::Lr1Tls;
|
||||
use tls::Tls;
|
||||
|
||||
use super::construct::*;
|
||||
use super::lane::*;
|
||||
use super::table::*;
|
||||
|
||||
macro_rules! tokens {
|
||||
($($x:expr),*) => {
|
||||
vec![$(TerminalString::quoted(intern($x))),*].into_iter()
|
||||
}
|
||||
}
|
||||
|
||||
fn sym(t: &str) -> Symbol {
|
||||
if t.chars().next().unwrap().is_uppercase() {
|
||||
if t.chars()
|
||||
.next()
|
||||
.unwrap()
|
||||
.is_uppercase() {
|
||||
Symbol::Nonterminal(nt(t))
|
||||
} else {
|
||||
Symbol::Terminal(term(t))
|
||||
@ -33,7 +45,7 @@ fn traverse(states: &[LR0State], tokens: &[&str]) -> StateIndex {
|
||||
/// A simplified version of the paper's initial grammar; this version
|
||||
/// only has one inconsistent state (the same state they talk about in
|
||||
/// the paper).
|
||||
pub fn paper_example_small() -> Grammar {
|
||||
pub fn paper_example_g0() -> Grammar {
|
||||
normalized_grammar(r#"
|
||||
grammar;
|
||||
|
||||
@ -42,6 +54,39 @@ pub G: () = {
|
||||
Y "d",
|
||||
};
|
||||
|
||||
X: () = {
|
||||
"e" X,
|
||||
"e",}
|
||||
;
|
||||
|
||||
Y: () = {
|
||||
"e" Y,
|
||||
"e"
|
||||
};
|
||||
"#)
|
||||
}
|
||||
|
||||
/// A (corrected) version of the sample grammar G1 from the paper. The
|
||||
/// grammar as written in the text omits some items, but the diagrams
|
||||
/// seem to contain the full set. I believe this is one of the
|
||||
/// smallest examples that still requires splitting states from the
|
||||
/// LR0 states.
|
||||
pub fn paper_example_g1() -> Grammar {
|
||||
normalized_grammar(r#"
|
||||
grammar;
|
||||
|
||||
pub G: () = {
|
||||
// if "a" is input, then lookahead "d" means "reduce X"
|
||||
// and lookahead "c" means "reduce "Y"
|
||||
"a" X "d",
|
||||
"a" Y "c",
|
||||
|
||||
// if "b" is input, then lookahead "d" means "reduce Y"
|
||||
// and lookahead "c" means "reduce X.
|
||||
"b" X "c",
|
||||
"b" Y "d",
|
||||
};
|
||||
|
||||
X: () = {
|
||||
"e" X,
|
||||
"e",
|
||||
@ -67,9 +112,15 @@ fn build_table<'grammar>(grammar: &'grammar Grammar,
|
||||
println!("inconsistent_state={:#?}", inconsistent_state.items);
|
||||
|
||||
// Extract conflicting items and trace the lanes, constructing a table
|
||||
let conflicting_items = super::conflicting_items(inconsistent_state);
|
||||
let conflicting_items = super::conflicting_actions(inconsistent_state);
|
||||
println!("conflicting_items={:#?}", conflicting_items);
|
||||
let mut tracer = LaneTracer::new(&grammar, &lr0_err.states, conflicting_items.len());
|
||||
let first_sets = FirstSets::new(&grammar);
|
||||
let state_graph = StateGraph::new(&lr0_err.states);
|
||||
let mut tracer = LaneTracer::new(&grammar,
|
||||
&lr0_err.states,
|
||||
&first_sets,
|
||||
&state_graph,
|
||||
conflicting_items.len());
|
||||
for (i, &conflicting_item) in conflicting_items.iter().enumerate() {
|
||||
tracer.start_trace(inconsistent_state.index,
|
||||
ConflictIndex::new(i),
|
||||
@ -80,19 +131,91 @@ fn build_table<'grammar>(grammar: &'grammar Grammar,
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn small_conflict_1() {
|
||||
fn g0_conflict_1() {
|
||||
let _tls = Tls::test();
|
||||
let grammar = paper_example_small();
|
||||
let grammar = paper_example_g0();
|
||||
let _lr1_tls = Lr1Tls::install(grammar.terminals.clone());
|
||||
let table = build_table(&grammar, "G", &["e"]);
|
||||
println!("{:#?}", table);
|
||||
// conflicting_actions={
|
||||
// Shift("e") // C0
|
||||
// Reduce(X = "e" => ActionFn(4)) // C1
|
||||
// Reduce(Y = "e" => ActionFn(6)) // C2
|
||||
// }
|
||||
expect_debug(&table,
|
||||
r#"
|
||||
| State | C0 | C1 | C2 | C3 | C4 | C5 | Successors |
|
||||
| S0 | | ["c"] | | | ["d"] | | {S3} |
|
||||
| S3 | ["e"] | [] | ["e"] | ["e"] | [] | ["e"] | {S3} |
|
||||
| State | C0 | C1 | C2 | Successors |
|
||||
| S0 | | ["c"] | ["d"] | {S3} |
|
||||
| S3 | ["e"] | [] | [] | {S3} |
|
||||
"#
|
||||
.trim_left());
|
||||
.trim_left());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn paper_example_g1_conflict_1() {
|
||||
let _tls = Tls::test();
|
||||
let grammar = paper_example_g1();
|
||||
let _lr1_tls = Lr1Tls::install(grammar.terminals.clone());
|
||||
let table = build_table(&grammar, "G", &["a", "e"]);
|
||||
println!("{:#?}", table);
|
||||
// conflicting_actions={
|
||||
// Shift("e") // C0
|
||||
// Reduce(X = "e" => ActionFn(6)) // C1
|
||||
// Reduce(Y = "e" => ActionFn(8)) // C2
|
||||
// }
|
||||
expect_debug(&table,
|
||||
r#"
|
||||
| State | C0 | C1 | C2 | Successors |
|
||||
| S1 | | ["d"] | ["c"] | {S5} |
|
||||
| S2 | | ["c"] | ["d"] | {S5} |
|
||||
| S5 | ["e"] | [] | [] | {S5} |
|
||||
"#
|
||||
.trim_left());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn paper_example_g0_build() {
|
||||
let _tls = Tls::test();
|
||||
let grammar = paper_example_g0();
|
||||
let _lr1_tls = Lr1Tls::install(grammar.terminals.clone());
|
||||
let lr0_err = build::build_lr0_states(&grammar, nt("G")).unwrap_err();
|
||||
let states = LaneTableConstruct::new(&grammar, nt("G")).construct()
|
||||
.expect("failed to build lane table states");
|
||||
|
||||
// we do not require more *states* than LR(0), just different lookahead
|
||||
assert_eq!(states.len(), lr0_err.states.len());
|
||||
|
||||
let tree = interpret::interpret(&states, tokens!["e", "c"]).unwrap();
|
||||
expect_debug(&tree, r#"[G: [X: "e"], "c"]"#);
|
||||
|
||||
let tree = interpret::interpret(&states, tokens!["e", "e", "c"]).unwrap();
|
||||
expect_debug(&tree, r#"[G: [X: "e", [X: "e"]], "c"]"#);
|
||||
|
||||
let tree = interpret::interpret(&states, tokens!["e", "e", "d"]).unwrap();
|
||||
expect_debug(&tree, r#"[G: [Y: "e", [Y: "e"]], "d"]"#);
|
||||
|
||||
interpret::interpret(&states, tokens!["e", "e", "e"]).unwrap_err();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn paper_example_g1_build() {
|
||||
let _tls = Tls::test();
|
||||
let grammar = paper_example_g1();
|
||||
let _lr1_tls = Lr1Tls::install(grammar.terminals.clone());
|
||||
let lr0_err = build::build_lr0_states(&grammar, nt("G")).unwrap_err();
|
||||
let states = LaneTableConstruct::new(&grammar, nt("G")).construct()
|
||||
.expect("failed to build lane table states");
|
||||
|
||||
// we require more *states* than LR(0), not just different lookahead
|
||||
assert_eq!(states.len() - lr0_err.states.len(), 1);
|
||||
|
||||
let tree = interpret::interpret(&states, tokens!["a", "e", "e", "d"]).unwrap();
|
||||
expect_debug(&tree, r#"[G: "a", [X: "e", [X: "e"]], "d"]"#);
|
||||
|
||||
let tree = interpret::interpret(&states, tokens!["b", "e", "e", "d"]).unwrap();
|
||||
expect_debug(&tree, r#"[G: "b", [Y: "e", [Y: "e"]], "d"]"#);
|
||||
|
||||
interpret::interpret(&states, tokens!["e", "e", "e"]).unwrap_err();
|
||||
}
|
||||
|
||||
pub fn paper_example_large() -> Grammar {
|
||||
@ -158,26 +281,35 @@ fn large_conflict_1() {
|
||||
let _lr1_tls = Lr1Tls::install(grammar.terminals.clone());
|
||||
let table = build_table(&grammar, "G", &["x", "s", "k", "t"]);
|
||||
println!("{:#?}", table);
|
||||
|
||||
// conflicting_actions={
|
||||
// Shift("s") // C0
|
||||
// Reduce(U = U "k" "t") // C1
|
||||
// Reduce(X = "k" "t") // C2
|
||||
// Reduce(Y = "k" "t") // C3
|
||||
// }
|
||||
|
||||
expect_debug(&table,
|
||||
r#"
|
||||
| State | C0 | C1 | C2 | C3 | Successors |
|
||||
| S1 | ["k"] | | | | {S5} |
|
||||
| S2 | ["k"] | | | | {S7} |
|
||||
| S3 | ["k"] | | | | {S7} |
|
||||
| S4 | ["k"] | | | | {S7} |
|
||||
| S1 | | ["k"] | | | {S5} |
|
||||
| S2 | | ["k"] | | | {S7} |
|
||||
| S3 | | ["k"] | | | {S7} |
|
||||
| S4 | | ["k"] | | | {S7} |
|
||||
| S5 | | | ["a"] | ["r"] | {S16} |
|
||||
| S7 | | | ["c", "w"] | ["d"] | {S16} |
|
||||
| S16 | | | | | {S27} |
|
||||
| S27 | ["k"] | ["s"] | | | {S32} |
|
||||
| S27 | ["s"] | ["k"] | | | {S32} |
|
||||
| S32 | | | ["z"] | ["u"] | {S16} |
|
||||
"#
|
||||
.trim_left());
|
||||
.trim_left());
|
||||
|
||||
// ^^ This differs in some particulars from what appears in the
|
||||
// paper, but I believe it to be correct, and the paper to be wrong.
|
||||
//
|
||||
// Here is the table using the state names from the paper. I've marked
|
||||
// the differences with `(*)`.
|
||||
// Here is the table using the state names from the paper. I've
|
||||
// marked the differences with `(*)`. Note that the paper does not
|
||||
// include the C0 column (the shift).
|
||||
//
|
||||
// | State | pi1 | pi2 | pi3 | Successors |
|
||||
// | B | ["k"] | | *1 | {G} |
|
||||
@ -204,3 +336,16 @@ fn large_conflict_1() {
|
||||
// X P", and the lookahead from the "X" here is FIRST(P) which is
|
||||
// "z".
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn paper_example_large_build() {
|
||||
let _tls = Tls::test();
|
||||
let grammar = paper_example_large();
|
||||
let _lr1_tls = Lr1Tls::install(grammar.terminals.clone());
|
||||
let states = LaneTableConstruct::new(&grammar, nt("G")).construct()
|
||||
.expect("failed to build lane table states");
|
||||
|
||||
let tree = interpret::interpret(&states, tokens!["y", "s", "k", "t", "c", "b"]).unwrap();
|
||||
expect_debug(&tree, r#"[G: "y", [W: [U: "s"], [X: "k", "t"], [C: "c"]], "b"]"#);
|
||||
}
|
||||
|
||||
|
@ -152,11 +152,23 @@ impl TokenSet {
|
||||
pub fn new() -> Self {
|
||||
with(|terminals| {
|
||||
TokenSet {
|
||||
bit_set: BitSet::with_capacity(terminals.all.len() + 1)
|
||||
bit_set: BitSet::with_capacity(terminals.all.len() + 2)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// A TokenSet containing all possible terminals + EOF.
|
||||
pub fn all() -> Self {
|
||||
let mut s = TokenSet::new();
|
||||
with(|terminals| {
|
||||
for i in 0 .. terminals.all.len() {
|
||||
s.bit_set.insert(i);
|
||||
}
|
||||
s.insert_eof();
|
||||
});
|
||||
s
|
||||
}
|
||||
|
||||
pub fn eof() -> Self {
|
||||
let mut set = TokenSet::new();
|
||||
set.insert_eof();