Merge pull request #1069 from alexcrichton/rejigger-stack

Switch from heap/stack to just a heap
This commit is contained in:
Alex Crichton 2018-11-30 14:17:54 -06:00 committed by GitHub
commit 13d9e47d17
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 137 additions and 250 deletions

View File

@ -392,7 +392,11 @@ impl<'a, 'b> Js2Rust<'a, 'b> {
if arg.is_ref_anyref() { if arg.is_ref_anyref() {
self.js_arguments.push((name.clone(), "any".to_string())); self.js_arguments.push((name.clone(), "any".to_string()));
self.cx.expose_borrowed_objects(); self.cx.expose_borrowed_objects();
self.finally("stack.pop();"); self.cx.expose_global_stack_pointer();
// The "stack-ful" nature means that we're always popping from the
// stack; make sure that we actually clear our reference so that
// stale values can get GC'd
self.finally("heap[stack_pointer++] = undefined;");
self.rust_arguments self.rust_arguments
.push(format!("addBorrowedObject({})", name)); .push(format!("addBorrowedObject({})", name));
return Ok(self); return Ok(self);

View File

@ -114,7 +114,9 @@ enum Import<'a> {
}, },
} }
const INITIAL_SLAB_VALUES: &[&str] = &["undefined", "null", "true", "false"]; const INITIAL_HEAP_VALUES: &[&str] = &["undefined", "null", "true", "false"];
// Must be kept in sync with `src/lib.rs` of the `wasm-bindgen` crate
const INITIAL_HEAP_OFFSET: usize = 32;
impl<'a> Context<'a> { impl<'a> Context<'a> {
fn export(&mut self, name: &str, contents: &str, comments: Option<String>) { fn export(&mut self, name: &str, contents: &str, comments: Option<String>) {
@ -168,44 +170,20 @@ impl<'a> Context<'a> {
self.write_classes()?; self.write_classes()?;
self.bind("__wbindgen_object_clone_ref", &|me| { self.bind("__wbindgen_object_clone_ref", &|me| {
me.expose_add_heap_object();
me.expose_get_object(); me.expose_get_object();
let bump_cnt = if me.config.debug { me.expose_add_heap_object();
String::from( Ok(String::from(
" "
if (typeof(val) === 'number') throw new Error('corrupt slab'); function(idx) {
val.cnt += 1; return addHeapObject(getObject(idx));
", }
)
} else {
String::from("val.cnt += 1;")
};
Ok(format!(
" "
function(idx) {{
// If this object is on the stack promote it to the heap.
if ((idx & 1) === 1) return addHeapObject(getObject(idx));
// Otherwise if the object is on the heap just bump the
// refcount and move on
const val = slab[idx >> 1];
{}
return idx;
}}
",
bump_cnt
)) ))
})?; })?;
self.bind("__wbindgen_object_drop_ref", &|me| { self.bind("__wbindgen_object_drop_ref", &|me| {
me.expose_drop_ref(); me.expose_drop_ref();
Ok(String::from( Ok(String::from("function(i) { dropObject(i); }"))
"
function(i) {
dropRef(i);
}
",
))
})?; })?;
self.bind("__wbindgen_string_new", &|me| { self.bind("__wbindgen_string_new", &|me| {
@ -222,13 +200,7 @@ impl<'a> Context<'a> {
self.bind("__wbindgen_number_new", &|me| { self.bind("__wbindgen_number_new", &|me| {
me.expose_add_heap_object(); me.expose_add_heap_object();
Ok(String::from( Ok(String::from("function(i) { return addHeapObject(i); }"))
"
function(i) {
return addHeapObject(i);
}
",
))
})?; })?;
self.bind("__wbindgen_number_get", &|me| { self.bind("__wbindgen_number_get", &|me| {
@ -370,7 +342,7 @@ impl<'a> Context<'a> {
" "
function(i) { function(i) {
const obj = getObject(i).original; const obj = getObject(i).original;
dropRef(i); dropObject(i);
if (obj.cnt-- == 1) { if (obj.cnt-- == 1) {
obj.a = 0; obj.a = 0;
return 1; return 1;
@ -383,7 +355,7 @@ impl<'a> Context<'a> {
self.bind("__wbindgen_cb_forget", &|me| { self.bind("__wbindgen_cb_forget", &|me| {
me.expose_drop_ref(); me.expose_drop_ref();
Ok("dropRef".to_string()) Ok("dropObject".to_string())
})?; })?;
self.bind("__wbindgen_json_parse", &|me| { self.bind("__wbindgen_json_parse", &|me| {
@ -427,14 +399,7 @@ impl<'a> Context<'a> {
self.bind("__wbindgen_memory", &|me| { self.bind("__wbindgen_memory", &|me| {
me.expose_add_heap_object(); me.expose_add_heap_object();
let mem = me.memory(); let mem = me.memory();
Ok(format!( Ok(format!("function() {{ return addHeapObject({}); }}", mem))
"
function() {{
return addHeapObject({});
}}
",
mem
))
})?; })?;
self.bind("__wbindgen_module", &|me| { self.bind("__wbindgen_module", &|me| {
@ -916,149 +881,54 @@ impl<'a> Context<'a> {
if !self.exposed_globals.insert("drop_ref") { if !self.exposed_globals.insert("drop_ref") {
return; return;
} }
self.expose_global_slab(); self.expose_global_heap();
self.expose_global_slab_next(); self.expose_global_heap_next();
let validate_owned = if self.config.debug {
String::from( // Note that here we check if `idx` shouldn't actually be dropped. This
" // is due to the fact that `JsValue::null()` and friends can be passed
if ((idx & 1) === 1) throw new Error('cannot drop ref of stack objects'); // by value to JS where we'll automatically call this method. Those
", // constants, however, cannot be dropped. See #1054 for removing this
) // branch.
} else { //
String::new() // Otherwise the free operation here is pretty simple, just appending to
}; // the linked list of heap slots that are free.
let dec_ref = if self.config.debug {
String::from(
"
if (typeof(obj) === 'number') throw new Error('corrupt slab');
obj.cnt -= 1;
if (obj.cnt > 0) return;
",
)
} else {
String::from(
"
obj.cnt -= 1;
if (obj.cnt > 0) return;
",
)
};
self.global(&format!( self.global(&format!(
" "
function dropRef(idx) {{ function dropObject(idx) {{
{}
idx = idx >> 1;
if (idx < {}) return; if (idx < {}) return;
let obj = slab[idx]; heap[idx] = heap_next;
{} heap_next = idx;
// If we hit 0 then free up our space in the slab
slab[idx] = slab_next;
slab_next = idx;
}} }}
", ",
validate_owned, INITIAL_HEAP_OFFSET + INITIAL_HEAP_VALUES.len(),
INITIAL_SLAB_VALUES.len(),
dec_ref
)); ));
} }
fn expose_global_stack(&mut self) { fn expose_global_heap(&mut self) {
if !self.exposed_globals.insert("stack") { if !self.exposed_globals.insert("heap") {
return; return;
} }
self.global(&format!( self.global(&format!("const heap = new Array({});", INITIAL_HEAP_OFFSET));
" self.global(&format!("heap.push({});", INITIAL_HEAP_VALUES.join(", ")));
const stack = [];
"
));
if self.config.debug {
self.export(
"assertStackEmpty",
"
function() {
if (stack.length === 0) return;
throw new Error('stack is not currently empty');
}
",
None,
);
}
} }
fn expose_global_slab(&mut self) { fn expose_global_heap_next(&mut self) {
if !self.exposed_globals.insert("slab") { if !self.exposed_globals.insert("heap_next") {
return; return;
} }
let initial_values = INITIAL_SLAB_VALUES self.expose_global_heap();
.iter() self.global("let heap_next = heap.length;");
.map(|s| format!("{{ obj: {} }}", s))
.collect::<Vec<_>>();
self.global(&format!("const slab = [{}];", initial_values.join(", ")));
if self.config.debug {
self.export(
"assertSlabEmpty",
&format!(
"
function() {{
for (let i = {}; i < slab.length; i++) {{
if (typeof(slab[i]) === 'number') continue;
throw new Error('slab is not currently empty');
}}
}}
",
initial_values.len()
),
None,
);
}
}
fn expose_global_slab_next(&mut self) {
if !self.exposed_globals.insert("slab_next") {
return;
}
self.expose_global_slab();
self.global(
"
let slab_next = slab.length;
",
);
} }
fn expose_get_object(&mut self) { fn expose_get_object(&mut self) {
if !self.exposed_globals.insert("get_object") { if !self.exposed_globals.insert("get_object") {
return; return;
} }
self.expose_global_stack(); self.expose_global_heap();
self.expose_global_slab();
let get_obj = if self.config.debug { // Accessing a heap object is just a simple index operation due to how
String::from( // the stack/heap are laid out.
" self.global("function getObject(idx) { return heap[idx]; }");
if (typeof(val) === 'number') throw new Error('corrupt slab');
return val.obj;
",
)
} else {
String::from(
"
return val.obj;
",
)
};
self.global(&format!(
"
function getObject(idx) {{
if ((idx & 1) === 1) {{
return stack[idx >> 1];
}} else {{
const val = slab[idx >> 1];
{}
}}
}}
",
get_obj
));
} }
fn expose_assert_num(&mut self) { fn expose_assert_num(&mut self) {
@ -1510,18 +1380,32 @@ impl<'a> Context<'a> {
); );
} }
fn expose_global_stack_pointer(&mut self) {
if !self.exposed_globals.insert("stack_pointer") {
return;
}
self.global(&format!("let stack_pointer = {};", INITIAL_HEAP_OFFSET));
}
fn expose_borrowed_objects(&mut self) { fn expose_borrowed_objects(&mut self) {
if !self.exposed_globals.insert("borrowed_objects") { if !self.exposed_globals.insert("borrowed_objects") {
return; return;
} }
self.expose_global_stack(); self.expose_global_heap();
self.expose_global_stack_pointer();
// Our `stack_pointer` points to where we should start writing stack
// objects, and the `stack_pointer` is incremented in a `finally` block
// after executing this. Once we've reserved stack space we write the
// value. If the reserved stack slots run out, `addBorrowedObject` throws
// an 'out of js stack' error rather than silently underflowing.
self.global( self.global(
" "
function addBorrowedObject(obj) { function addBorrowedObject(obj) {
stack.push(obj); if (stack_pointer == 1) throw new Error('out of js stack');
return ((stack.length - 1) << 1) | 1; heap[--stack_pointer] = obj;
return stack_pointer;
} }
", "
); );
} }
@ -1535,7 +1419,7 @@ impl<'a> Context<'a> {
" "
function takeObject(idx) { function takeObject(idx) {
const ret = getObject(idx); const ret = getObject(idx);
dropRef(idx); dropObject(idx);
return ret; return ret;
} }
", ",
@ -1546,34 +1430,34 @@ impl<'a> Context<'a> {
if !self.exposed_globals.insert("add_heap_object") { if !self.exposed_globals.insert("add_heap_object") {
return; return;
} }
self.expose_global_slab(); self.expose_global_heap();
self.expose_global_slab_next(); self.expose_global_heap_next();
let set_slab_next = if self.config.debug { let set_heap_next = if self.config.debug {
String::from( String::from(
" "
if (typeof(next) !== 'number') throw new Error('corrupt slab'); if (typeof(heap_next) !== 'number') throw new Error('corrupt heap');
slab_next = next;
", ",
) )
} else { } else {
String::from( String::new()
"
slab_next = next;
",
)
}; };
// Allocating a slot on the heap first goes through the linked list
// (starting at `heap_next`). Once that linked list is exhausted we'll
// be pointing beyond the end of the array, at which point we'll reserve
// one more slot and use that.
self.global(&format!( self.global(&format!(
" "
function addHeapObject(obj) {{ function addHeapObject(obj) {{
if (slab_next === slab.length) slab.push(slab.length + 1); if (heap_next === heap.length) heap.push(heap.length + 1);
const idx = slab_next; const idx = heap_next;
const next = slab[idx]; heap_next = heap[idx];
{} {}
slab[idx] = {{ obj, cnt: 1 }}; heap[idx] = obj;
return idx << 1; return idx;
}} }}
", ",
set_slab_next set_heap_next
)); ));
} }

View File

@ -5,18 +5,21 @@ around JS objects in wasm, but that's not allowed today! While indeed true,
that's where the polyfill comes in. that's where the polyfill comes in.
The question here is how we shoehorn JS objects into a `u32` for wasm to use. The question here is how we shoehorn JS objects into a `u32` for wasm to use.
The current strategy for this approach is to maintain two module-local variables The current strategy for this approach is to maintain a module-local variable
in the generated `foo.js` file: a stack and a heap. in the generated `foo.js` file: a `heap`.
### Temporary JS objects on the stack ### Temporary JS objects on the "stack"
The stack in `foo.js` is, well, a stack. JS objects are pushed on the top of the The first slots in the `heap` in `foo.js` are considered a stack. This stack,
stack, and their index in the stack is the identifier that's passed to wasm. JS like typical program execution stacks, grows down. JS objects are pushed on the
objects are then only removed from the top of the stack as well. This data bottom of the stack, and their index in the stack is the identifier that's passed
structure is mainly useful for efficiently passing a JS object into wasm without to wasm. A stack pointer is maintained to figure out where the next item is
a sort of "heap allocation". The downside of this, however, is that it only pushed.
works for when wasm doesn't hold onto a JS object (aka it only gets a
"reference" in Rust parlance). JS objects are then only removed from the bottom of the stack as well. Removal
is simply storing `undefined` then incrementing a counter. Because of the "stack-y"
nature of this scheme it only works for when wasm doesn't hold onto a JS object
(aka it only gets a "reference" in Rust parlance).
Let's take a look at an example. Let's take a look at an example.
@ -47,11 +50,14 @@ and what we actually generate looks something like:
// foo.js // foo.js
import * as wasm from './foo_bg'; import * as wasm from './foo_bg';
const stack = []; const heap = new Array(32);
heap.push(undefined, null, true, false);
let stack_pointer = 32;
function addBorrowedObject(obj) { function addBorrowedObject(obj) {
stack.push(obj); stack_pointer -= 1;
return stack.length - 1; heap[stack_pointer] = obj;
return stack_pointer;
} }
export function foo(arg0) { export function foo(arg0) {
@ -59,7 +65,7 @@ export function foo(arg0) {
try { try {
wasm.foo(idx0); wasm.foo(idx0);
} finally { } finally {
stack.pop(); heap[stack_pointer++] = undefined;
} }
} }
``` ```
@ -68,13 +74,13 @@ Here we can see a few notable points of action:
* The wasm file was renamed to `foo_bg.wasm`, and we can see how the JS module * The wasm file was renamed to `foo_bg.wasm`, and we can see how the JS module
generated here is importing from the wasm file. generated here is importing from the wasm file.
* Next we can see our `stack` module variable which is used to push/pop items * Next we can see our `heap` module variable which is used to store all JS values
from the stack. reference-able from wasm.
* Our exported function `foo`, takes an arbitrary argument, `arg0`, which is * Our exported function `foo`, takes an arbitrary argument, `arg0`, which is
converted to an index with the `addBorrowedObject` object function. The index converted to an index with the `addBorrowedObject` object function. The index
is then passed to wasm so wasm can operate with it. is then passed to wasm so wasm can operate with it.
* Finally, we have a `finally` which frees the stack slot as it's no longer * Finally, we have a `finally` which frees the stack slot as it's no longer
used, issuing a `pop` for what was pushed at the start of the function. used, popping the value that was pushed at the start of the function.
It's also helpful to dig into the Rust side of things to see what's going on It's also helpful to dig into the Rust side of things to see what's going on
there! Let's take a look at the code that `#[wasm_bindgen]` generates in Rust: there! Let's take a look at the code that `#[wasm_bindgen]` generates in Rust:
@ -104,12 +110,13 @@ And as with the JS, the notable points here are:
in a `JsValue`. There's some trickery here that's not worth going into just in a `JsValue`. There's some trickery here that's not worth going into just
yet, but we'll see in a bit what's happening under the hood. yet, but we'll see in a bit what's happening under the hood.
### Long-lived JS objects in a slab ### Long-lived JS objects
The above strategy is useful when JS objects are only temporarily used in Rust, The above strategy is useful when JS objects are only temporarily used in Rust,
for example only during one function call. Sometimes, though, objects may have a for example only during one function call. Sometimes, though, objects may have a
dynamic lifetime or otherwise need to be stored on Rust's heap. To cope with dynamic lifetime or otherwise need to be stored on Rust's heap. To cope with
this there's a second half of management of JS objects, a slab. this there's a second half of management of JS objects, naturally corresponding
to the other side of the JS `heap` array.
JS Objects passed to wasm that are not references are assumed to have a dynamic JS Objects passed to wasm that are not references are assumed to have a dynamic
lifetime inside of the wasm module. As a result the strict push/pop of the stack lifetime inside of the wasm module. As a result the strict push/pop of the stack
@ -135,16 +142,16 @@ different. Let's see the generated JS's slab in action:
```js ```js
import * as wasm from './foo_bg'; // imports from wasm file import * as wasm from './foo_bg'; // imports from wasm file
const slab = []; const heap = new Array(32);
let slab_next = 0; heap.push(undefined, null, true, false);
let heap_next = 36;
function addHeapObject(obj) { function addHeapObject(obj) {
if (slab_next === slab.length) if (heap_next === heap.length)
slab.push(slab.length + 1); heap.push(heap.length + 1);
const idx = slab_next; const idx = heap_next;
const next = slab[idx]; heap_next = heap[idx];
slab_next = next; heap[idx] = obj;
slab[idx] = { obj, cnt: 1 };
return idx; return idx;
} }
@ -154,24 +161,17 @@ export function foo(arg0) {
} }
export function __wbindgen_object_drop_ref(idx) { export function __wbindgen_object_drop_ref(idx) {
let obj = slab[idx]; heap[idx ] = heap_next;
obj.cnt -= 1; heap_next = idx;
if (obj.cnt > 0)
return;
// If we hit 0 then free up our space in the slab
slab[idx] = slab_next;
slab_next = idx;
} }
``` ```
Unlike before we're now calling `addHeapObject` on the argument to `foo` rather Unlike before we're now calling `addHeapObject` on the argument to `foo` rather
than `addBorrowedObject`. This function will use `slab` and `slab_next` as a than `addBorrowedObject`. This function will use `heap` and `heap_next` as a
slab allocator to acquire a slot to store the object, placing a structure there slab allocator to acquire a slot to store the object, placing a structure there
once it's found. once it's found. Note that this is going on the right-half of the array, unlike
the stack which resides on the left half. This discipline mirrors the stack/heap
Note here that a reference count is used in addition to storing the object. in normal programs, roughly.
That's so we can create multiple references to the JS object in Rust without
using `Rc`, but it's overall not too important to worry about here.
Another curious aspect of this generated module is the Another curious aspect of this generated module is the
`__wbindgen_object_drop_ref` function. This is one that's actually imported from `__wbindgen_object_drop_ref` function. This is one that's actually imported from
@ -229,10 +229,9 @@ If you'll recall as well, when we took `&JsValue` above we generated a wrapper
of `ManuallyDrop` around the local binding, and that's because we wanted to of `ManuallyDrop` around the local binding, and that's because we wanted to
avoid invoking this destructor when the object comes from the stack. avoid invoking this destructor when the object comes from the stack.
### Indexing both a slab and the stack ### Working with `heap` in reality
You might be thinking at this point that this system may not work! There's The above explanations are pretty close to what happens today, but in reality
indexes into both the slab and the stack mixed up, but how do we differentiate? there's a few differences especially around handling constant values like
It turns out that the examples above have been simplified a bit, but otherwise `undefined`, `null`, etc. Be sure to check out the actual generated JS and the
the lowest bit is currently used as an indicator of whether you're a slab or a generation code for the full details!
stack index.

View File

@ -67,11 +67,12 @@ pub struct JsValue {
_marker: marker::PhantomData<*mut u8>, // not at all threadsafe _marker: marker::PhantomData<*mut u8>, // not at all threadsafe
} }
const JSIDX_UNDEFINED: u32 = 0; const JSIDX_OFFSET: u32 = 32; // keep in sync with js/mod.rs
const JSIDX_NULL: u32 = 2; const JSIDX_UNDEFINED: u32 = JSIDX_OFFSET + 0;
const JSIDX_TRUE: u32 = 4; const JSIDX_NULL: u32 = JSIDX_OFFSET + 1;
const JSIDX_FALSE: u32 = 6; const JSIDX_TRUE: u32 = JSIDX_OFFSET + 2;
const JSIDX_RESERVED: u32 = 8; const JSIDX_FALSE: u32 = JSIDX_OFFSET + 3;
const JSIDX_RESERVED: u32 = JSIDX_OFFSET + 4;
impl JsValue { impl JsValue {
/// The `null` JS value constant. /// The `null` JS value constant.
@ -533,13 +534,12 @@ impl Drop for JsValue {
#[inline] #[inline]
fn drop(&mut self) { fn drop(&mut self) {
unsafe { unsafe {
// The first bit indicates whether this is a stack value or not. // We definitely should never drop anything in the stack area
// Stack values should never be dropped (they're always in debug_assert!(self.idx >= JSIDX_OFFSET);
// `ManuallyDrop`)
debug_assert!(self.idx & 1 == 0);
// We don't want to drop the first few elements as they're all // Otherwise if we're not dropping one of our reserved values,
// reserved, but everything else is safe to drop. // actually call the intrinsic. See #1054 for eventually removing
// this branch.
if self.idx >= JSIDX_RESERVED { if self.idx >= JSIDX_RESERVED {
__wbindgen_object_drop_ref(self.idx); __wbindgen_object_drop_ref(self.idx);
} }