From 43ee52bcbfbef87685e16e5b0c81e3f3ef7cdb46 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 6 Feb 2018 16:06:21 -0800 Subject: [PATCH] Change schemes for encoding custom type names Store JSON as utf-32, map hashes of names to a `char` and store that name in the map, and then do a reverse mapping when generating JS --- crates/wasm-bindgen-cli-support/src/js.rs | 28 +++++++++----- crates/wasm-bindgen-cli-support/src/lib.rs | 45 +++++++++++++++++++--- crates/wasm-bindgen-macro/src/ast.rs | 40 +++++++++---------- crates/wasm-bindgen-macro/src/lib.rs | 15 ++++---- crates/wasm-bindgen-shared/Cargo.toml | 1 + crates/wasm-bindgen-shared/src/lib.rs | 28 +++++++++++++- src/convert.rs | 20 +++++----- tests/classes.rs | 4 +- 8 files changed, 124 insertions(+), 57 deletions(-) diff --git a/crates/wasm-bindgen-cli-support/src/js.rs b/crates/wasm-bindgen-cli-support/src/js.rs index af480a69..6ae3fd96 100644 --- a/crates/wasm-bindgen-cli-support/src/js.rs +++ b/crates/wasm-bindgen-cli-support/src/js.rs @@ -1,4 +1,5 @@ -use std::collections::HashSet; +use std::char; +use std::collections::{HashSet, HashMap}; use shared; use parity_wasm::elements::*; @@ -14,6 +15,7 @@ pub struct Context<'a> { pub config: &'a Bindgen, pub module: &'a mut Module, pub imports_to_rewrite: HashSet, + pub custom_type_names: HashMap, } pub struct SubContext<'a, 'b: 'a> { @@ -22,6 +24,18 @@ pub struct SubContext<'a, 'b: 'a> { } impl<'a> Context<'a> { + pub fn add_custom_type_names(&mut self, program: &shared::Program) { + for custom in program.custom_type_names.iter() { + assert!(self.custom_type_names.insert(custom.descriptor, + custom.name.clone()).is_none()); + let val = custom.descriptor as u32; + assert!(val & 1 == 0); + let descriptor = char::from_u32(val | 1).unwrap(); + assert!(self.custom_type_names.insert(descriptor, + custom.name.clone()).is_none()); + } + } + pub fn finalize(&mut self, module_name: &str) -> (String, String) { { let mut bind = |name: &str, f: &Fn(&mut Self) -> String| { 
@@ -756,9 +770,7 @@ impl<'a, 'b> SubContext<'a, 'b> { pass(&format!("idx{}", i)); } custom if (custom as u32) & shared::TYPE_CUSTOM_REF_FLAG != 0 => { - let custom = ((custom as u32) & !shared::TYPE_CUSTOM_REF_FLAG) - - shared::TYPE_CUSTOM_START; - let s = &self.program.custom_type_names[custom as usize / 2]; + let s = self.cx.custom_type_names[&custom].clone(); dst_ts.push_str(&format!(": {}", s)); if self.cx.config.debug { self.cx.expose_assert_class(); @@ -769,8 +781,7 @@ impl<'a, 'b> SubContext<'a, 'b> { pass(&format!("{}.ptr", name)); } custom => { - let custom = (custom as u32) - shared::TYPE_CUSTOM_START; - let s = &self.program.custom_type_names[custom as usize / 2]; + let s = self.cx.custom_type_names[&custom].clone(); dst_ts.push_str(&format!(": {}", s)); if self.cx.config.debug { self.cx.expose_assert_class(); @@ -823,9 +834,8 @@ impl<'a, 'b> SubContext<'a, 'b> { Some(&shared::TYPE_JS_REF) | Some(&shared::TYPE_BORROWED_STR) => panic!(), Some(&t) if (t as u32) & shared::TYPE_CUSTOM_REF_FLAG != 0 => panic!(), - Some(&custom) => { - let custom = (custom as u32) - shared::TYPE_CUSTOM_START; - let name = &self.program.custom_type_names[custom as usize / 2]; + Some(custom) => { + let name = &self.cx.custom_type_names[custom]; dst_ts.push_str(": "); dst_ts.push_str(name); if self.cx.config.debug { diff --git a/crates/wasm-bindgen-cli-support/src/lib.rs b/crates/wasm-bindgen-cli-support/src/lib.rs index e4304d4e..9986f29a 100644 --- a/crates/wasm-bindgen-cli-support/src/lib.rs +++ b/crates/wasm-bindgen-cli-support/src/lib.rs @@ -5,9 +5,11 @@ extern crate wasm_bindgen_shared as shared; extern crate serde_json; extern crate wasm_gc; +use std::char; use std::fs::File; use std::io::Write; use std::path::{Path, PathBuf}; +use std::slice; use failure::Error; use parity_wasm::elements::*; @@ -75,9 +77,13 @@ impl Bindgen { exposed_globals: Default::default(), required_internal_exports: Default::default(), imports_to_rewrite: Default::default(), + custom_type_names: 
Default::default(), config: &self, module: &mut module, }; + for program in programs.iter() { + cx.add_custom_type_names(program); + } for program in programs.iter() { js::SubContext { program, @@ -126,14 +132,34 @@ fn extract_programs(module: &mut Module) -> Vec { None => return ret, }; + 'outer: for i in (0..data.entries().len()).rev() { { - let value = data.entries()[i].value(); - if !value.starts_with(b"wbg:") { - continue + let mut value = bytes_to_u32(data.entries()[i].value()); + loop { + match value.iter().position(|i| i.0 == (b'w' as u32)) { + Some(i) => value = &value[i + 1..], + None => continue 'outer, + } + match value.iter().position(|i| i.0 == (b'b' as u32)) { + Some(i) => value = &value[i + 1..], + None => continue 'outer, + } + match value.iter().position(|i| i.0 == (b'g' as u32)) { + Some(i) => value = &value[i + 1..], + None => continue 'outer, + } + match value.iter().position(|i| i.0 == (b':' as u32)) { + Some(i) => value = &value[i + 1..], + None => continue 'outer, + } + break } - let json = &value[4..]; - let p = match serde_json::from_slice(json) { + // TODO: shouldn't take the rest of the value + let json = value.iter() + .map(|i| char::from_u32(i.0).unwrap()) + .collect::(); + let p = match serde_json::from_str(&json) { Ok(f) => f, Err(e) => { panic!("failed to decode what looked like wasm-bindgen data: {}", e) @@ -145,3 +171,12 @@ fn extract_programs(module: &mut Module) -> Vec { } return ret } + +#[repr(packed)] +struct Unaligned(u32); + +fn bytes_to_u32(a: &[u8]) -> &[Unaligned] { + unsafe { + slice::from_raw_parts(a.as_ptr() as *const Unaligned, a.len() / 4) + } +} diff --git a/crates/wasm-bindgen-macro/src/ast.rs b/crates/wasm-bindgen-macro/src/ast.rs index 314ceaf8..9fef0e1b 100644 --- a/crates/wasm-bindgen-macro/src/ast.rs +++ b/crates/wasm-bindgen-macro/src/ast.rs @@ -217,7 +217,15 @@ impl Program { ("free_functions", &|a| a.list(&self.free_functions, Function::wbg_literal)), ("imports", &|a| a.list(&self.imports, 
Import::wbg_literal)), ("imported_structs", &|a| a.list(&self.imported_structs, ImportStruct::wbg_literal)), - ("custom_type_names", &|a| a.list(&self.structs, |s, a| a.str(s.name.as_ref()))), + ("custom_type_names", &|a| { + a.list(&self.structs, |s, a| { + let val = shared::name_to_descriptor(s.name.as_ref()); + a.fields(&[ + ("descriptor", &|a| a.char(val)), + ("name", &|a| a.str(s.name.as_ref())) + ]); + }) + }), ]); return a.cnt } @@ -373,14 +381,14 @@ impl Type { Type::String => a.char(shared::TYPE_STRING), Type::ByValue(ref t) => { a.as_char(my_quote! { - <#t as ::wasm_bindgen::convert::WasmBoundary>::DESCRIPTOR as u8 + <#t as ::wasm_bindgen::convert::WasmBoundary>::DESCRIPTOR }); } Type::ByRef(ref ty) | Type::ByMutRef(ref ty) => { a.as_char(my_quote! { - ((<#ty as ::wasm_bindgen::convert::WasmBoundary>::DESCRIPTOR as u32) | - ::wasm_bindgen::convert::DESCRIPTOR_CUSTOM_REF_FLAG) as u8 + (<#ty as ::wasm_bindgen::convert::WasmBoundary>::DESCRIPTOR | + ::wasm_bindgen::convert::DESCRIPTOR_CUSTOM_REF_FLAG) }); } } @@ -559,17 +567,17 @@ struct LiteralBuilder<'a> { } impl<'a> LiteralBuilder<'a> { - fn byte(&mut self, byte: u8) { + fn char_lit(&mut self, c: char) { if self.cnt > 0 { ::syn::token::Comma::default().to_tokens(self.dst); } self.cnt += 1; - byte.to_tokens(self.dst); + (c as u32).to_tokens(self.dst); } fn append(&mut self, s: &str) { - for byte in s.bytes() { - self.byte(byte); + for c in s.chars() { + self.char_lit(c); } } @@ -588,21 +596,9 @@ impl<'a> LiteralBuilder<'a> { } fn char(&mut self, s: char) { - self.append("\"\\u"); - let s = s as u32; - self.byte(to_hex((s >> 12) as u8)); - self.byte(to_hex((s >> 8) as u8)); - self.byte(to_hex((s >> 4) as u8)); - self.byte(to_hex((s >> 0) as u8)); self.append("\""); - - fn to_hex(a: u8) -> u8 { - let a = a & 0xf; - match a { - 0 ... 
9 => b'0' + a, - _ => b'a'+ a - 10, - } - } + self.char_lit(s); + self.append("\""); } fn as_char(&mut self, tokens: Tokens) { diff --git a/crates/wasm-bindgen-macro/src/lib.rs b/crates/wasm-bindgen-macro/src/lib.rs index 5e1b5969..7f8cf34a 100644 --- a/crates/wasm-bindgen-macro/src/lib.rs +++ b/crates/wasm-bindgen-macro/src/lib.rs @@ -10,7 +10,6 @@ extern crate proc_macro2; extern crate serde_json; extern crate wasm_bindgen_shared as shared; -use std::char; use std::sync::atomic::*; use proc_macro::TokenStream; @@ -79,8 +78,8 @@ pub fn wasm_bindgen(input: TokenStream) -> TokenStream { for function in program.free_functions.iter() { bindgen_fn(function, &mut ret); } - for (i, s) in program.structs.iter().enumerate() { - bindgen_struct(i, s, &mut ret); + for s in program.structs.iter() { + bindgen_struct(s, &mut ret); } for i in program.imports.iter() { bindgen_import(i, &mut ret); @@ -103,7 +102,7 @@ pub fn wasm_bindgen(input: TokenStream) -> TokenStream { (my_quote! { #[no_mangle] #[allow(non_upper_case_globals)] - pub static #generated_static_name: [u8; #generated_static_length] = + pub static #generated_static_name: [u32; #generated_static_length] = [#generated_static_value]; }).to_tokens(&mut ret); @@ -121,7 +120,7 @@ fn bindgen_fn(function: &ast::Function, into: &mut Tokens) { into) } -fn bindgen_struct(idx: usize, s: &ast::Struct, into: &mut Tokens) { +fn bindgen_struct(s: &ast::Struct, into: &mut Tokens) { for f in s.functions.iter() { bindgen_struct_fn(s, f, into); } @@ -131,11 +130,11 @@ fn bindgen_struct(idx: usize, s: &ast::Struct, into: &mut Tokens) { let name = &s.name; let free_fn = s.free_function(); - let c = char::from_u32(idx as u32 * 2 + shared::TYPE_CUSTOM_START); + let c = shared::name_to_descriptor(name.as_ref()) as u32; (my_quote! 
{ impl ::wasm_bindgen::convert::WasmBoundary for #name { type Js = u32; - const DESCRIPTOR: char = #c; + const DESCRIPTOR: u32 = #c; fn into_js(self) -> u32 { Box::into_raw(Box::new(::wasm_bindgen::__rt::WasmRefCell::new(self))) as u32 @@ -387,7 +386,7 @@ fn bindgen_imported_struct(import: &ast::ImportStruct, tokens: &mut Tokens) { impl ::wasm_bindgen::convert::WasmBoundary for #name { type Js = <::wasm_bindgen::JsValue as ::wasm_bindgen::convert::WasmBoundary>::Js; - const DESCRIPTOR: char = <::wasm_bindgen::JsValue as + const DESCRIPTOR: u32 = <::wasm_bindgen::JsValue as ::wasm_bindgen::convert::WasmBoundary>::DESCRIPTOR; fn into_js(self) -> Self::Js { diff --git a/crates/wasm-bindgen-shared/Cargo.toml b/crates/wasm-bindgen-shared/Cargo.toml index 72bb7ad8..b51c84e0 100644 --- a/crates/wasm-bindgen-shared/Cargo.toml +++ b/crates/wasm-bindgen-shared/Cargo.toml @@ -6,3 +6,4 @@ authors = ["Alex Crichton "] [dependencies] serde_derive = "1" serde = "1" +fnv = "1" diff --git a/crates/wasm-bindgen-shared/src/lib.rs b/crates/wasm-bindgen-shared/src/lib.rs index 9d0ba1a4..cc193c42 100644 --- a/crates/wasm-bindgen-shared/src/lib.rs +++ b/crates/wasm-bindgen-shared/src/lib.rs @@ -1,5 +1,9 @@ #[macro_use] extern crate serde_derive; +extern crate fnv; + +use std::char; +use std::hash::{Hash, Hasher}; #[derive(Serialize, Deserialize)] pub struct Program { @@ -7,7 +11,7 @@ pub struct Program { pub free_functions: Vec, pub imports: Vec, pub imported_structs: Vec, - pub custom_type_names: Vec, + pub custom_type_names: Vec, } #[derive(Serialize, Deserialize)] @@ -50,6 +54,12 @@ pub struct Function { pub ret: Option, } +#[derive(Serialize, Deserialize)] +pub struct CustomTypeName { + pub descriptor: char, + pub name: String, +} + pub fn free_function(struct_name: &str) -> String { let mut name = format!("__wbg_"); name.extend(struct_name @@ -91,3 +101,19 @@ pub const TYPE_JS_REF: char = '\u{63}'; pub const TYPE_CUSTOM_START: u32 = 0x64; pub const TYPE_CUSTOM_REF_FLAG: u32 = 1; + 
+pub fn name_to_descriptor(name: &str) -> char { + const CHAR_MAX: u32 = 0x10ffff; + const CHAR_HOLE_START: u32 = 0xd800; + const CHAR_HOLE_END: u32 = 0xe000; + let mut h = fnv::FnvHasher::default(); + name.hash(&mut h); + let val = h.finish(); + let range = (CHAR_MAX - (CHAR_HOLE_END - CHAR_HOLE_START) - TYPE_CUSTOM_START) / 2; + let idx = (val % (range as u64)) as u32; + let mut ret = TYPE_CUSTOM_START + idx * 2; + if CHAR_HOLE_START <= ret && ret < CHAR_HOLE_END { + ret += CHAR_HOLE_END - CHAR_HOLE_START; + } + char::from_u32(ret).unwrap() +} diff --git a/src/convert.rs b/src/convert.rs index aef0505e..4dfd234d 100644 --- a/src/convert.rs +++ b/src/convert.rs @@ -5,13 +5,13 @@ use super::JsValue; // keep in sync with shared/src/lib.rs TYPE constants pub const DESCRIPTOR_CUSTOM_REF_FLAG: u32 = 0x1; -pub const DESCRIPTOR_NUMBER: char = '\u{5e}'; -pub const DESCRIPTOR_BOOLEAN: char = '\u{61}'; -pub const DESCRIPTOR_JS_OWNED: char = '\u{62}'; +pub const DESCRIPTOR_NUMBER: u32 = 0x5e; +pub const DESCRIPTOR_BOOLEAN: u32 = 0x61; +pub const DESCRIPTOR_JS_OWNED: u32 = 0x62; pub trait WasmBoundary { type Js: WasmAbi; - const DESCRIPTOR: char; + const DESCRIPTOR: u32; fn into_js(self) -> Self::Js; unsafe fn from_js(js: Self::Js) -> Self; @@ -44,7 +44,7 @@ macro_rules! simple { ($($t:tt)*) => ($( impl WasmBoundary for $t { type Js = $t; - const DESCRIPTOR: char = DESCRIPTOR_NUMBER; + const DESCRIPTOR: u32 = DESCRIPTOR_NUMBER; fn into_js(self) -> $t { self } unsafe fn from_js(js: $t) -> $t { js } @@ -58,7 +58,7 @@ macro_rules! 
as_u32 { ($($t:tt)*) => ($( impl WasmBoundary for $t { type Js = u32; - const DESCRIPTOR: char = DESCRIPTOR_NUMBER; + const DESCRIPTOR: u32 = DESCRIPTOR_NUMBER; fn into_js(self) -> u32 { self as u32 } unsafe fn from_js(js: u32) -> $t { js as $t } @@ -70,7 +70,7 @@ as_u32!(i8 u8 i16 u16 i32 isize usize); impl WasmBoundary for bool { type Js = u32; - const DESCRIPTOR: char = DESCRIPTOR_BOOLEAN; + const DESCRIPTOR: u32 = DESCRIPTOR_BOOLEAN; fn into_js(self) -> u32 { self as u32 } unsafe fn from_js(js: u32) -> bool { js != 0 } @@ -78,7 +78,7 @@ impl WasmBoundary for bool { impl WasmBoundary for *const T { type Js = u32; - const DESCRIPTOR: char = DESCRIPTOR_NUMBER; + const DESCRIPTOR: u32 = DESCRIPTOR_NUMBER; fn into_js(self) -> u32 { self as u32 } unsafe fn from_js(js: u32) -> *const T { js as *const T } @@ -86,7 +86,7 @@ impl WasmBoundary for *const T { impl WasmBoundary for *mut T { type Js = u32; - const DESCRIPTOR: char = DESCRIPTOR_NUMBER; + const DESCRIPTOR: u32 = DESCRIPTOR_NUMBER; fn into_js(self) -> u32 { self as u32 } unsafe fn from_js(js: u32) -> *mut T { js as *mut T } @@ -94,7 +94,7 @@ impl WasmBoundary for *mut T { impl WasmBoundary for JsValue { type Js = u32; - const DESCRIPTOR: char = DESCRIPTOR_JS_OWNED; + const DESCRIPTOR: u32 = DESCRIPTOR_JS_OWNED; fn into_js(self) -> u32 { let ret = self.idx; diff --git a/tests/classes.rs b/tests/classes.rs index 001b97aa..2ac33773 100644 --- a/tests/classes.rs +++ b/tests/classes.rs @@ -243,7 +243,7 @@ fn bindgen_twice() { pub struct B {} impl B { - pub fn new() -> B { + pub fn new(a: &A) -> B { B {} } } @@ -254,7 +254,7 @@ fn bindgen_twice() { export function test() { let a = A.new(); - let b = B.new(); + let b = B.new(a); a.free(); b.free(); }