mirror of
https://github.com/fluencelabs/wasm-bindgen
synced 2025-03-28 07:51:07 +00:00
Add support for TextEncoder#encodeInto
This commit adds support for the recently implemented standard [`TextEncoder#encodeInto`][standard]. This new function is a "bring your own buffer" style function that lets us avoid an intermediate allocation and copy by encoding strings directly into wasm's memory. Currently we feature-detect whether `encodeInto` exists, as it is only implemented in recent browsers and not in all browsers. Additionally, this commit emits the binding using `encodeInto` by default, but this requires `realloc` functionality to be exposed by the wasm module. Measured locally, an empty binary which takes `&str` previously took 7.6k, but after this commit it takes 8.7k due to the extra code needed for `realloc`. [standard]: https://encoding.spec.whatwg.org/#dom-textencoder-encodeinto Closes #1172
This commit is contained in:
parent
de85d99acd
commit
745b16e3d2
@ -1,6 +1,6 @@
|
|||||||
use crate::decode;
|
use crate::decode;
|
||||||
use crate::descriptor::{Descriptor, VectorKind};
|
use crate::descriptor::{Descriptor, VectorKind};
|
||||||
use crate::Bindgen;
|
use crate::{Bindgen, EncodeInto};
|
||||||
use failure::{bail, Error, ResultExt};
|
use failure::{bail, Error, ResultExt};
|
||||||
use std::collections::{HashMap, HashSet};
|
use std::collections::{HashMap, HashSet};
|
||||||
use walrus::{MemoryId, Module};
|
use walrus::{MemoryId, Module};
|
||||||
@ -1168,19 +1168,77 @@ impl<'a> Context<'a> {
|
|||||||
} else {
|
} else {
|
||||||
""
|
""
|
||||||
};
|
};
|
||||||
self.global(&format!(
|
|
||||||
|
// The first implementation we have for this is to use
|
||||||
|
// `TextEncoder#encode` which has been around for quite some time.
|
||||||
|
let use_encode = format!(
|
||||||
"
|
"
|
||||||
function passStringToWasm(arg) {{
|
|
||||||
{}
|
{}
|
||||||
const buf = cachedTextEncoder.encode(arg);
|
const buf = cachedTextEncoder.encode(arg);
|
||||||
const ptr = wasm.__wbindgen_malloc(buf.length);
|
const ptr = wasm.__wbindgen_malloc(buf.length);
|
||||||
getUint8Memory().set(buf, ptr);
|
getUint8Memory().set(buf, ptr);
|
||||||
WASM_VECTOR_LEN = buf.length;
|
WASM_VECTOR_LEN = buf.length;
|
||||||
return ptr;
|
return ptr;
|
||||||
}}
|
|
||||||
",
|
",
|
||||||
debug
|
debug
|
||||||
));
|
);
|
||||||
|
|
||||||
|
// Another possibility is to use `TextEncoder#encodeInto` which is much
|
||||||
|
// newer and isn't implemented everywhere yet. It's more efficient,
|
||||||
|
// however, because it allows us to elide an intermediate allocation.
|
||||||
|
let use_encode_into = format!(
|
||||||
|
"
|
||||||
|
{}
|
||||||
|
let size = arg.length;
|
||||||
|
let ptr = wasm.__wbindgen_malloc(size);
|
||||||
|
let writeOffset = 0;
|
||||||
|
while (true) {{
|
||||||
|
const view = getUint8Memory().subarray(ptr + writeOffset, ptr + size);
|
||||||
|
const {{ read, written }} = cachedTextEncoder.encodeInto(arg, view);
|
||||||
|
arg = arg.substring(read);
|
||||||
|
writeOffset += written;
|
||||||
|
if (arg.length === 0) {{
|
||||||
|
break;
|
||||||
|
}}
|
||||||
|
ptr = wasm.__wbindgen_realloc(ptr, size, size * 2);
|
||||||
|
size *= 2;
|
||||||
|
}}
|
||||||
|
WASM_VECTOR_LEN = writeOffset;
|
||||||
|
return ptr;
|
||||||
|
",
|
||||||
|
debug
|
||||||
|
);
|
||||||
|
|
||||||
|
match self.config.encode_into {
|
||||||
|
EncodeInto::Never => {
|
||||||
|
self.global(&format!(
|
||||||
|
"function passStringToWasm(arg) {{ {} }}",
|
||||||
|
use_encode,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
EncodeInto::Always => {
|
||||||
|
self.require_internal_export("__wbindgen_realloc")?;
|
||||||
|
self.global(&format!(
|
||||||
|
"function passStringToWasm(arg) {{ {} }}",
|
||||||
|
use_encode_into,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
EncodeInto::Test => {
|
||||||
|
self.require_internal_export("__wbindgen_realloc")?;
|
||||||
|
self.global(&format!(
|
||||||
|
"
|
||||||
|
let passStringToWasm;
|
||||||
|
if (typeof cachedTextEncoder.encodeInto === 'function') {{
|
||||||
|
passStringToWasm = function(arg) {{ {} }};
|
||||||
|
}} else {{
|
||||||
|
passStringToWasm = function(arg) {{ {} }};
|
||||||
|
}}
|
||||||
|
",
|
||||||
|
use_encode_into,
|
||||||
|
use_encode,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -36,6 +36,7 @@ pub struct Bindgen {
|
|||||||
// module to be "ready to be instantiated on any thread"
|
// module to be "ready to be instantiated on any thread"
|
||||||
threads: Option<wasm_bindgen_threads_xform::Config>,
|
threads: Option<wasm_bindgen_threads_xform::Config>,
|
||||||
anyref: bool,
|
anyref: bool,
|
||||||
|
encode_into: EncodeInto,
|
||||||
}
|
}
|
||||||
|
|
||||||
enum Input {
|
enum Input {
|
||||||
@ -44,6 +45,12 @@ enum Input {
|
|||||||
None,
|
None,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub enum EncodeInto {
|
||||||
|
Test,
|
||||||
|
Always,
|
||||||
|
Never,
|
||||||
|
}
|
||||||
|
|
||||||
impl Bindgen {
|
impl Bindgen {
|
||||||
pub fn new() -> Bindgen {
|
pub fn new() -> Bindgen {
|
||||||
Bindgen {
|
Bindgen {
|
||||||
@ -64,6 +71,7 @@ impl Bindgen {
|
|||||||
weak_refs: env::var("WASM_BINDGEN_WEAKREF").is_ok(),
|
weak_refs: env::var("WASM_BINDGEN_WEAKREF").is_ok(),
|
||||||
threads: threads_config(),
|
threads: threads_config(),
|
||||||
anyref: env::var("WASM_BINDGEN_ANYREF").is_ok(),
|
anyref: env::var("WASM_BINDGEN_ANYREF").is_ok(),
|
||||||
|
encode_into: EncodeInto::Test,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -144,6 +152,11 @@ impl Bindgen {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn encode_into(&mut self, mode: EncodeInto) -> &mut Bindgen {
|
||||||
|
self.encode_into = mode;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
pub fn generate<P: AsRef<Path>>(&mut self, path: P) -> Result<(), Error> {
|
pub fn generate<P: AsRef<Path>>(&mut self, path: P) -> Result<(), Error> {
|
||||||
self._generate(path.as_ref())
|
self._generate(path.as_ref())
|
||||||
}
|
}
|
||||||
|
@ -3,7 +3,7 @@ use failure::{bail, Error};
|
|||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::process;
|
use std::process;
|
||||||
use wasm_bindgen_cli_support::Bindgen;
|
use wasm_bindgen_cli_support::{Bindgen, EncodeInto};
|
||||||
|
|
||||||
// no need for jemalloc bloat in this binary (and we don't need speed)
|
// no need for jemalloc bloat in this binary (and we don't need speed)
|
||||||
#[global_allocator]
|
#[global_allocator]
|
||||||
@ -32,6 +32,8 @@ Options:
|
|||||||
--keep-debug Keep debug sections in wasm files
|
--keep-debug Keep debug sections in wasm files
|
||||||
--remove-name-section Remove the debugging `name` section of the file
|
--remove-name-section Remove the debugging `name` section of the file
|
||||||
--remove-producers-section Remove the telemetry `producers` section
|
--remove-producers-section Remove the telemetry `producers` section
|
||||||
|
--encode-into MODE Whether or not to use TextEncoder#encodeInto,
|
||||||
|
valid values are [test, always, never]
|
||||||
-V --version Print the version number of wasm-bindgen
|
-V --version Print the version number of wasm-bindgen
|
||||||
";
|
";
|
||||||
|
|
||||||
@ -51,6 +53,7 @@ struct Args {
|
|||||||
flag_remove_name_section: bool,
|
flag_remove_name_section: bool,
|
||||||
flag_remove_producers_section: bool,
|
flag_remove_producers_section: bool,
|
||||||
flag_keep_debug: bool,
|
flag_keep_debug: bool,
|
||||||
|
flag_encode_into: Option<String>,
|
||||||
arg_input: Option<PathBuf>,
|
arg_input: Option<PathBuf>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -100,6 +103,14 @@ fn rmain(args: &Args) -> Result<(), Error> {
|
|||||||
if let Some(ref name) = args.flag_out_name {
|
if let Some(ref name) = args.flag_out_name {
|
||||||
b.out_name(name);
|
b.out_name(name);
|
||||||
}
|
}
|
||||||
|
if let Some(mode) = &args.flag_encode_into {
|
||||||
|
match mode.as_str() {
|
||||||
|
"test" => b.encode_into(EncodeInto::Test),
|
||||||
|
"always" => b.encode_into(EncodeInto::Always),
|
||||||
|
"never" => b.encode_into(EncodeInto::Never),
|
||||||
|
s => bail!("invalid encode-into mode: `{}`", s),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
let out_dir = match args.flag_out_dir {
|
let out_dir = match args.flag_out_dir {
|
||||||
Some(ref p) => p,
|
Some(ref p) => p,
|
||||||
|
23
src/lib.rs
23
src/lib.rs
@ -892,7 +892,7 @@ pub mod __rt {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if_std! {
|
if_std! {
|
||||||
use std::alloc::{alloc, dealloc, Layout};
|
use std::alloc::{alloc, dealloc, realloc, Layout};
|
||||||
use std::mem;
|
use std::mem;
|
||||||
|
|
||||||
#[no_mangle]
|
#[no_mangle]
|
||||||
@ -911,6 +911,27 @@ pub mod __rt {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
malloc_failure();
|
||||||
|
}
|
||||||
|
|
||||||
|
#[no_mangle]
|
||||||
|
pub extern "C" fn __wbindgen_realloc(ptr: *mut u8, old_size: usize, new_size: usize) -> *mut u8 {
|
||||||
|
let align = mem::align_of::<usize>();
|
||||||
|
debug_assert!(old_size > 0);
|
||||||
|
debug_assert!(new_size > 0);
|
||||||
|
if let Ok(layout) = Layout::from_size_align(old_size, align) {
|
||||||
|
unsafe {
|
||||||
|
let ptr = realloc(ptr, layout, new_size);
|
||||||
|
if !ptr.is_null() {
|
||||||
|
return ptr
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
malloc_failure();
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cold]
|
||||||
|
fn malloc_failure() -> ! {
|
||||||
if cfg!(debug_assertions) {
|
if cfg!(debug_assertions) {
|
||||||
super::throw_str("invalid malloc request")
|
super::throw_str("invalid malloc request")
|
||||||
} else {
|
} else {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user