diff --git a/Cargo.toml b/Cargo.toml index 68df429..7229173 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,7 @@ byteorder = { version = "1", default-features = false } tempdir = "0.3" wabt = "0.2" diff = "0.1.11" +indoc = "0.3" [features] default = ["std"] diff --git a/examples/opt_imports.rs b/examples/opt_imports.rs new file mode 100644 index 0000000..c17f356 --- /dev/null +++ b/examples/opt_imports.rs @@ -0,0 +1,28 @@ +extern crate pwasm_utils as utils; + +use std::env; + +fn main() { + let args = env::args().collect::>(); + if args.len() != 3 { + println!("Usage: {} input_file.wasm output_file.wasm", args[0]); + return; + } + + // Loading module + let mut module = utils::Module::from_elements( + &parity_wasm::deserialize_file(&args[1]).expect("Module deserialization to succeed") + ).expect("Failed to parse parity-wasm format"); + + let mut delete_types = Vec::new(); + for type_ in module.types.iter() { + if type_.link_count() == 0 { + delete_types.push(type_.order().expect("type in list should have index")); + } + } + module.types.delete(&delete_types[..]); + + parity_wasm::serialize_to_file(&args[2], + module.generate().expect("Failed to generate valid format") + ).expect("Module serialization to succeed") +} diff --git a/src/graph.rs b/src/graph.rs new file mode 100644 index 0000000..a37a082 --- /dev/null +++ b/src/graph.rs @@ -0,0 +1,984 @@ +//! Wasm binary graph format + +#![warn(missing_docs)] + +use parity_wasm::elements; +use super::ref_list::{RefList, EntryRef}; +use std::{ + vec::Vec, + borrow::ToOwned, + string::String, + collections::BTreeMap, +}; + +/// Imported or declared variant of the same thing. +/// +/// In WebAssembly, function/global/memory/table instances can either be +/// imported or declared internally, forming a unified index space. +#[derive(Debug)] +pub enum ImportedOrDeclared { + /// Variant for imported instances (module name, field name). + Imported(String, String), + /// Variant for instances declared internally in the module. 
+ Declared(T), +} + +impl From<&elements::ImportEntry> for ImportedOrDeclared { + fn from(v: &elements::ImportEntry) -> Self { + ImportedOrDeclared::Imported(v.module().to_owned(), v.field().to_owned()) + } +} + +/// Error for this module +#[derive(Debug)] +pub enum Error { + /// Inconsistent source representation + InconsistentSource, + /// Format error + Format(elements::Error), + /// Detached entry + DetachedEntry, +} + +/// Function origin (imported or internal). +pub type FuncOrigin = ImportedOrDeclared; +/// Global origin (imported or internal). +pub type GlobalOrigin = ImportedOrDeclared>; +/// Memory origin (imported or internal). +pub type MemoryOrigin = ImportedOrDeclared; +/// Table origin (imported or internal). +pub type TableOrigin = ImportedOrDeclared; + +/// Function body. +/// +/// A function consists of a declaration (signature, i.e. type reference) +/// and the actual code. This part is the actual code. +#[derive(Debug)] +pub struct FuncBody { + pub locals: Vec, + pub code: Vec, +} + +/// Function declaration. +/// +/// As with other instances, functions can be either imported or declared +/// within the module - `origin` field is handling this. +#[derive(Debug)] +pub struct Func { + /// Function signature/type reference. + pub type_ref: EntryRef, + /// Where this function comes from (imported or declared). + pub origin: FuncOrigin, +} + +/// Global declaration. +/// +/// As with other instances, globals can be either imported or declared +/// within the module - `origin` field is handling this. +#[derive(Debug)] +pub struct Global { + pub content: elements::ValueType, + pub is_mut: bool, + pub origin: GlobalOrigin, +} + +/// Instruction. +/// +/// Some instructions don't reference any entities within the WebAssembly module, +/// while others do. This enum is for tracking references when required. +#[derive(Debug)] +pub enum Instruction { + /// WebAssembly instruction that does not reference any module entities. 
+ Plain(elements::Instruction), + /// Call instruction which references the function. + Call(EntryRef), + /// Indirect call instruction which references function type (function signature). + CallIndirect(EntryRef, u8), + /// get_global instruction which references the global. + GetGlobal(EntryRef), + /// set_global instruction which references the global. + SetGlobal(EntryRef), +} + +/// Memory instance descriptor. +/// +/// As with other similar instances, memory instances can be either imported +/// or declared within the module - `origin` field is handling this. +#[derive(Debug)] +pub struct Memory { + /// Declared limits of the memory instance. + pub limits: elements::ResizableLimits, + /// Origin of the memory instance (internal or imported). + pub origin: MemoryOrigin, +} + +/// Table instance descriptor. +/// +/// As with other similar instances, table instances can be either imported +/// or declared within the module - `origin` field is handling this. +#[derive(Debug)] +pub struct Table { + /// Declared limits of the table instance. + pub limits: elements::ResizableLimits, + /// Origin of the table instance (internal or imported). + pub origin: TableOrigin, +} + +/// Segment location. +/// +/// Reserved for future use. Currently only `Default` variant is supported. +#[derive(Debug)] +pub enum SegmentLocation { + /// Not used currently. + Passive, + /// Default segment location with index `0`. + Default(Vec), + /// Not used currently. + WithIndex(u32, Vec), +} + +/// Data segment of data section. +#[derive(Debug)] +pub struct DataSegment { + /// Location of the segment in the linear memory. + pub location: SegmentLocation, + /// Raw value of the data segment. + pub value: Vec, +} + +/// Element segment of element section. +#[derive(Debug)] +pub struct ElementSegment { + /// Location of the segment in the table space. + pub location: SegmentLocation, + /// Raw value (function indices) of the element segment. + pub value: Vec>, +} + +/// Export entry reference. 
+/// +/// Module can export function, global, table or memory instance +/// under specific name (field). +#[derive(Debug)] +pub enum ExportLocal { + /// Function reference. + Func(EntryRef), + /// Global reference. + Global(EntryRef), + /// Table reference. + Table(EntryRef), + /// Memory reference. + Memory(EntryRef), +} + +/// Export entry description. +#[derive(Debug)] +pub struct Export { + /// Name (field) of the export entry. + pub name: String, + /// What entity is exported. + pub local: ExportLocal, +} + +/// Module +#[derive(Debug, Default)] +pub struct Module { + /// Reference-tracking list of types. + pub types: RefList, + /// Reference-tracking list of funcs. + pub funcs: RefList, + /// Reference-tracking list of memory instances. + pub memory: RefList, + /// Reference-tracking list of table instances. + pub tables: RefList
, + /// Reference-tracking list of globals. + pub globals: RefList, + /// Reference to start function. + pub start: Option>, + /// References to exported objects. + pub exports: Vec, + /// List of element segments. + pub elements: Vec, + /// List of data segments. + pub data: Vec, + /// Other module sections that are not decoded or processed, keyed by their index in the source module. + pub other: BTreeMap, +} + +impl Module { + /// Convert parity-wasm instructions into graph instructions, turning entity indices into tracked references. + fn map_instructions(&self, instructions: &[elements::Instruction]) -> Vec { + use parity_wasm::elements::Instruction::*; + instructions.iter().map(|instruction| match instruction { + Call(func_idx) => Instruction::Call(self.funcs.clone_ref(*func_idx as usize)), + CallIndirect(type_idx, arg2) => + Instruction::CallIndirect( + self.types.clone_ref(*type_idx as usize), + *arg2, + ), + SetGlobal(global_idx) => + Instruction::SetGlobal(self.globals.clone_ref(*global_idx as usize)), + GetGlobal(global_idx) => + Instruction::GetGlobal(self.globals.clone_ref(*global_idx as usize)), + other_instruction => Instruction::Plain(other_instruction.clone()), + }).collect() + } + + fn generate_instructions(&self, instructions: &[Instruction]) -> Vec { + use parity_wasm::elements::Instruction::*; + instructions.iter().map(|instruction| match instruction { + Instruction::Call(func_ref) => Call(func_ref.order().expect("detached instruction!") as u32), + Instruction::CallIndirect(type_ref, arg2) => CallIndirect(type_ref.order().expect("detached instruction!") as u32, *arg2), + Instruction::SetGlobal(global_ref) => SetGlobal(global_ref.order().expect("detached instruction!") as u32), + Instruction::GetGlobal(global_ref) => GetGlobal(global_ref.order().expect("detached instruction!") as u32), + Instruction::Plain(plain) => plain.clone(), + }).collect() + } + + /// Initialize module from parity-wasm `Module`. 
+ pub fn from_elements(module: &elements::Module) -> Result { + + let mut idx = 0; + let mut res = Module::default(); + + let mut imported_functions = 0; + + for section in module.sections() { + match section { + elements::Section::Type(type_section) => { + res.types = RefList::from_slice(type_section.types()); + }, + elements::Section::Import(import_section) => { + for entry in import_section.entries() { + match *entry.external() { + elements::External::Function(f) => { + res.funcs.push(Func { + type_ref: res.types.get(f as usize).ok_or(Error::InconsistentSource)?.clone(), + origin: entry.into(), + }); + imported_functions += 1; + }, + elements::External::Memory(m) => { + res.memory.push(Memory { + limits: m.limits().clone(), + origin: entry.into(), + }); + }, + elements::External::Global(g) => { + res.globals.push(Global { + content: g.content_type(), + is_mut: g.is_mutable(), + origin: entry.into(), + }); + }, + elements::External::Table(t) => { + res.tables.push(Table { + limits: t.limits().clone(), + origin: entry.into(), + }); + }, + }; + } + }, + elements::Section::Function(function_section) => { + for f in function_section.entries() { + res.funcs.push(Func { + type_ref: res.types.get(f.type_ref() as usize) + .ok_or(Error::InconsistentSource)?.clone(), + origin: ImportedOrDeclared::Declared(FuncBody { + locals: Vec::new(), + // code will be populated later + code: Vec::new(), + }), + }); + }; + }, + elements::Section::Table(table_section) => { + for t in table_section.entries() { + res.tables.push(Table { + limits: t.limits().clone(), + origin: ImportedOrDeclared::Declared(()), + }); + } + }, + elements::Section::Memory(table_section) => { + for t in table_section.entries() { + res.memory.push(Memory { + limits: t.limits().clone(), + origin: ImportedOrDeclared::Declared(()), + }); + } + }, + elements::Section::Global(global_section) => { + for g in global_section.entries() { + let init_code = res.map_instructions(g.init_expr().code()); + 
res.globals.push(Global { + content: g.global_type().content_type(), + is_mut: g.global_type().is_mutable(), + origin: ImportedOrDeclared::Declared(init_code), + }); + } + }, + elements::Section::Export(export_section) => { + for e in export_section.entries() { + let local = match e.internal() { + &elements::Internal::Function(func_idx) => { + ExportLocal::Func(res.funcs.clone_ref(func_idx as usize)) + }, + &elements::Internal::Global(global_idx) => { + ExportLocal::Global(res.globals.clone_ref(global_idx as usize)) + }, + &elements::Internal::Memory(mem_idx) => { + ExportLocal::Memory(res.memory.clone_ref(mem_idx as usize)) + }, + &elements::Internal::Table(table_idx) => { + ExportLocal::Table(res.tables.clone_ref(table_idx as usize)) + }, + }; + + res.exports.push(Export { local: local, name: e.field().to_owned() }) + } + }, + elements::Section::Start(start_func) => { + res.start = Some(res.funcs.clone_ref(*start_func as usize)); + }, + elements::Section::Element(element_section) => { + for element_segment in element_section.entries() { + + // let location = if element_segment.passive() { + // SegmentLocation::Passive + // } else if element_segment.index() == 0 { + // SegmentLocation::Default(Vec::new()) + // } else { + // SegmentLocation::WithIndex(element_segment.index(), Vec::new()) + // }; + + // TODO: update parity-wasm and uncomment the above instead + let location = SegmentLocation::Default( + res.map_instructions(element_segment.offset().code()) + ); + + let funcs_map = element_segment + .members().iter() + .map(|idx| res.funcs.clone_ref(*idx as usize)) + .collect::>>(); + + res.elements.push(ElementSegment { + value: funcs_map, + location: location, + }); + } + }, + elements::Section::Code(code_section) => { + let mut idx = 0; + for func_body in code_section.bodies() { + let code = res.map_instructions(func_body.code().elements()); + + let mut func = res.funcs.get_ref(imported_functions + idx).write(); + match func.origin { + 
ImportedOrDeclared::Declared(ref mut body) => { + body.code = code; + body.locals = func_body.locals().iter().cloned().collect(); + }, + _ => { return Err(Error::InconsistentSource); } + } + + idx += 1; + } + }, + elements::Section::Data(data_section) => { + for data_segment in data_section.entries() { + // TODO: update parity-wasm and use the same logic as in + // commented element segment branch + let location = SegmentLocation::Default( + res.map_instructions(data_segment.offset().code()) + ); + + res.data.push(DataSegment { + value: data_segment.value().to_vec(), + location: location, + }); + } + }, + _ => { + res.other.insert(idx, section.clone()); + } + } + idx += 1; + } + + Ok(res) + } + + /// Generate raw format representation. + pub fn generate(&self) -> Result { + use self::ImportedOrDeclared::*; + + let mut idx = 0; + let mut sections = Vec::new(); + + custom_round(&self.other, &mut idx, &mut sections); + + if self.types.len() > 0 { + // TYPE SECTION (1) + let mut type_section = elements::TypeSection::default(); + { + let types = type_section.types_mut(); + + for type_entry in self.types.iter() { + types.push(type_entry.read().clone()) + } + } + sections.push(elements::Section::Type(type_section)); + idx += 1; + + custom_round(&self.other, &mut idx, &mut sections); + } + + // IMPORT SECTION (2) + let mut import_section = elements::ImportSection::default(); + + let add = { + let imports = import_section.entries_mut(); + for func in self.funcs.iter() { + match func.read().origin { + Imported(ref module, ref field) => { + imports.push( + elements::ImportEntry::new( + module.to_owned(), + field.to_owned(), + elements::External::Function( + func.read().type_ref.order().ok_or(Error::DetachedEntry)? 
as u32 + ), + ) + ) + }, + _ => continue, + } + } + + for global in self.globals.iter() { + match global.read().origin { + Imported(ref module, ref field) => { + imports.push( + elements::ImportEntry::new( + module.to_owned(), + field.to_owned(), + elements::External::Global( + elements::GlobalType::new( + global.read().content, + global.read().is_mut, + ) + ), + ) + ) + }, + _ => continue, + } + } + + for memory in self.memory.iter() { + match memory.read().origin { + Imported(ref module, ref field) => { + imports.push( + elements::ImportEntry::new( + module.to_owned(), + field.to_owned(), + elements::External::Memory( + elements::MemoryType::new( + memory.read().limits.initial(), + memory.read().limits.maximum(), + ) + ), + ) + ) + }, + _ => continue, + } + } + + for table in self.tables.iter() { + match table.read().origin { + Imported(ref module, ref field) => { + imports.push( + elements::ImportEntry::new( + module.to_owned(), + field.to_owned(), + elements::External::Table( + elements::TableType::new( + table.read().limits.initial(), + table.read().limits.maximum(), + ) + ), + ) + ) + }, + _ => continue, + } + } + imports.len() > 0 + }; + + if add { + sections.push(elements::Section::Import(import_section)); + idx += 1; + custom_round(&self.other, &mut idx, &mut sections); + } + + if self.funcs.len() > 0 { + // FUNC SECTION (3) + let mut func_section = elements::FunctionSection::default(); + { + let funcs = func_section.entries_mut(); + + for func in self.funcs.iter() { + match func.read().origin { + Declared(_) => { + funcs.push(elements::Func::new( + func.read().type_ref.order().ok_or(Error::DetachedEntry)? 
as u32 + )); + }, + _ => continue, + } + } + } + sections.push(elements::Section::Function(func_section)); + idx += 1; + + custom_round(&self.other, &mut idx, &mut sections); + } + + if self.tables.len() > 0 { + // TABLE SECTION (4) + let mut table_section = elements::TableSection::default(); + { + let tables = table_section.entries_mut(); + + for table in self.tables.iter() { + match table.read().origin { + Declared(_) => { + tables.push(elements::TableType::new( + table.read().limits.initial(), + table.read().limits.maximum(), + )); + }, + _ => continue, + } + } + } + sections.push(elements::Section::Table(table_section)); + idx += 1; + + custom_round(&self.other, &mut idx, &mut sections); + } + + if self.memory.len() > 0 { + // MEMORY SECTION (5) + let mut memory_section = elements::MemorySection::default(); + { + let memories = memory_section.entries_mut(); + + for memory in self.memory.iter() { + match memory.read().origin { + Declared(_) => { + memories.push(elements::MemoryType::new( + memory.read().limits.initial(), + memory.read().limits.maximum(), + )); + }, + _ => continue, + } + } + } + sections.push(elements::Section::Memory(memory_section)); + idx += 1; + + custom_round(&self.other, &mut idx, &mut sections); + } + + if self.globals.len() > 0 { + // GLOBAL SECTION (6) + let mut global_section = elements::GlobalSection::default(); + { + let globals = global_section.entries_mut(); + + for global in self.globals.iter() { + match global.read().origin { + Declared(ref init_code) => { + globals.push(elements::GlobalEntry::new( + elements::GlobalType::new(global.read().content, global.read().is_mut), + elements::InitExpr::new(self.generate_instructions(&init_code[..])), + )); + }, + _ => continue, + } + } + } + sections.push(elements::Section::Global(global_section)); + idx += 1; + + custom_round(&self.other, &mut idx, &mut sections); + } + + if self.exports.len() > 0 { + // EXPORT SECTION (7) + let mut export_section = elements::ExportSection::default(); + { 
+ let exports = export_section.entries_mut(); + + for export in self.exports.iter() { + let internal = match export.local { + ExportLocal::Func(ref func_ref) => { + elements::Internal::Function(func_ref.order().ok_or(Error::DetachedEntry)? as u32) + }, + ExportLocal::Global(ref global_ref) => { + elements::Internal::Global(global_ref.order().ok_or(Error::DetachedEntry)? as u32) + }, + ExportLocal::Table(ref table_ref) => { + elements::Internal::Table(table_ref.order().ok_or(Error::DetachedEntry)? as u32) + }, + ExportLocal::Memory(ref memory_ref) => { + elements::Internal::Memory(memory_ref.order().ok_or(Error::DetachedEntry)? as u32) + }, + }; + + exports.push(elements::ExportEntry::new(export.name.to_owned(), internal)); + } + } + sections.push(elements::Section::Export(export_section)); + idx += 1; + + custom_round(&self.other, &mut idx, &mut sections); + } + + if let Some(ref func_ref) = self.start { + // START SECTION (8) + sections.push(elements::Section::Start( + func_ref.order().ok_or(Error::DetachedEntry)? as u32 + )); + } + + if self.elements.len() > 0 { + // START SECTION (9) + let mut element_section = elements::ElementSection::default(); + { + let element_segments = element_section.entries_mut(); + + for element in self.elements.iter() { + match element.location { + SegmentLocation::Default(ref offset_expr) => { + let mut elements_map = Vec::new(); + for f in element.value.iter() { + elements_map.push(f.order().ok_or(Error::DetachedEntry)? 
as u32); + } + + element_segments.push( + elements::ElementSegment::new( + 0, + elements::InitExpr::new(self.generate_instructions(&offset_expr[..])), + elements_map, + ) + ); + }, + _ => unreachable!("Other segment location types are never added"), + } + } + } + + sections.push(elements::Section::Element(element_section)); + idx += 1; + + custom_round(&self.other, &mut idx, &mut sections); + } + + if self.funcs.len() > 0 { + // CODE SECTION (10) + let mut code_section = elements::CodeSection::default(); + { + let funcs = code_section.bodies_mut(); + + for func in self.funcs.iter() { + match func.read().origin { + Declared(ref body) => { + funcs.push(elements::FuncBody::new( + body.locals.clone(), + elements::Instructions::new(self.generate_instructions(&body.code[..])), + )); + }, + _ => continue, + } + } + } + sections.push(elements::Section::Code(code_section)); + idx += 1; + + custom_round(&self.other, &mut idx, &mut sections); + } + + + if self.data.len() > 0 { + // DATA SECTION (11) + let mut data_section = elements::DataSection::default(); + { + let data_segments = data_section.entries_mut(); + + for data_entry in self.data.iter() { + match data_entry.location { + SegmentLocation::Default(ref offset_expr) => { + data_segments.push( + elements::DataSegment::new( + 0, + elements::InitExpr::new(self.generate_instructions(&offset_expr[..])), + data_entry.value.clone(), + ) + ); + }, + _ => unreachable!("Other segment location types are never added"), + } + } + } + + sections.push(elements::Section::Data(data_section)); + idx += 1; + + custom_round(&self.other, &mut idx, &mut sections); + } + + Ok(elements::Module::new(sections)) + } +} + +fn custom_round( + map: &BTreeMap, + idx: &mut usize, + sections: &mut Vec, +) { + while let Some(other_section) = map.get(&idx) { + sections.push(other_section.clone()); + *idx += 1; + } +} + +/// New module from parity-wasm `Module` +pub fn parse(wasm: &[u8]) -> Result { + 
Module::from_elements(&::parity_wasm::deserialize_buffer(wasm).map_err(Error::Format)?) +} + +/// Generate parity-wasm `Module` +pub fn generate(f: &Module) -> Result, Error> { + let pm = f.generate()?; + ::parity_wasm::serialize(pm).map_err(Error::Format) +} + +#[cfg(test)] +mod tests { + + extern crate wabt; + + use parity_wasm::elements; + + fn load_sample(wat: &'static str) -> super::Module { + super::parse(&wabt::wat2wasm(wat).expect("faled to parse wat!")[..]) + .expect("error making representation") + } + + fn validate_sample(module: &super::Module) { + let binary = super::generate(module).expect("Failed to generate binary"); + wabt::Module::read_binary(&binary, &Default::default()) + .expect("Wabt failed to read final binary") + .validate() + .expect("Invalid module"); + } + + #[test] + fn smoky() { + let sample = load_sample(indoc!(r#" + (module + (type (func)) + (func (type 0)) + (memory 0 1) + (export "simple" (func 0)))"# + )); + + assert_eq!(sample.types.len(), 1); + assert_eq!(sample.funcs.len(), 1); + assert_eq!(sample.tables.len(), 0); + assert_eq!(sample.memory.len(), 1); + assert_eq!(sample.exports.len(), 1); + + assert_eq!(sample.types.get_ref(0).link_count(), 1); + assert_eq!(sample.funcs.get_ref(0).link_count(), 1); + } + + #[test] + fn table() { + let mut sample = load_sample(indoc!(r#" + (module + (import "env" "foo" (func $foo)) + (func (param i32) + get_local 0 + i32.const 0 + call $i32.add + drop + ) + (func $i32.add (export "i32.add") (param i32 i32) (result i32) + get_local 0 + get_local 1 + i32.add + ) + (table 10 anyfunc) + + ;; Refer all types of functions: imported, defined not exported and defined exported. 
+ (elem (i32.const 0) 0 1 2) + )"# + )); + + { + let element_func = &sample.elements[0].value[1]; + let rfunc = element_func.read(); + let rtype = &**rfunc.type_ref.read(); + let elements::Type::Function(ref ftype) = rtype; + + // it's func#1 in the function space + assert_eq!(rfunc.order(), Some(1)); + // it's type#1 + assert_eq!(ftype.params().len(), 1); + } + + sample.funcs.begin_delete().push(0).done(); + + { + let element_func = &sample.elements[0].value[1]; + let rfunc = element_func.read(); + let rtype = &**rfunc.type_ref.read(); + let elements::Type::Function(ref ftype) = rtype; + + /// import deleted so now it's func #0 + assert_eq!(rfunc.order(), Some(0)); + /// type should be the same, #1 + assert_eq!(ftype.params().len(), 1); + } + } + + #[test] + fn new_import() { + let mut sample = load_sample(indoc!(r#" + (module + (type (;0;) (func)) + (type (;1;) (func (param i32 i32) (result i32))) + (import "env" "foo" (func (type 1))) + (func (param i32) + get_local 0 + i32.const 0 + call 0 + drop + ) + (func (type 0) + i32.const 0 + call 1 + ) + )"# + )); + + { + let type_ref_0 = sample.types.clone_ref(0); + let declared_func_2 = sample.funcs.clone_ref(2); + + let mut tx = sample.funcs.begin_insert_not_until( + |f| match f.origin { + super::ImportedOrDeclared::Imported(_, _) => true, + _ => false, + } + ); + + let new_import_func = tx.push(super::Func { + type_ref: type_ref_0, + origin: super::ImportedOrDeclared::Imported("env".to_owned(), "bar".to_owned()), + }); + + tx.done(); + + assert_eq!(new_import_func.order(), Some(1)); + assert_eq!(declared_func_2.order(), Some(3)); + assert_eq!( + match &declared_func_2.read().origin { + super::ImportedOrDeclared::Declared(ref body) => { + match body.code[1] { + super::Instruction::Call(ref called_func) => called_func.order(), + _ => panic!("instruction #2 should be a call!"), + } + }, + _ => panic!("func #3 should be declared!"), + }, + Some(2), + "Call should be recalculated to 2" + ); + } + + 
validate_sample(&sample); + } + + #[test] + fn simple_opt() { + let mut sample = load_sample(indoc!(r#" + (module + (type (;0;) (func)) + (type (;1;) (func (param i32 i32) (result i32))) + (type (;2;) (func (param i32 i32) (result i32))) + (type (;3;) (func (param i32 i32) (result i32))) + (import "env" "foo" (func (type 1))) + (import "env" "foo2" (func (type 2))) + (import "env" "foo3" (func (type 3))) + (func (type 0) + i32.const 1 + i32.const 1 + call 0 + drop + ) + (func (type 0) + i32.const 2 + i32.const 2 + call 1 + drop + ) + (func (type 0) + i32.const 3 + i32.const 3 + call 2 + drop + ) + (func (type 0) + call 3 + ) + )"# + )); + + validate_sample(&sample); + + // we'll delete functions #4 and #5, nobody references it so it should be fine; + + sample.funcs.begin_delete().push(4).push(5).done(); + validate_sample(&sample); + + // now we'll delete functions #1 and #2 (imported and called from the deleted above), + // should also be fine + sample.funcs.begin_delete().push(1).push(2).done(); + validate_sample(&sample); + + // now the last declared function left should call another one before it (which is index #1) + let declared_func_2 = sample.funcs.clone_ref(2); + assert_eq!( + match &declared_func_2.read().origin { + super::ImportedOrDeclared::Declared(ref body) => { + match body.code[0] { + super::Instruction::Call(ref called_func) => called_func.order(), + ref wrong_instruction => panic!("instruction #2 should be a call but got {:?}!", wrong_instruction), + } + }, + _ => panic!("func #0 should be declared!"), + }, + Some(1), + "Call should be recalculated to 1" + ); + } +} \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 307c7ad..6f034ff 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,8 +7,9 @@ extern crate alloc; extern crate byteorder; extern crate parity_wasm; -#[macro_use] -extern crate log; +#[macro_use] extern crate log; +#[cfg(test)] #[macro_use] extern crate indoc; + pub mod rules; @@ -18,6 +19,8 @@ mod gas; mod optimizer; mod 
pack; mod runtime_type; +mod graph; +mod ref_list; mod symbols; pub mod stack_height; @@ -30,6 +33,8 @@ pub use gas::inject_gas_counter; pub use optimizer::{optimize, Error as OptimizerError}; pub use pack::{pack_instance, Error as PackingError}; pub use runtime_type::inject_runtime_type; +pub use graph::{Module, parse as graph_parse, generate as graph_generate}; +pub use ref_list::{RefList, Entry, EntryRef, DeleteTransaction}; pub struct TargetSymbols { pub create: &'static str, @@ -74,7 +79,11 @@ mod std { pub use alloc::{borrow, boxed, string, vec}; pub use core::*; - pub mod collections { - pub use alloc::collections::{BTreeMap, BTreeSet}; - } + pub mod rc { + pub use alloc::rc::Rc; + } + + pub mod collections { + pub use alloc::collections::{BTreeMap, BTreeSet}; + } } diff --git a/src/ref_list.rs b/src/ref_list.rs new file mode 100644 index 0000000..f33e882 --- /dev/null +++ b/src/ref_list.rs @@ -0,0 +1,562 @@ +#![warn(missing_docs)] + +use std::rc::Rc; +use std::cell::RefCell; +use std::vec::Vec; +use std::slice; + +#[derive(Debug)] +enum EntryOrigin { + Index(usize), + Detached, +} + +impl From for EntryOrigin { + fn from(v: usize) -> Self { + EntryOrigin::Index(v) + } +} + +/// Reference counting, link-handling object. +#[derive(Debug)] +pub struct Entry { + val: T, + index: EntryOrigin, +} + +impl Entry { + /// New entity. + pub fn new(val: T, index: usize) -> Entry { + Entry { + val: val, + index: EntryOrigin::Index(index), + } + } + + /// New detached entry. + pub fn new_detached(val: T) -> Entry { + Entry { + val: val, + index: EntryOrigin::Detached, + } + } + + /// Index of the element within the reference list. 
+ pub fn order(&self) -> Option { + match self.index { + EntryOrigin::Detached => None, + EntryOrigin::Index(idx) => Some(idx), + } + } +} + +impl ::std::ops::Deref for Entry { + type Target = T; + + fn deref(&self) -> &T { + &self.val + } +} + +impl ::std::ops::DerefMut for Entry { + fn deref_mut(&mut self) -> &mut T { + &mut self.val + } +} + +/// Reference to the entry in the reference list. +#[derive(Debug)] +pub struct EntryRef(Rc>>); + +impl Clone for EntryRef { + fn clone(&self) -> Self { + EntryRef(self.0.clone()) + } +} + +impl From> for EntryRef { + fn from(v: Entry) -> Self { + EntryRef(Rc::new(RefCell::new(v))) + } +} + +impl EntryRef { + /// Read the reference data (panics if it is currently mutably borrowed). + pub fn read(&self) -> ::std::cell::Ref> { + self.0.borrow() + } + + /// Get mutable access to the internal content of the referenced object. + /// + /// Panics if it is already borrowed. + pub fn write(&self) -> ::std::cell::RefMut> { + self.0.borrow_mut() + } + + /// Index of the element within the reference list (`None` when detached). + pub fn order(&self) -> Option { + self.0.borrow().order() + } + + /// Number of active links to this entity (strong reference count minus one). + pub fn link_count(&self) -> usize { + Rc::strong_count(&self.0) - 1 + } +} + +/// List that tracks references and indices. +#[derive(Debug)] +pub struct RefList { + items: Vec>, +} + +impl Default for RefList { + fn default() -> Self { + RefList { items: Default::default() } + } +} + +impl RefList { + + /// New empty list. + pub fn new() -> Self { Self::default() } + + /// Push new element in the list. + /// + /// Returns reference-tracking entry. + pub fn push(&mut self, t: T) -> EntryRef { + let idx = self.items.len(); + let val: EntryRef<_> = Entry::new(t, idx).into(); + self.items.push(val.clone()); + val + } + + /// Start deleting. + /// + /// Start deleting some entries in the list. Returns transaction + /// that can be populated with indices of the entries to remove. 
+ /// When transaction is finalized, all entries are deleted and + /// internal indices of other entries are updated. + pub fn begin_delete(&mut self) -> DeleteTransaction { + DeleteTransaction { + list: self, + deleted: Vec::new(), + } + } + + /// Start inserting. + /// + /// Start inserting some entries in the list at the designated position. + /// Returns transaction that can be populated with some entries. + /// When transaction is finalized, all entries are inserted and + /// internal indices of other entries might be updated. + pub fn begin_insert(&mut self, at: usize) -> InsertTransaction { + InsertTransaction { + at: at, + list: self, + items: Vec::new(), + } + } + + /// Start inserting right after the first entry matching the condition (or at the end if none matches). + /// + /// Start inserting some entries in the list at the designated position. + /// Returns transaction that can be populated with some entries. + /// When transaction is finalized, all entries are inserted and + /// internal indices of other entries might be updated. + pub fn begin_insert_after(&mut self, mut f: F) -> InsertTransaction + where F : FnMut(&T) -> bool + { + let pos = self + .items.iter() + .position(|rf| f(&**rf.read())).map(|x| x + 1) + .unwrap_or(self.items.len()); + + self.begin_insert(pos) + } + + /// Start inserting at the first entry for which the condition no longer holds (or at the end). + /// + /// Start inserting some entries in the list at the designated position. + /// Returns transaction that can be populated with some entries. + /// When transaction is finalized, all entries are inserted and + /// internal indices of other entries might be updated. + pub fn begin_insert_not_until(&mut self, mut f: F) -> InsertTransaction + where F : FnMut(&T) -> bool + { + let pos = self.items.iter().take_while(|rf| f(&**rf.read())).count(); + self.begin_insert(pos) + } + + /// Get entry with index (checked). + /// + /// Can return None when index out of bounds. 
+ pub fn get(&self, idx: usize) -> Option> { + self.items.get(idx).cloned() + } + + fn done_delete(&mut self, indices: &[usize]) { + for mut entry in self.items.iter_mut() { + let mut entry = entry.write(); + let total_less = indices.iter() + .take_while(|x| **x < entry.order().expect("Items in the list always have order; qed")) + .count(); + match entry.index { + EntryOrigin::Detached => unreachable!("Items in the list always have order!"), + EntryOrigin::Index(ref mut idx) => { *idx -= total_less; }, + }; + } + + let mut total_removed = 0; + for idx in indices { + let mut detached = self.items.remove(*idx - total_removed); + detached.write().index = EntryOrigin::Detached; + total_removed += 1; + } + } + + fn done_insert(&mut self, index: usize, mut items: Vec>) { + let mut offset = 0; + for item in items.drain(..) { + item.write().index = EntryOrigin::Index(index + offset); + self.items.insert(index + offset, item); + offset += 1; + } + + for idx in (index+offset)..self.items.len() { + self.get_ref(idx).write().index = EntryOrigin::Index(idx); + } + } + + /// Delete several items. + pub fn delete(&mut self, indices: &[usize]) { + self.done_delete(indices) + } + + /// Delete one item. + pub fn delete_one(&mut self, index: usize) { + self.done_delete(&[index]) + } + + /// Initialize from slice. + /// + /// Slice members are cloned. + pub fn from_slice(list: &[T]) -> Self + where T: Clone + { + let mut res = Self::new(); + + for t in list { + res.push(t.clone()); + } + + res + } + + /// Length of the list. + pub fn len(&self) -> usize { + self.items.len() + } + + /// Clone entry (reference counting object to item) by index. + /// + /// Will panic if index out of bounds. + pub fn clone_ref(&self, idx: usize) -> EntryRef { + self.items[idx].clone() + } + + /// Get reference to entry by index. + /// + /// Will panic if index out of bounds. + pub fn get_ref(&self, idx: usize) -> &EntryRef { + &self.items[idx] + } + + /// Iterate through entries. 
+ pub fn iter(&self) -> slice::Iter> { + self.items.iter() + } +} + +/// Delete transaction. +/// +/// Collects indices of entries to delete; nothing is removed until `done` is called. +#[must_use] +pub struct DeleteTransaction<'a, T> { + list: &'a mut RefList, + deleted: Vec, +} + +impl<'a, T> DeleteTransaction<'a, T> { + /// Schedule the entry at index `idx` for deletion. + pub fn push(self, idx: usize) -> Self { + let mut tx = self; + tx.deleted.push(idx); + tx + } + + /// Commit transaction. + pub fn done(self) { + let indices = self.deleted; + let list = self.list; + list.done_delete(&indices[..]); + } +} + +/// Insert transaction. +/// +/// Collects new entries; nothing is inserted until `done` is called. +#[must_use] +pub struct InsertTransaction<'a, T> { + at: usize, + list: &'a mut RefList, + items: Vec>, +} + +impl<'a, T> InsertTransaction<'a, T> { + /// Add new element to the insert list. + /// + /// Returns a reference to the new entry, which stays detached until the transaction is committed. + pub fn push(&mut self, val: T) -> EntryRef { + let val: EntryRef<_> = Entry::new_detached(val).into(); + self.items.push(val.clone()); + val + } + + /// Commit transaction. + pub fn done(self) { + let items = self.items; + let list = self.list; + let at = self.at; + list.done_insert(at, items); + } +} + + +#[cfg(test)] +mod tests { + + use super::*; + + #[test] + fn order() { + let mut list = RefList::::new(); + let item00 = list.push(0); + let item10 = list.push(10); + let item20 = list.push(20); + let item30 = list.push(30); + + assert_eq!(item00.order(), Some(0)); + assert_eq!(item10.order(), Some(1)); + assert_eq!(item20.order(), Some(2)); + assert_eq!(item30.order(), Some(3)); + + assert_eq!(**item00.read(), 0); + assert_eq!(**item10.read(), 10); + assert_eq!(**item20.read(), 20); + assert_eq!(**item30.read(), 30); + } + + #[test] + fn delete() { + let mut list = RefList::::new(); + let item00 = list.push(0); + let item10 = list.push(10); + let item20 = list.push(20); + let item30 = list.push(30); + + list.begin_delete().push(2).done(); + + assert_eq!(item00.order(), Some(0)); + assert_eq!(item10.order(), Some(1)); + assert_eq!(item30.order(), Some(2)); + + // but this was detached + assert_eq!(item20.order(), None); + } + + #[test] + fn
complex_delete() { + let mut list = RefList::::new(); + let item00 = list.push(0); + let item10 = list.push(10); + let item20 = list.push(20); + let item30 = list.push(30); + let item40 = list.push(40); + let item50 = list.push(50); + let item60 = list.push(60); + let item70 = list.push(70); + let item80 = list.push(80); + let item90 = list.push(90); + + list.begin_delete().push(1).push(2).push(4).push(6).done(); + + assert_eq!(item00.order(), Some(0)); + assert_eq!(item10.order(), None); + assert_eq!(item20.order(), None); + assert_eq!(item30.order(), Some(1)); + assert_eq!(item40.order(), None); + assert_eq!(item50.order(), Some(2)); + assert_eq!(item60.order(), None); + assert_eq!(item70.order(), Some(3)); + assert_eq!(item80.order(), Some(4)); + assert_eq!(item90.order(), Some(5)); + } + + #[test] + fn insert() { + let mut list = RefList::::new(); + let item00 = list.push(0); + let item10 = list.push(10); + let item20 = list.push(20); + let item30 = list.push(30); + + let mut insert_tx = list.begin_insert(3); + let item23 = insert_tx.push(23); + let item27 = insert_tx.push(27); + insert_tx.done(); + + assert_eq!(item00.order(), Some(0)); + assert_eq!(item10.order(), Some(1)); + assert_eq!(item20.order(), Some(2)); + assert_eq!(item23.order(), Some(3)); + assert_eq!(item27.order(), Some(4)); + assert_eq!(item30.order(), Some(5)); + } + + #[test] + fn insert_end() { + let mut list = RefList::::new(); + + let mut insert_tx = list.begin_insert(0); + let item0 = insert_tx.push(0); + insert_tx.done(); + + assert_eq!(item0.order(), Some(0)); + } + + #[test] + fn insert_end_more() { + let mut list = RefList::::new(); + let item0 = list.push(0); + + let mut insert_tx = list.begin_insert(1); + let item1 = insert_tx.push(1); + insert_tx.done(); + + assert_eq!(item0.order(), Some(0)); + assert_eq!(item1.order(), Some(1)); + } + + #[test] + fn insert_after() { + let mut list = RefList::::new(); + let item00 = list.push(0); + let item10 = list.push(10); + let item20 = 
list.push(20); + let item30 = list.push(30); + + let mut insert_tx = list.begin_insert_after(|i| *i == 20); + + let item23 = insert_tx.push(23); + let item27 = insert_tx.push(27); + insert_tx.done(); + + assert_eq!(item00.order(), Some(0)); + assert_eq!(item10.order(), Some(1)); + assert_eq!(item20.order(), Some(2)); + assert_eq!(item23.order(), Some(3)); + assert_eq!(item27.order(), Some(4)); + assert_eq!(item30.order(), Some(5)); + } + + #[test] + fn insert_not_until() { + let mut list = RefList::::new(); + let item10 = list.push(10); + let item20 = list.push(20); + let item30 = list.push(30); + + let mut insert_tx = list.begin_insert_not_until(|i| *i <= 20); + + let item23 = insert_tx.push(23); + let item27 = insert_tx.push(27); + insert_tx.done(); + + assert_eq!(item10.order(), Some(0)); + assert_eq!(item20.order(), Some(1)); + assert_eq!(item23.order(), Some(2)); + assert_eq!(item27.order(), Some(3)); + assert_eq!(item30.order(), Some(4)); + } + + #[test] + fn insert_after_none() { + let mut list = RefList::::new(); + let item10 = list.push(10); + let item20 = list.push(20); + let item30 = list.push(30); + + let mut insert_tx = list.begin_insert_after(|i| *i == 50); + + let item55 = insert_tx.push(23); + let item59 = insert_tx.push(27); + insert_tx.done(); + + assert_eq!(item10.order(), Some(0)); + assert_eq!(item20.order(), Some(1)); + assert_eq!(item30.order(), Some(2)); + assert_eq!(item55.order(), Some(3)); + assert_eq!(item59.order(), Some(4)); + } + + #[test] + fn insert_not_until_none() { + let mut list = RefList::::new(); + let item10 = list.push(10); + let item20 = list.push(20); + let item30 = list.push(30); + + let mut insert_tx = list.begin_insert_not_until(|i| *i < 50); + + let item55 = insert_tx.push(23); + let item59 = insert_tx.push(27); + insert_tx.done(); + + assert_eq!(item10.order(), Some(0)); + assert_eq!(item20.order(), Some(1)); + assert_eq!(item30.order(), Some(2)); + assert_eq!(item55.order(), Some(3)); + assert_eq!(item59.order(), 
Some(4)); + } + + #[test] + fn insert_after_empty() { + let mut list = RefList::::new(); + + let mut insert_tx = list.begin_insert_after(|x| *x == 100); + let item0 = insert_tx.push(0); + insert_tx.done(); + + assert_eq!(item0.order(), Some(0)); + } + + #[test] + fn insert_more() { + let mut list = RefList::::new(); + let item10 = list.push(10); + let item20 = list.push(20); + let item30 = list.push(30); + let item40 = list.push(10); + let item50 = list.push(20); + let item60 = list.push(30); + + let mut insert_tx = list.begin_insert(3); + let item35 = insert_tx.push(23); + let item37 = insert_tx.push(27); + insert_tx.done(); + + assert_eq!(item10.order(), Some(0)); + assert_eq!(item20.order(), Some(1)); + assert_eq!(item30.order(), Some(2)); + assert_eq!(item35.order(), Some(3)); + assert_eq!(item37.order(), Some(4)); + assert_eq!(item40.order(), Some(5)); + assert_eq!(item50.order(), Some(6)); + assert_eq!(item60.order(), Some(7)); + } +} \ No newline at end of file diff --git a/tests/diff.rs b/tests/diff.rs index 4200bb6..3c889d7 100644 --- a/tests/diff.rs +++ b/tests/diff.rs @@ -30,6 +30,7 @@ fn validate_wasm(binary: &[u8]) -> Result<(), wabt::Error> { } fn run_diff_test Vec>(test_dir: &str, name: &str, test: F) { + // FIXME: not going to work on windows? let mut fixture_path = PathBuf::from(concat!( env!("CARGO_MANIFEST_DIR"), "/tests/fixtures/", @@ -37,6 +38,7 @@ fn run_diff_test Vec>(test_dir: &str, name: &str, test: fixture_path.push(test_dir); fixture_path.push(name); + // FIXME: not going to work on windows? let mut expected_path = PathBuf::from(concat!( env!("CARGO_MANIFEST_DIR"), "/tests/expectations/"