From 5180d694ce49a0e7095960a7878a931f50dbe767 Mon Sep 17 00:00:00 2001 From: Jim Posen Date: Fri, 12 Jul 2019 14:48:15 +0200 Subject: [PATCH] Validate the gas metering algorithm using fuzzer. --- Cargo.toml | 2 + src/{gas.rs => gas/mod.rs} | 30 ++- src/gas/validation.rs | 370 +++++++++++++++++++++++++++++++++++++ src/lib.rs | 2 + 4 files changed, 394 insertions(+), 10 deletions(-) rename src/{gas.rs => gas/mod.rs} (98%) create mode 100644 src/gas/validation.rs diff --git a/Cargo.toml b/Cargo.toml index 405bd3c..e221c41 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,8 @@ tempdir = "0.3" wabt = "0.2" diff = "0.1.11" indoc = "0.3" +rand = "0.7" +binaryen = "0.8" [features] default = ["std"] diff --git a/src/gas.rs b/src/gas/mod.rs similarity index 98% rename from src/gas.rs rename to src/gas/mod.rs index 2efd82b..59d728a 100644 --- a/src/gas.rs +++ b/src/gas/mod.rs @@ -4,6 +4,9 @@ //! module into one that charges gas for code to be executed. See function documentation for usage //! and details. +#[cfg(test)] +mod validation; + use std::cmp::min; use std::mem; use std::vec::Vec; @@ -62,7 +65,7 @@ struct ControlBlock { /// are constructed with the property that, in the absence of any traps, either all instructions in /// the block are executed or none are. #[derive(Debug)] -struct MeteredBlock { +pub(crate) struct MeteredBlock { /// Index of the first instruction (aka `Opcode`) in the block. start_pos: usize, /// Sum of costs of all instructions until end of the block. 
@@ -256,11 +259,10 @@ fn add_grow_counter(module: elements::Module, rules: &rules::Set, gas_func: u32) b.build() } -pub fn inject_counter( - instructions: &mut elements::Instructions, +pub(crate) fn determine_metered_blocks( + instructions: &elements::Instructions, rules: &rules::Set, - gas_func: u32, -) -> Result<(), ()> { +) -> Result<Vec<MeteredBlock>, ()> { use parity_wasm::elements::Instruction::*; let mut counter = Counter::new(); @@ -325,13 +327,23 @@ pub fn inject_counter( } } - insert_metering_calls(instructions, counter.finalized_blocks, gas_func) + counter.finalized_blocks.sort_unstable_by_key(|block| block.start_pos); + Ok(counter.finalized_blocks) +} + +pub fn inject_counter( + instructions: &mut elements::Instructions, + rules: &rules::Set, + gas_func: u32, +) -> Result<(), ()> { + let blocks = determine_metered_blocks(instructions, rules)?; + insert_metering_calls(instructions, blocks, gas_func) } // Then insert metering calls into a sequence of instructions given the block locations and costs. fn insert_metering_calls( instructions: &mut elements::Instructions, - mut blocks: Vec<MeteredBlock>, + blocks: Vec<MeteredBlock>, gas_func: u32, ) -> Result<(), ()> @@ -346,9 +358,7 @@ fn insert_metering_calls( ); let new_instrs = instructions.elements_mut(); - blocks.sort_unstable_by_key(|block| block.start_pos); let mut block_iter = blocks.into_iter().peekable(); - for (original_pos, instr) in original_instrs.into_iter().enumerate() { // If there the next block starts at this position, inject metering instructions. let used_block = if let Some(ref block) = block_iter.peek() { @@ -494,7 +504,7 @@ mod tests { use super::*; use rules; - fn get_function_body(module: &elements::Module, index: usize) + pub fn get_function_body(module: &elements::Module, index: usize) -> Option<&[elements::Instruction]> { module.code_section() diff --git a/src/gas/validation.rs b/src/gas/validation.rs new file mode 100644 index 0000000..b367366 --- /dev/null +++ b/src/gas/validation.rs @@ -0,0 +1,370 @@ +//! 
This module is used to validate the correctness of the gas metering algorithm. +//! +//! Since the gas metering algorithm is complex, this checks correctness by fuzzing. The testing +//! strategy is to generate random, valid Wasm modules using Binaryen's translate-to-fuzz +//! functionality, then ensure for all functions defined, in all execution paths through the +//! function body that do not trap that the amount of gas charged by the proposed metering +//! instructions is correct. This is done by constructing a control flow graph and exhaustively +//! searching through all paths, which may take exponential time in the size of the function body in +//! the worst case. + +use super::MeteredBlock; +use rules::Set as RuleSet; +use parity_wasm::elements::{FuncBody, Instruction}; + +use std::collections::HashMap; + +/// An ID for a node in a ControlFlowGraph. +type NodeId = usize; + +/// A node in a control flow graph is commonly known as a basic block. This is a sequence of +/// operations that are always executed sequentially. +#[derive(Debug)] +struct ControlFlowNode { + /// The index of the first instruction in the basic block. This is only used for debugging. + first_instr_pos: Option<usize>, + + /// The actual gas cost of executing all instructions in the basic block. + actual_cost: u32, + + /// The amount of gas charged by the injected metering instructions within this basic block. + charged_cost: u32, + + /// Whether there are any other nodes in the graph that loop back to this one. Every cycle in + /// the control flow graph contains at least one node with this flag set. + is_loop_target: bool, + + /// Edges in the "forward" direction of the graph. The graph of nodes and their forward edges + /// forms a directed acyclic graph (DAG). + forward_edges: Vec<NodeId>, + + /// Edges in the "backwards" direction. These edges form cycles in the graph. 
+ loopback_edges: Vec<NodeId>, +} + +impl Default for ControlFlowNode { + fn default() -> Self { + ControlFlowNode { + first_instr_pos: None, + actual_cost: 0, + charged_cost: 0, + is_loop_target: false, + forward_edges: Vec::new(), + loopback_edges: Vec::new(), + } + } +} + +/// A control flow graph where nodes are basic blocks and edges represent possible transitions +/// between them in execution flow. The graph has two types of edges, forward and loop-back edges. +/// The subgraph with only the forward edges forms a directed acyclic graph (DAG); including the +/// loop-back edges introduces cycles. +#[derive(Debug)] +pub struct ControlFlowGraph { + nodes: Vec<ControlFlowNode>, +} + +impl ControlFlowGraph { + fn new() -> Self { + ControlFlowGraph { + nodes: Vec::new(), + } + } + + fn get_node(&self, node_id: NodeId) -> &ControlFlowNode { + self.nodes.get(node_id).unwrap() + } + + fn get_node_mut(&mut self, node_id: NodeId) -> &mut ControlFlowNode { + self.nodes.get_mut(node_id).unwrap() + } + + fn add_node(&mut self) -> NodeId { + self.nodes.push(ControlFlowNode::default()); + self.nodes.len() - 1 + } + + fn increment_actual_cost(&mut self, node_id: NodeId, cost: u32) { + self.get_node_mut(node_id).actual_cost += cost; + } + + fn increment_charged_cost(&mut self, node_id: NodeId, cost: u32) { + self.get_node_mut(node_id).charged_cost += cost; + } + + fn set_first_instr_pos(&mut self, node_id: NodeId, first_instr_pos: usize) { + self.get_node_mut(node_id).first_instr_pos = Some(first_instr_pos) + } + + fn new_edge(&mut self, from_id: NodeId, target_frame: &ControlFrame) { + if target_frame.is_loop { + self.new_loopback_edge(from_id, target_frame.entry_node); + } else { + self.new_forward_edge(from_id, target_frame.exit_node); + } + } + + fn new_forward_edge(&mut self, from_id: NodeId, to_id: NodeId) { + self.get_node_mut(from_id).forward_edges.push(to_id) + } + + fn new_loopback_edge(&mut self, from_id: NodeId, to_id: NodeId) { + self.get_node_mut(from_id).loopback_edges.push(to_id); + 
self.get_node_mut(to_id).is_loop_target = true; + } +} + +/// A control frame is opened upon entry into a function and by the `block`, `if`, and `loop` +/// instructions and is closed by `end` instructions. +struct ControlFrame { + is_loop: bool, + entry_node: NodeId, + exit_node: NodeId, + active_node: NodeId, +} + +impl ControlFrame { + fn new(entry_node_id: NodeId, exit_node_id: NodeId, is_loop: bool) -> Self { + ControlFrame { + is_loop, + entry_node: entry_node_id, + exit_node: exit_node_id, + active_node: entry_node_id, + } + } +} + +/// Construct a control flow graph from a function body and the metered blocks computed for it. +/// +/// This assumes that the function body has been validated already, otherwise this may panic. +fn build_control_flow_graph( + body: &FuncBody, + rules: &RuleSet, + blocks: &[MeteredBlock] +) -> Result<ControlFlowGraph, ()> { + let mut graph = ControlFlowGraph::new(); + + let entry_node_id = graph.add_node(); + let terminal_node_id = graph.add_node(); + + graph.set_first_instr_pos(entry_node_id, 0); + + let mut stack = Vec::new(); + stack.push(ControlFrame::new(entry_node_id, terminal_node_id, false)); + + let mut metered_blocks_iter = blocks.iter().peekable(); + for (cursor, instruction) in body.code().elements().iter().enumerate() { + let active_node_id = stack.last() + .expect("module is valid by pre-condition; control stack must not be empty; qed") + .active_node; + + // Increment the charged cost if there are metering instructions to be inserted here. 
+ let apply_block = metered_blocks_iter.peek() + .map_or(false, |block| block.start_pos == cursor); + if apply_block { + let next_metered_block = metered_blocks_iter.next() + .expect("peek returned an item; qed"); + graph.increment_charged_cost(active_node_id, next_metered_block.cost); + } + + let instruction_cost = rules.process(instruction)?; + match *instruction { + Instruction::Block(_) => { + graph.increment_actual_cost(active_node_id, instruction_cost); + + let exit_node_id = graph.add_node(); + stack.push(ControlFrame::new(active_node_id, exit_node_id, false)); + } + Instruction::If(_) => { + graph.increment_actual_cost(active_node_id, instruction_cost); + + let then_node_id = graph.add_node(); + let exit_node_id = graph.add_node(); + + stack.push(ControlFrame::new(then_node_id, exit_node_id, false)); + graph.new_forward_edge(active_node_id, then_node_id); + graph.set_first_instr_pos(then_node_id, cursor + 1); + } + Instruction::Loop(_) => { + graph.increment_actual_cost(active_node_id, instruction_cost); + + let loop_node_id = graph.add_node(); + let exit_node_id = graph.add_node(); + + stack.push(ControlFrame::new(loop_node_id, exit_node_id, true)); + graph.new_forward_edge(active_node_id, loop_node_id); + graph.set_first_instr_pos(loop_node_id, cursor + 1); + } + Instruction::Else => { + let active_frame_idx = stack.len() - 1; + let prev_frame_idx = stack.len() - 2; + + let else_node_id = graph.add_node(); + stack[active_frame_idx].active_node = else_node_id; + + let prev_node_id = stack[prev_frame_idx].active_node; + graph.new_forward_edge(prev_node_id, else_node_id); + graph.set_first_instr_pos(else_node_id, cursor + 1); + } + Instruction::End => { + let closing_frame = stack.pop() + .expect("module is valid by pre-condition; ends correspond to control stack frames; qed"); + + graph.new_forward_edge(active_node_id, closing_frame.exit_node); + graph.set_first_instr_pos(closing_frame.exit_node, cursor + 1); + + if let Some(active_frame) = stack.last_mut() 
{ + active_frame.active_node = closing_frame.exit_node; + } + } + Instruction::Br(label) => { + graph.increment_actual_cost(active_node_id, instruction_cost); + + let active_frame_idx = stack.len() - 1; + let target_frame_idx = active_frame_idx - (label as usize); + graph.new_edge(active_node_id, &stack[target_frame_idx]); + + // Next instruction is unreachable, but carry on anyway. + let new_node_id = graph.add_node(); + stack[active_frame_idx].active_node = new_node_id; + graph.set_first_instr_pos(new_node_id, cursor + 1); + } + Instruction::BrIf(label) => { + graph.increment_actual_cost(active_node_id, instruction_cost); + + let active_frame_idx = stack.len() - 1; + let target_frame_idx = active_frame_idx - (label as usize); + graph.new_edge(active_node_id, &stack[target_frame_idx]); + + let new_node_id = graph.add_node(); + stack[active_frame_idx].active_node = new_node_id; + graph.new_forward_edge(active_node_id, new_node_id); + graph.set_first_instr_pos(new_node_id, cursor + 1); + } + Instruction::BrTable(ref label_vec, label_default) => { + graph.increment_actual_cost(active_node_id, instruction_cost); + + let active_frame_idx = stack.len() - 1; + for &label in [label_default].iter().chain(label_vec.iter()) { + let target_frame_idx = active_frame_idx - (label as usize); + graph.new_edge(active_node_id, &stack[target_frame_idx]); + } + + let new_node_id = graph.add_node(); + stack[active_frame_idx].active_node = new_node_id; + graph.set_first_instr_pos(new_node_id, cursor + 1); + } + Instruction::Return => { + graph.increment_actual_cost(active_node_id, instruction_cost); + + graph.new_forward_edge(active_node_id, terminal_node_id); + + let active_frame_idx = stack.len() - 1; + let new_node_id = graph.add_node(); + stack[active_frame_idx].active_node = new_node_id; + graph.set_first_instr_pos(new_node_id, cursor + 1); + } + _ => graph.increment_actual_cost(active_node_id, instruction_cost), + } + } + + assert!(stack.is_empty()); + + Ok(graph) +} + +/// 
Exhaustively search through all paths in the control flow graph, starting from the first node +/// and ensure that 1) all paths with only forward edges ending with the terminal node have an +/// equal total actual gas cost and total charged gas cost, and 2) all cycles beginning with a loop +/// entry point and ending with a node with a loop-back edge to the entry point have equal actual +/// and charged gas costs. If this returns true, then the metered blocks used to construct the +/// control flow graph are correct with respect to the function body. +/// +/// In the worst case, this runs in time exponential in the size of the graph. +fn validate_graph_gas_costs(graph: &ControlFlowGraph) -> bool { + fn visit( + graph: &ControlFlowGraph, + node_id: NodeId, + mut total_actual: u32, + mut total_charged: u32, + loop_costs: &mut HashMap<NodeId, (u32, u32)>, + ) -> bool { + let node = graph.get_node(node_id); + + total_actual += node.actual_cost; + total_charged += node.charged_cost; + + if node.is_loop_target { + loop_costs.insert(node_id, (node.actual_cost, node.charged_cost)); + } + + if node.forward_edges.is_empty() && total_actual != total_charged { + return false; + } + + for loop_node_id in node.loopback_edges.iter() { + let (ref mut loop_actual, ref mut loop_charged) = loop_costs.get_mut(loop_node_id) + .expect("cannot arrive at loopback edge without visiting loop entry node"); + if loop_actual != loop_charged { + return false; + } + } + + for next_node_id in node.forward_edges.iter() { + if !visit(graph, *next_node_id, total_actual, total_charged, loop_costs) { + return false; + } + } + + if node.is_loop_target { + loop_costs.remove(&node_id); + } + + true + } + + // Recursively explore all paths through the execution graph starting from the entry node. + visit(graph, 0, 0, 0, &mut HashMap::new()) +} + +/// Validate that the metered blocks are correct with respect to the function body by exhaustively +/// searching all paths through the control flow graph. 
+/// +/// This assumes that the function body has been validated already, otherwise this may panic. +fn validate_metering_injections( + body: &FuncBody, + rules: &RuleSet, + blocks: &[MeteredBlock] +) -> Result<bool, ()> { + let graph = build_control_flow_graph(body, rules, blocks)?; + Ok(validate_graph_gas_costs(&graph)) +} + +mod tests { + use super::*; + use super::super::determine_metered_blocks; + + use parity_wasm::elements; + use binaryen::tools::translate_to_fuzz_mvp; + use rand::{thread_rng, RngCore}; + + #[test] + fn test_build_control_flow_graph() { + for _ in 0..20 { + let mut rand_input = [0u8; 2048]; + thread_rng().fill_bytes(&mut rand_input); + + let module_bytes = translate_to_fuzz_mvp(&rand_input).write(); + let module: elements::Module = elements::deserialize_buffer(&module_bytes) + .expect("failed to parse Wasm blob generated by translate_to_fuzz"); + + for func_body in module.code_section().iter().flat_map(|section| section.bodies()) { + let rules = RuleSet::default(); + + let metered_blocks = determine_metered_blocks(func_body.code(), &rules).unwrap(); + let success = validate_metering_injections(func_body, &rules, &metered_blocks).unwrap(); + assert!(success); + } + } + } +} diff --git a/src/lib.rs b/src/lib.rs index dd0c685..1b66ae2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,6 +9,8 @@ extern crate byteorder; extern crate parity_wasm; #[macro_use] extern crate log; #[cfg(test)] #[macro_use] extern crate indoc; +extern crate rand; +extern crate binaryen; pub mod rules;