From 5180d694ce49a0e7095960a7878a931f50dbe767 Mon Sep 17 00:00:00 2001
From: Jim Posen <jim.posen@gmail.com>
Date: Fri, 12 Jul 2019 14:48:15 +0200
Subject: [PATCH] Validate the gas metering algorithm using fuzzer.

---
 Cargo.toml                 |   2 +
 src/{gas.rs => gas/mod.rs} |  30 ++-
 src/gas/validation.rs      | 370 +++++++++++++++++++++++++++++++++++++
 src/lib.rs                 |   2 +
 4 files changed, 394 insertions(+), 10 deletions(-)
 rename src/{gas.rs => gas/mod.rs} (98%)
 create mode 100644 src/gas/validation.rs
diff --git a/Cargo.toml b/Cargo.toml
index 405bd3c..e221c41 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -17,6 +17,8 @@ tempdir = "0.3"
 wabt = "0.2"
 diff = "0.1.11"
 indoc = "0.3"
+rand = "0.7"
+binaryen = "0.8"
 
 [features]
 default = ["std"]
diff --git a/src/gas.rs b/src/gas/mod.rs
similarity index 98%
rename from src/gas.rs
rename to src/gas/mod.rs
index 2efd82b..59d728a 100644
--- a/src/gas.rs
+++ b/src/gas/mod.rs
@@ -4,6 +4,9 @@
 //! module into one that charges gas for code to be executed. See function documentation for usage
 //! and details.
 
+#[cfg(test)]
+mod validation;
+
 use std::cmp::min;
 use std::mem;
 use std::vec::Vec;
@@ -62,7 +65,7 @@ struct ControlBlock {
 /// are constructed with the property that, in the absence of any traps, either all instructions in
 /// the block are executed or none are.
 #[derive(Debug)]
-struct MeteredBlock {
+pub(crate) struct MeteredBlock {
 	/// Index of the first instruction (aka `Opcode`) in the block.
 	start_pos: usize,
 	/// Sum of costs of all instructions until end of the block.
@@ -256,11 +259,10 @@ fn add_grow_counter(module: elements::Module, rules: &rules::Set, gas_func: u32)
 	b.build()
 }
 
-pub fn inject_counter(
-	instructions: &mut elements::Instructions,
+pub(crate) fn determine_metered_blocks(
+	instructions: &elements::Instructions,
 	rules: &rules::Set,
-	gas_func: u32,
-) -> Result<(), ()> {
+) -> Result<Vec<MeteredBlock>, ()> {
 	use parity_wasm::elements::Instruction::*;
 
 	let mut counter = Counter::new();
@@ -325,13 +327,23 @@ pub fn inject_counter(
 		}
 	}
 
-	insert_metering_calls(instructions, counter.finalized_blocks, gas_func)
+	counter.finalized_blocks.sort_unstable_by_key(|block| block.start_pos);
+	Ok(counter.finalized_blocks)
+}
+
+pub fn inject_counter(
+	instructions: &mut elements::Instructions,
+	rules: &rules::Set,
+	gas_func: u32,
+) -> Result<(), ()> {
+	let blocks = determine_metered_blocks(instructions, rules)?;
+	insert_metering_calls(instructions, blocks, gas_func)
 }
 
 // Then insert metering calls into a sequence of instructions given the block locations and costs.
 fn insert_metering_calls(
 	instructions: &mut elements::Instructions,
-	mut blocks: Vec<MeteredBlock>,
+	blocks: Vec<MeteredBlock>,
 	gas_func: u32,
 )
 	-> Result<(), ()>
@@ -346,9 +358,7 @@ fn insert_metering_calls(
 	);
 	let new_instrs = instructions.elements_mut();
 
-	blocks.sort_unstable_by_key(|block| block.start_pos);
 	let mut block_iter = blocks.into_iter().peekable();
-
 	for (original_pos, instr) in original_instrs.into_iter().enumerate() {
 		// If there the next block starts at this position, inject metering instructions.
 		let used_block = if let Some(ref block) = block_iter.peek() {
@@ -494,7 +504,7 @@ mod tests {
 	use super::*;
 	use rules;
 
-	fn get_function_body(module: &elements::Module, index: usize)
+	pub fn get_function_body(module: &elements::Module, index: usize)
 		-> Option<&[elements::Instruction]>
 	{
 		module.code_section()
diff --git a/src/gas/validation.rs b/src/gas/validation.rs
new file mode 100644
index 0000000..b367366
--- /dev/null
+++ b/src/gas/validation.rs
@@ -0,0 +1,370 @@
+//! This module is used to validate the correctness of the gas metering algorithm.
+//!
+//! Since the gas metering algorithm is complex, this checks correctness by fuzzing. The testing
+//! strategy is to generate random, valid Wasm modules using Binaryen's translate-to-fuzz
+//! functionality, then ensure for all functions defined, in all execution paths through the
+//! function body that do not trap that the amount of gas charged by the proposed metering
+//! instructions is correct. This is done by constructing a control flow graph and exhaustively
+//! searching through all paths, which may take exponential time in the size of the function body in
+//! the worst case.
+
+use super::MeteredBlock;
+use rules::Set as RuleSet;
+use parity_wasm::elements::{FuncBody, Instruction};
+
+use std::collections::HashMap;
+
+/// An ID for a node in a `ControlFlowGraph`; it is the node's index in the graph's `nodes` vector.
+type NodeId = usize;
+
+/// A node in a control flow graph, commonly known as a basic block: a sequence of operations
+/// that are always executed sequentially.
+#[derive(Debug)]
+struct ControlFlowNode {
+	/// The index of the first instruction in the basic block, if recorded. Only used for debugging.
+	first_instr_pos: Option<usize>,
+
+	/// The actual gas cost of executing all instructions in the basic block.
+	actual_cost: u32,
+
+	/// The amount of gas charged by the injected metering instructions within this basic block.
+	charged_cost: u32,
+
+	/// Whether any node in the graph has a loop-back edge to this one; set by `new_loopback_edge`.
+	/// Every cycle in the control flow graph contains at least one node with this flag set.
+	is_loop_target: bool,
+
+	/// Target nodes of edges in the "forward" direction. The graph of nodes and their forward
+	/// edges forms a directed acyclic graph (DAG).
+	forward_edges: Vec<NodeId>,
+
+	/// Target nodes of edges in the "backwards" direction. These edges form cycles in the graph.
+	loopback_edges: Vec<NodeId>,
+}
+
+impl Default for ControlFlowNode {
+	/// Creates an empty basic block: no recorded position, zero costs, and no edges.
+	fn default() -> Self {
+		Self {
+			first_instr_pos: None,
+			actual_cost: 0, charged_cost: 0,
+			is_loop_target: false,
+			forward_edges: Vec::new(),
+			loopback_edges: Vec::new(),
+		}
+	}
+}
+
+/// A control flow graph where nodes are basic blocks and edges represent possible transitions
+/// between them in execution flow. The graph has two types of edges, forward and loop-back edges.
+/// The subgraph with only the forward edges forms a directed acyclic graph (DAG); including the
+/// loop-back edges introduces cycles.
+#[derive(Debug)]
+pub struct ControlFlowGraph {
+	nodes: Vec<ControlFlowNode>, // indexed by `NodeId`
+}
+
+impl ControlFlowGraph {
+	fn new() -> Self {
+		ControlFlowGraph {
+			nodes: Vec::new(),
+		}
+	}
+
+	fn get_node(&self, node_id: NodeId) -> &ControlFlowNode {
+		self.nodes.get(node_id).unwrap() // panics if `node_id` is out of range
+	}
+
+	fn get_node_mut(&mut self, node_id: NodeId) -> &mut ControlFlowNode {
+		self.nodes.get_mut(node_id).unwrap() // panics if `node_id` is out of range
+	}
+
+	fn add_node(&mut self) -> NodeId {
+		self.nodes.push(ControlFlowNode::default());
+		self.nodes.len() - 1 // the new node's id is its index in `nodes`
+	}
+
+	fn increment_actual_cost(&mut self, node_id: NodeId, cost: u32) {
+		self.get_node_mut(node_id).actual_cost += cost;
+	}
+
+	fn increment_charged_cost(&mut self, node_id: NodeId, cost: u32) {
+		self.get_node_mut(node_id).charged_cost += cost;
+	}
+
+	fn set_first_instr_pos(&mut self, node_id: NodeId, first_instr_pos: usize) {
+		self.get_node_mut(node_id).first_instr_pos = Some(first_instr_pos)
+	}
+
+	fn new_edge(&mut self, from_id: NodeId, target_frame: &ControlFrame) {
+		if target_frame.is_loop { // a branch to a loop frame jumps back to the loop's entry node
+			self.new_loopback_edge(from_id, target_frame.entry_node);
+		} else { // a branch to any other frame jumps forward to that frame's exit node
+			self.new_forward_edge(from_id, target_frame.exit_node);
+		}
+	}
+
+	fn new_forward_edge(&mut self, from_id: NodeId, to_id: NodeId) {
+		self.get_node_mut(from_id).forward_edges.push(to_id)
+	}
+
+	fn new_loopback_edge(&mut self, from_id: NodeId, to_id: NodeId) {
+		self.get_node_mut(from_id).loopback_edges.push(to_id);
+		self.get_node_mut(to_id).is_loop_target = true; // read by the cost validator
+	}
+}
+
+/// A control frame is opened upon entry into a function and by the `block`, `if`, and `loop`
+/// instructions and is closed by `end` instructions.
+struct ControlFrame {
+	is_loop: bool, // true only for frames opened by `loop`; branches then target `entry_node`
+	entry_node: NodeId, // node that is active when the frame is opened
+	exit_node: NodeId, // node that becomes active in the parent frame once this frame is closed
+	active_node: NodeId, // node currently receiving costs; replaced after `else`, branches, `return`
+}
+
+impl ControlFrame {
+	/// Opens a frame entered at `entry_node_id` that hands control to `exit_node_id` once closed.
+	/// `is_loop` marks frames opened by `loop`, whose branch target is the entry node.
+	fn new(entry_node_id: NodeId, exit_node_id: NodeId, is_loop: bool) -> Self {
+		let entry_node = entry_node_id;
+		let exit_node = exit_node_id;
+		// A freshly opened frame keeps executing in its entry node.
+		Self { is_loop, entry_node, exit_node, active_node: entry_node }
+	}
+}
+
+/// Construct a control flow graph from a function body and the metered blocks computed for it.
+///
+/// This assumes that the function body has been validated already, otherwise this may panic.
+fn build_control_flow_graph(
+	body: &FuncBody,
+	rules: &RuleSet,
+	blocks: &[MeteredBlock]
+) -> Result<ControlFlowGraph, ()> {
+	let mut graph = ControlFlowGraph::new();
+	// Node 0 is the entry node; node 1 is the terminal node every returning path ends in.
+	let entry_node_id = graph.add_node();
+	let terminal_node_id = graph.add_node();
+
+	graph.set_first_instr_pos(entry_node_id, 0);
+	// The function body is the outermost (non-loop) control frame, exiting to the terminal node.
+	let mut stack = Vec::new();
+	stack.push(ControlFrame::new(entry_node_id, terminal_node_id, false));
+	// Metered blocks are consumed in order, so `blocks` is expected to be sorted by `start_pos`.
+	let mut metered_blocks_iter = blocks.iter().peekable();
+	for (cursor, instruction) in body.code().elements().iter().enumerate() {
+		let active_node_id = stack.last()
+			.expect("module is valid by pre-condition; control stack must not be empty; qed")
+			.active_node;
+
+		// Increment the charged cost if there are metering instructions to be inserted here.
+		let apply_block = metered_blocks_iter.peek()
+			.map_or(false, |block| block.start_pos == cursor);
+		if apply_block {
+			let next_metered_block = metered_blocks_iter.next()
+				.expect("peek returned an item; qed");
+			graph.increment_charged_cost(active_node_id, next_metered_block.cost);
+		}
+
+		let instruction_cost = rules.process(instruction)?;
+		match *instruction {
+			Instruction::Block(_) => {
+				graph.increment_actual_cost(active_node_id, instruction_cost);
+				// A `block` opens no new node: its body keeps accumulating into the active node.
+				let exit_node_id = graph.add_node();
+				stack.push(ControlFrame::new(active_node_id, exit_node_id, false));
+			}
+			Instruction::If(_) => {
+				graph.increment_actual_cost(active_node_id, instruction_cost);
+
+				let then_node_id = graph.add_node();
+				let exit_node_id = graph.add_node();
+				// NOTE(review): the condition-false path gets no edge of its own -- confirm.
+				stack.push(ControlFrame::new(then_node_id, exit_node_id, false));
+				graph.new_forward_edge(active_node_id, then_node_id);
+				graph.set_first_instr_pos(then_node_id, cursor + 1);
+			}
+			Instruction::Loop(_) => {
+				graph.increment_actual_cost(active_node_id, instruction_cost);
+
+				let loop_node_id = graph.add_node();
+				let exit_node_id = graph.add_node();
+				// Branches targeting this frame become loop-back edges to `loop_node_id`.
+				stack.push(ControlFrame::new(loop_node_id, exit_node_id, true));
+				graph.new_forward_edge(active_node_id, loop_node_id);
+				graph.set_first_instr_pos(loop_node_id, cursor + 1);
+			}
+			Instruction::Else => {
+				let active_frame_idx = stack.len() - 1;
+				let prev_frame_idx = stack.len() - 2;
+				// NOTE(review): the then-branch's node gets no edge to the exit node -- confirm.
+				let else_node_id = graph.add_node();
+				stack[active_frame_idx].active_node = else_node_id;
+				// The else-branch is entered from the node active in the enclosing frame.
+				let prev_node_id = stack[prev_frame_idx].active_node;
+				graph.new_forward_edge(prev_node_id, else_node_id);
+				graph.set_first_instr_pos(else_node_id, cursor + 1);
+			}
+			Instruction::End => {
+				let closing_frame = stack.pop()
+					.expect("module is valid by pre-condition; ends correspond to control stack frames; qed");
+				// Falling off the end of a frame continues in its exit node.
+				graph.new_forward_edge(active_node_id, closing_frame.exit_node);
+				graph.set_first_instr_pos(closing_frame.exit_node, cursor + 1);
+
+				if let Some(active_frame) = stack.last_mut() {
+					active_frame.active_node = closing_frame.exit_node;
+				}
+			}
+			Instruction::Br(label) => {
+				graph.increment_actual_cost(active_node_id, instruction_cost);
+
+				let active_frame_idx = stack.len() - 1;
+				let target_frame_idx = active_frame_idx - (label as usize);
+				graph.new_edge(active_node_id, &stack[target_frame_idx]);
+
+				// Next instruction is unreachable, but carry on anyway.
+				let new_node_id = graph.add_node();
+				stack[active_frame_idx].active_node = new_node_id;
+				graph.set_first_instr_pos(new_node_id, cursor + 1);
+			}
+			Instruction::BrIf(label) => {
+				graph.increment_actual_cost(active_node_id, instruction_cost);
+
+				let active_frame_idx = stack.len() - 1;
+				let target_frame_idx = active_frame_idx - (label as usize);
+				graph.new_edge(active_node_id, &stack[target_frame_idx]);
+				// Not taking the branch falls through into a fresh node.
+				let new_node_id = graph.add_node();
+				stack[active_frame_idx].active_node = new_node_id;
+				graph.new_forward_edge(active_node_id, new_node_id);
+				graph.set_first_instr_pos(new_node_id, cursor + 1);
+			}
+			Instruction::BrTable(ref label_vec, label_default) => {
+				graph.increment_actual_cost(active_node_id, instruction_cost);
+
+				let active_frame_idx = stack.len() - 1;
+				for &label in [label_default].iter().chain(label_vec.iter()) {
+					let target_frame_idx = active_frame_idx - (label as usize);
+					graph.new_edge(active_node_id, &stack[target_frame_idx]);
+				}
+				// Code following `br_table` is unreachable, but gets its own node anyway.
+				let new_node_id = graph.add_node();
+				stack[active_frame_idx].active_node = new_node_id;
+				graph.set_first_instr_pos(new_node_id, cursor + 1);
+			}
+			Instruction::Return => {
+				graph.increment_actual_cost(active_node_id, instruction_cost);
+				// `return` leaves the function, i.e. jumps straight to the terminal node.
+				graph.new_forward_edge(active_node_id, terminal_node_id);
+
+				let active_frame_idx = stack.len() - 1;
+				let new_node_id = graph.add_node();
+				stack[active_frame_idx].active_node = new_node_id;
+				graph.set_first_instr_pos(new_node_id, cursor + 1);
+			}
+			_ => graph.increment_actual_cost(active_node_id, instruction_cost),
+		}
+	}
+	// Every frame, including the function-level one, must have been closed by an `end`.
+	assert!(stack.is_empty());
+
+	Ok(graph)
+}
+
+/// Exhaustively search through all paths in the control flow graph, starting from the first node
+/// and ensure that 1) every path of forward edges that reaches a node without forward edges has
+/// an equal total actual gas cost and total charged gas cost, and 2) for every loop-back edge the
+/// actual and charged costs recorded for its target loop entry node are equal. Returns `true`
+/// when no check fails. NOTE(review): check 2 only sees the entry node's own costs, since
+/// `loop_costs` entries are never accumulated along the cycle -- confirm this is intended.
+///
+/// In the worst case, this runs in time exponential in the size of the graph.
+fn validate_graph_gas_costs(graph: &ControlFlowGraph) -> bool {
+	fn visit(
+		graph: &ControlFlowGraph,
+		node_id: NodeId,
+		mut total_actual: u32,
+		mut total_charged: u32,
+		loop_costs: &mut HashMap<NodeId, (u32, u32)>,
+	) -> bool {
+		let node = graph.get_node(node_id);
+
+		total_actual += node.actual_cost;
+		total_charged += node.charged_cost;
+		// Record a loop entry's own costs while its forward successors are explored.
+		if node.is_loop_target {
+			loop_costs.insert(node_id, (node.actual_cost, node.charged_cost));
+		}
+		// A node without forward edges ends the path: the running totals must agree here.
+		if node.forward_edges.is_empty() && total_actual != total_charged {
+			return false;
+		}
+		// NOTE(review): the record holds only the entry node's own costs; nothing adds to it.
+		for loop_node_id in node.loopback_edges.iter() {
+			let (ref mut loop_actual, ref mut loop_charged) = loop_costs.get_mut(loop_node_id)
+				.expect("cannot arrive at loopback edge without visiting loop entry node");
+			if loop_actual != loop_charged {
+				return false;
+			}
+		}
+
+		for next_node_id in node.forward_edges.iter() {
+			if !visit(graph, *next_node_id, total_actual, total_charged, loop_costs) {
+				return false;
+			}
+		}
+
+		if node.is_loop_target {
+			loop_costs.remove(&node_id);
+		}
+
+		true
+	}
+
+	// Recursively explore all paths through the execution graph starting from the entry node.
+	visit(graph, 0, 0, 0, &mut HashMap::new())
+}
+
+/// Builds the control flow graph of `body` annotated with the charges from `blocks` and reports
+/// whether `validate_graph_gas_costs` accepts it. Graph construction errors are passed through.
+///
+/// The function body must already have been validated; a malformed body may cause a panic.
+fn validate_metering_injections(
+	body: &FuncBody,
+	rules: &RuleSet,
+	blocks: &[MeteredBlock]
+) -> Result<bool, ()> {
+	build_control_flow_graph(body, rules, blocks)
+		.map(|graph| validate_graph_gas_costs(&graph))
+}
+
+mod tests {
+	use super::*;
+	use super::super::determine_metered_blocks;
+
+	use parity_wasm::elements;
+	use binaryen::tools::translate_to_fuzz_mvp;
+	use rand::{thread_rng, RngCore};
+
+	#[test]
+	fn test_build_control_flow_graph() {
+		// The rule set does not depend on the generated module, so it is built only once.
+		let rules = RuleSet::default();
+		for _ in 0..20 {
+			// Feed Binaryen's translate-to-fuzz with fresh random bytes on every round.
+			let mut seed = [0u8; 2048];
+			thread_rng().fill_bytes(&mut seed);
+			let wasm = translate_to_fuzz_mvp(&seed).write();
+			let module: elements::Module = elements::deserialize_buffer(&wasm)
+				.expect("failed to parse Wasm blob generated by translate_to_fuzz");
+
+			for body in module.code_section().iter().flat_map(|section| section.bodies()) {
+				let blocks = determine_metered_blocks(body.code(), &rules).unwrap();
+				let is_valid = validate_metering_injections(body, &rules, &blocks).unwrap();
+				assert!(is_valid);
+			}
+		}
+	}
+}
diff --git a/src/lib.rs b/src/lib.rs
index dd0c685..1b66ae2 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -9,6 +9,8 @@ extern crate byteorder;
 extern crate parity_wasm;
 #[macro_use] extern crate log;
 #[cfg(test)] #[macro_use] extern crate indoc;
+#[cfg(test)] extern crate rand; // only used by the #[cfg(test)] gas validation module
+#[cfg(test)] extern crate binaryen; // only used by the #[cfg(test)] gas validation module
 
 
 pub mod rules;