feat(interpreter-cid,interpreter-data)!: Support for multiple hash types in CID verification (#722)

This makes it possible to change the CID hash function without breaking compatibility, and to use several CID hash functions side by side.
This commit is contained in:
Ivan Boldyrev 2023-11-22 22:12:42 +04:00 committed by GitHub
parent dc8afdeeff
commit 524c30243b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 360 additions and 72 deletions

View File

@ -18,6 +18,7 @@
/// values forged in the CID stores.
use air::ExecutionCidState;
use air::PreparationError;
use air_interpreter_cid::CidVerificationError;
use air_interpreter_signatures::PeerCidTracker;
use air_interpreter_signatures::PublicKey;
use air_interpreter_signatures::SignatureStore;
@ -92,10 +93,13 @@ fn test_attack_replace_value() {
assert_error_eq!(
&res,
PreparationError::CidStoreVerificationError(CidStoreVerificationError::MismatchError {
type_name: "serde_json::value::Value",
cid_repr: "bagaaihrayhxgqijfajraxivb7hxwshhbsdqk4j5zyqypb54zggmn5v7mmwxq".into(),
})
PreparationError::CidStoreVerificationError(
CidVerificationError::ValueMismatch {
type_name: "serde_json::value::Value",
cid_repr: "bagaaihrayhxgqijfajraxivb7hxwshhbsdqk4j5zyqypb54zggmn5v7mmwxq".into(),
}
.into()
)
);
}
@ -165,10 +169,13 @@ fn test_attack_replace_tetraplet() {
assert_error_eq!(
&res,
PreparationError::CidStoreVerificationError(CidStoreVerificationError::MismatchError {
type_name: "marine_call_parameters::SecurityTetraplet",
cid_repr: "bagaaihraqlxlbr5q54odmlqwrzpw4smuxzzqbrfas6c7ajhb73samtrjkkva".into(),
})
PreparationError::CidStoreVerificationError(
CidVerificationError::ValueMismatch {
type_name: "marine_call_parameters::SecurityTetraplet",
cid_repr: "bagaaihraqlxlbr5q54odmlqwrzpw4smuxzzqbrfas6c7ajhb73samtrjkkva".into(),
}
.into()
)
);
}
@ -245,10 +252,13 @@ fn test_attack_replace_call_result() {
assert_error_eq!(
&res,
PreparationError::CidStoreVerificationError(CidStoreVerificationError::MismatchError {
type_name: "air_interpreter_data::executed_state::ServiceResultCidAggregate",
cid_repr: "bagaaihrai3ii6rephch2kv2efkbolmhfjvpj2w3fyr2tj6lavd4yiloy2ybq".into(),
})
PreparationError::CidStoreVerificationError(
CidVerificationError::ValueMismatch {
type_name: "air_interpreter_data::executed_state::ServiceResultCidAggregate",
cid_repr: "bagaaihrai3ii6rephch2kv2efkbolmhfjvpj2w3fyr2tj6lavd4yiloy2ybq".into(),
}
.into()
)
);
}
@ -332,10 +342,13 @@ fn test_attack_replace_canon_value() {
assert_error_eq!(
&res,
PreparationError::CidStoreVerificationError(CidStoreVerificationError::MismatchError {
type_name: "air_interpreter_data::executed_state::CanonCidAggregate",
cid_repr: "bagaaihram3i44lmbxmukumwohtp2dkocgdqjwzixddzxjmzlvhea7aid5l7q".into(),
})
PreparationError::CidStoreVerificationError(
CidVerificationError::ValueMismatch {
type_name: "air_interpreter_data::executed_state::CanonCidAggregate",
cid_repr: "bagaaihram3i44lmbxmukumwohtp2dkocgdqjwzixddzxjmzlvhea7aid5l7q".into(),
}
.into()
)
);
}
@ -428,10 +441,13 @@ fn test_attack_replace_canon_result_values() {
assert_error_eq!(
&res,
PreparationError::CidStoreVerificationError(CidStoreVerificationError::MismatchError {
type_name: "air_interpreter_data::executed_state::CanonResultCidAggregate",
cid_repr: "bagaaihrar7xfyl5usjhn5s6xisvwkh55zyq5lvjnwr6j5j3yjutf55aowqea".into(),
})
PreparationError::CidStoreVerificationError(
CidVerificationError::ValueMismatch {
type_name: "air_interpreter_data::executed_state::CanonResultCidAggregate",
cid_repr: "bagaaihrar7xfyl5usjhn5s6xisvwkh55zyq5lvjnwr6j5j3yjutf55aowqea".into(),
}
.into()
)
);
}
@ -528,9 +544,12 @@ fn test_attack_replace_canon_result_tetraplet() {
assert_error_eq!(
&res,
PreparationError::CidStoreVerificationError(CidStoreVerificationError::MismatchError {
type_name: "air_interpreter_data::executed_state::CanonResultCidAggregate",
cid_repr: "bagaaihrar7xfyl5usjhn5s6xisvwkh55zyq5lvjnwr6j5j3yjutf55aowqea".into(),
})
PreparationError::CidStoreVerificationError(
CidVerificationError::ValueMismatch {
type_name: "air_interpreter_data::executed_state::CanonResultCidAggregate",
cid_repr: "bagaaihrar7xfyl5usjhn5s6xisvwkh55zyq5lvjnwr6j5j3yjutf55aowqea".into(),
}
.into()
)
);
}

View File

@ -11,9 +11,13 @@ keywords = ["fluence", "air", "programming-language", "cid", "ipld"]
categories = ["wasm"]
[dependencies]
blake3 = "1.5.0"
cid = { version = "0.10.1", default-features = false, features = ["std"] }
multihash = { version = "0.18.1", default-features = false, features = ["multihash-impl", "std", "blake3"] }
multihash = { version = "0.18.1", default-features = false, features = ["multihash-impl", "std", "sha2", "blake3"] }
serde = { version = "1.0.190", features = ["derive", "rc"] }
serde_json = "1.0.108"
serde_json = "1.0.95"
thiserror = "1.0.49"
# beware: the `digest` version must match the one used by the hash crates listed here (`sha2`, `blake3`)
digest = "0.10.7"
sha2 = "0.10.7"
blake3 = { version = "1.5.0", features = ["traits-preview"] }

View File

@ -26,8 +26,13 @@
unreachable_patterns
)]
mod verify;
pub use crate::verify::{verify_value, CidVerificationError};
use serde::Deserialize;
use serde::Serialize;
use thiserror::Error as ThisError;
use std::fmt;
use std::io::BufWriter;
@ -37,9 +42,12 @@ use std::rc::Rc;
/// Should-be-opaque type for the inner representation of CID.
/// It has to be serializable and Borsh-serializable, as well as implement `Debug`, `Eq`, `Ord`, `Hash` and similar
/// basic traits. It can also be unsized.
// You should be able to replace it with [u8], and most of the code will just work.
// you should be able to replace it with [u8], and most of the code will just work
pub type CidRef = str;
// there is no Rust multicodec crate with appropriate constants
const JSON_CODEC: u64 = 0x0200;
/// A CID tagged with the type `T` of the value it is meant to identify.
///
/// The first field holds the CID representation as a shared `Rc<CidRef>`; the second is a
/// zero-sized marker that only carries `T` at the type level (no `T` is stored).
/// Because of `#[serde(transparent)]` and `#[serde(skip)]` on the marker, only the inner
/// representation takes part in (de)serialization.
#[derive(Serialize, Deserialize)]
#[serde(transparent)]
pub struct CID<T: ?Sized>(Rc<CidRef>, #[serde(skip)] PhantomData<*const T>);
@ -93,34 +101,24 @@ impl<Val> std::hash::Hash for CID<Val> {
}
}
pub struct CidCalculationError(serde_json::Error);
impl<T: ?Sized> std::convert::TryFrom<&'_ CID<T>> for cid::Cid {
type Error = cid::Error;
impl fmt::Debug for CidCalculationError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Debug::fmt(&self.0, f)
fn try_from(value: &CID<T>) -> Result<Self, Self::Error> {
use std::str::FromStr;
cid::Cid::from_str(&value.0)
}
}
impl fmt::Display for CidCalculationError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Display::fmt(&self.0, f)
}
}
impl From<serde_json::Error> for CidCalculationError {
fn from(source: serde_json::Error) -> Self {
Self(source)
}
}
impl std::error::Error for CidCalculationError {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
Some(&self.0)
}
/// Error produced while calculating a CID.
///
/// Its only variant wraps a `serde_json::Error`.
#[derive(Debug, ThisError)]
pub enum CidCalculationError {
/// A `serde_json` error; `transparent` forwards its `Display` and `source` unchanged.
#[error(transparent)]
InvalidJson(#[from] serde_json::Error),
}
/// Calculate a CID of JSON-serialized value.
// TODO we might refactor this to `SerializationFormat` trait
// TODO we might refactor this to `SerializationCodec` trait
// that both transform data to binary/text form (be it JSON, CBOR or something else)
// and produces CID too
pub fn value_to_json_cid<Val: Serialize + ?Sized>(
@ -129,16 +127,53 @@ pub fn value_to_json_cid<Val: Serialize + ?Sized>(
use cid::Cid;
use multihash::{Code, MultihashDigest};
let mut hasher = blake3::Hasher::new();
serde_json::to_writer(BufWriter::with_capacity(8 * 1024, &mut hasher), value)?;
let hash = hasher.finalize();
let hash = value_json_hash::<blake3::Hasher, Val>(value)?;
let digest = Code::Blake3_256
.wrap(hash.as_bytes())
.wrap(&hash)
.expect("can't happend: incorrect hash length");
// seems to be better than RAW_CODEC = 0x55
const JSON_CODEC: u64 = 0x0200;
let cid = Cid::new_v1(JSON_CODEC, digest);
Ok(CID::new(cid.to_string()))
}
/// Serializes `value` as JSON straight into a fresh hasher of type `D` and returns the
/// resulting digest bytes.
///
/// The JSON text is never materialized as a whole: `serde_json` writes through a
/// fixed-size buffer that forwards its contents to the hasher.
///
/// # Errors
///
/// Returns the `serde_json::Error` reported by the serializer.
pub(crate) fn value_json_hash<D: digest::Digest + std::io::Write, Val: Serialize + ?Sized>(
    value: &Val,
) -> Result<Vec<u8>, serde_json::Error> {
    const HASH_BUFFER_SIZE: usize = 8 * 1024;

    let mut digest_state = D::new();

    // The buffered writer is moved into `to_writer`, so it is dropped (and thereby flushed
    // into the hasher) before the digest is finalized below.
    let buffered = BufWriter::with_capacity(HASH_BUFFER_SIZE, &mut digest_state);
    serde_json::to_writer(buffered, value)?;

    Ok(digest_state.finalize().to_vec())
}
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
// Pins the exact CID strings that `value_to_json_cid` produces for a few small JSON values
// (a string, an array, a number and an object), so any change of the CID calculation
// shows up as a test failure.
#[test]
fn test_cid_default() {
assert_eq!(
value_to_json_cid(&json!("test")).unwrap(),
CID::new("bagaaihrarcyykpv4oj7zwdbepczyfthxya4og7s2rwvrzolm5kg2eu5dz3xa")
);
assert_eq!(
value_to_json_cid(&json!([1, 2, 3])).unwrap(),
CID::new("bagaaihram6sitn77tquub77n2jzjgttrlwkverv44pv3gns6qghm6hx6d36a"),
);
assert_eq!(
value_to_json_cid(&json!(1)).unwrap(),
CID::new("bagaaihra2y55tkbgv6i4d7vdoglfuzhbd3ra6e7ennpvfrmzaejwmbntusdq"),
);
assert_eq!(
value_to_json_cid(&json!({"key": 42})).unwrap(),
CID::new("bagaaihracpzxhsrpviexa7k6glwdhyh3a4kvy6j7qlcqokzqbs3q424cmxyq"),
);
}
}

View File

@ -0,0 +1,241 @@
/*
* Copyright 2023 Fluence Labs Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use crate::{value_json_hash, CidRef, CID, JSON_CODEC};
use serde::Serialize;
use thiserror::Error as ThisError;
use std::convert::TryInto;
use std::rc::Rc;
/// Reasons why a value could not be verified against its CID.
#[derive(ThisError, Debug)]
pub enum CidVerificationError {
/// The hash recomputed from the value differs from the digest stored in the CID.
#[error("Value mismatch in the {type_name:?} store for CID {cid_repr:?}")]
ValueMismatch {
// nb: type_name is the result of std::any::type_name(), which may differ between Rust compiler versions
type_name: &'static str,
// the inner representation of the CID that failed verification
cid_repr: Rc<CidRef>,
},
/// Serializing the value to JSON for hashing failed.
#[error("JSON error: {0}")]
InvalidJson(#[from] serde_json::Error),
/// The CID could not be parsed by the `cid` crate.
#[error(transparent)]
MalformedCid(#[from] cid::Error),
/// The CID uses a codec other than the JSON codec; the payload is the codec found.
#[error("unsupported CID codec: {0}")]
UnsupportedCidCodec(u64),
/// The multihash code is unknown or not handled by verification; the payload is the raw code.
#[error("unsupported multihash code: {0}")]
UnsupportedHashCode(u64),
}
/// Checks that `cid` really is the CID of `value`.
///
/// The CID is parsed first, then the check is dispatched on the CID codec; only
/// `JSON_CODEC` is handled.
///
/// # Errors
///
/// * [`CidVerificationError::MalformedCid`] if `cid` cannot be parsed;
/// * [`CidVerificationError::UnsupportedCidCodec`] for any codec other than `JSON_CODEC`;
/// * whatever the JSON verification reports: an unsupported multihash code, a JSON
///   serialization failure, or a value mismatch.
pub fn verify_value<Val: Serialize>(
    cid: &CID<Val>,
    value: &Val,
) -> Result<(), CidVerificationError> {
    let parsed: cid::Cid = cid.try_into()?;

    match parsed.codec() {
        JSON_CODEC => verify_json_value(parsed.hash(), value, cid),
        unsupported => Err(CidVerificationError::UnsupportedCidCodec(unsupported)),
    }
}
/// Verifies a JSON-codec CID: hashes the JSON form of `value` with the hash function
/// named by `mhash` and compares the result with the digest stored in `mhash`.
///
/// `cid` is only used to report the offending CID in the mismatch error.
///
/// # Errors
///
/// * [`CidVerificationError::UnsupportedHashCode`] if the multihash code is unknown to
///   `multihash::Code` or is neither SHA2-256 nor BLAKE3-256;
/// * [`CidVerificationError::InvalidJson`] if `value` fails to serialize;
/// * [`CidVerificationError::ValueMismatch`] if the digests differ.
fn verify_json_value<Val: Serialize>(
    mhash: &multihash::Multihash,
    value: &Val,
    cid: &CID<Val>,
) -> Result<(), CidVerificationError> {
    use multihash::Code;

    let raw_code = mhash.code();
    let known_code: Result<Code, _> = raw_code.try_into();
    let code = match known_code {
        Ok(code) => code,
        Err(_) => return Err(CidVerificationError::UnsupportedHashCode(raw_code)),
    };

    let computed_hash = match code {
        Code::Sha2_256 => value_json_hash::<sha2::Sha256, Val>(value),
        Code::Blake3_256 => value_json_hash::<blake3::Hasher, Val>(value),
        _ => return Err(CidVerificationError::UnsupportedHashCode(raw_code)),
    }?;

    // A multihash may carry fewer bytes than the full hash; comparing against the complete
    // computed hash rejects such truncated digests, which avoids abuse.
    if computed_hash != mhash.digest() {
        return Err(CidVerificationError::ValueMismatch {
            type_name: std::any::type_name::<Val>(),
            cid_repr: cid.get_inner(),
        });
    }

    Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
use multihash::Multihash;
use serde_json::json;
// Each CID below must verify successfully against the JSON value next to it.
// Per the test name, these CIDs carry SHA2-256 multihashes.
#[test]
fn test_verify_sha2_256() {
verify_value(
&CID::new("bagaaierajwlhumardpzj6dv2ahcerm3vyfrjwl7nahg7zq5o3eprwv6v3vpa"),
&json!("test"),
)
.unwrap();
verify_value(
&CID::new("bagaaierauyk65lxcdxsrphpaqdpiymcszdnjaejyibv2ohbyyaziix35kt2a"),
&json!([1, 2, 3]),
)
.unwrap();
verify_value(
&CID::new("bagaaieranodle477gt6odhllqbhp6wr7k5d23jhkuixr2soadzjn3n4hlnfq"),
&json!(1),
)
.unwrap();
verify_value(
&CID::new("bagaaierad7lci6475zdrps4h6fmcpmqyknz5z6bw6p6tmpjkfyueavqw4kaq"),
&json!({"key": 42}),
)
.unwrap();
}
// Same JSON values as above, with CIDs that (per the test name) carry BLAKE3 multihashes.
// These CIDs are spelled with a leading `z` instead of `b` — presumably a different
// multibase text encoding of the CID; the verification must accept it as well.
#[test]
fn test_verify_blake3() {
verify_value(
&CID::new("z3v8BBKBcZMDh6ANTaiT7PmfrBWbBmoVQvDxojXt1M4eczFDmhF"),
&json!("test"),
)
.unwrap();
verify_value(
&CID::new("z3v8BBK9PYQwY7AGn9wb79BFTzSQiLALGAEmyqSYbCV2D9y8RLw"),
&json!([1, 2, 3]),
)
.unwrap();
verify_value(
&CID::new("z3v8BBKGqF5gxukC6oU2EsSnTD7hBRorAabGJ8UDpNKneW7UApe"),
&json!(1),
)
.unwrap();
verify_value(
&CID::new("z3v8BBK3kqxb39bomB9bJQ22a734aidv5C7QmjdfKiePgVjdQUQ"),
&json!({"key": 42}),
)
.unwrap();
}
// A well-formed CID checked against a different value must yield `ValueMismatch`.
#[test]
fn test_incorrect_value() {
// CID of json!(1)
let cid_1 = CID::new("bagaaieranodle477gt6odhllqbhp6wr7k5d23jhkuixr2soadzjn3n4hlnfq");
let err = verify_value(&cid_1, &json!(2));
assert!(
matches!(err, Err(CidVerificationError::ValueMismatch { .. })),
"{:?}",
err
);
}
// Rebuilds a valid CID with the same multihash but a non-JSON codec; verification must
// refuse it with `UnsupportedCidCodec` carrying that codec.
#[test]
fn test_verify_unknown_codec() {
use std::str::FromStr;
// git raw object
const GIT_RAW_CODEC: u64 = 0x78;
// CID of json!(1)
let cid_1 =
cid::Cid::from_str("bagaaieranodle477gt6odhllqbhp6wr7k5d23jhkuixr2soadzjn3n4hlnfq")
.unwrap();
let unknown_format_cid =
cid::Cid::new(cid::Version::V1, GIT_RAW_CODEC, cid_1.hash().clone()).unwrap();
let unknown_format_cid = CID::new(unknown_format_cid.to_string());
let err = verify_value(&unknown_format_cid, &json!(1));
match err {
Err(CidVerificationError::UnsupportedCidCodec(codec)) => {
assert_eq!(codec, GIT_RAW_CODEC);
}
_ => panic!("wrong result: {:?}", err),
}
}
// Re-wraps the digest of a valid CID under a multihash code that verification does not
// handle; the raw code must come back in `UnsupportedHashCode`.
// NOTE(review): presumably this code is not part of `multihash::Code` in this build, so the
// code conversion itself fails — confirm against the enabled multihash features.
#[test]
fn test_verify_unknown_hasher() {
use std::str::FromStr;
const SHAKE_128_CODE: u64 = 0x18;
// CID of json!(1)
let cid_1 =
cid::Cid::from_str("bagaaieranodle477gt6odhllqbhp6wr7k5d23jhkuixr2soadzjn3n4hlnfq")
.unwrap();
let unknown_hasher_multihash =
Multihash::wrap(SHAKE_128_CODE, cid_1.hash().digest()).unwrap();
let unknown_hasher_cid =
cid::Cid::new(cid::Version::V1, JSON_CODEC, unknown_hasher_multihash).unwrap();
let unknown_hasher_cid = CID::new(unknown_hasher_cid.to_string());
let err = verify_value(&unknown_hasher_cid, &json!(1));
match err {
Err(CidVerificationError::UnsupportedHashCode(code)) => {
assert_eq!(code, SHAKE_128_CODE);
}
_ => panic!("wrong result: {:?}", err),
}
}
// `Identity` is a code known to `multihash::Code`, but verification only handles SHA2-256
// and BLAKE3-256, so it must be rejected with `UnsupportedHashCode` as well.
#[test]
fn test_verify_unsupported_hasher() {
use std::str::FromStr;
// we have no plan to support it, but it may change, and the test should be corrected
let identity_code: u64 = multihash::Code::Identity.into();
// CID of json!(1)
let cid_1 =
cid::Cid::from_str("bagaaieranodle477gt6odhllqbhp6wr7k5d23jhkuixr2soadzjn3n4hlnfq")
.unwrap();
let unknown_hasher_multihash =
Multihash::wrap(identity_code, cid_1.hash().digest()).unwrap();
let unknown_hasher_cid =
cid::Cid::new(cid::Version::V1, JSON_CODEC, unknown_hasher_multihash).unwrap();
let unknown_hasher_cid = CID::new(unknown_hasher_cid.to_string());
let err = verify_value(&unknown_hasher_cid, &json!(1));
match err {
Err(CidVerificationError::UnsupportedHashCode(code)) => {
assert_eq!(code, identity_code);
}
_ => panic!("wrong result: {:?}", err),
}
}
// A string that is not a CID at all must surface the parser error as `MalformedCid`.
#[test]
fn test_verify_garbage() {
let garbage_cid = CID::new("garbage");
let err = verify_value(&garbage_cid, &json!(1));
assert!(
matches!(
err,
Err(CidVerificationError::MalformedCid(cid::Error::ParsingError))
),
"{:?}",
err
);
}
}

View File

@ -17,8 +17,10 @@
use crate::JValue;
use air_interpreter_cid::value_to_json_cid;
use air_interpreter_cid::verify_value;
use air_interpreter_cid::CidCalculationError;
use air_interpreter_cid::CidRef;
use air_interpreter_cid::CidVerificationError;
use air_interpreter_cid::CID;
use serde::Deserialize;
use serde::Serialize;
@ -71,13 +73,7 @@ impl<Val> CidStore<Val> {
impl<Val: Serialize> CidStore<Val> {
pub fn verify(&self) -> Result<(), CidStoreVerificationError> {
for (cid, value) in &self.0 {
let expected_cid = value_to_json_cid::<Val>(value)?;
if expected_cid != *cid {
return Err(CidStoreVerificationError::MismatchError {
type_name: std::any::type_name::<Val>(),
cid_repr: (*cid).get_inner(),
});
}
verify_value(cid, value)?;
}
Ok(())
}
@ -85,15 +81,8 @@ impl<Val: Serialize> CidStore<Val> {
#[derive(ThisError, Debug)]
pub enum CidStoreVerificationError {
#[error("Failed to recalculate CID during the verification: {0}")]
CidCalculationError(#[from] CidCalculationError),
#[error("Value mismatch in the {type_name:?} store for CID {cid_repr:?}")]
MismatchError {
// nb: type_name is std::any::type_name() result that may be inconsistent between the Rust compiler versions
type_name: &'static str,
cid_repr: Rc<CidRef>,
},
#[error(transparent)]
CidVerificationError(#[from] CidVerificationError),
#[error("Reference CID {target_cid_repr:?} from type {source_type_name:?} to {target_type_name:?} was not found")]
MissingReference {
@ -264,6 +253,6 @@ mod tests {
&json!({"key": 42}),
);
assert_eq!(store.get(&CID::new("loremimpsumdolorsitament")), None,);
assert_eq!(store.get(&CID::new("loremimpsumdolorsitament")), None);
}
}