From c792c337c00c2af1373d9ee3031b349d013bfcde Mon Sep 17 00:00:00 2001 From: Dmytro Kozhevin Date: Thu, 4 Jan 2024 21:01:07 -0500 Subject: [PATCH 01/10] 20.1.0 cleanup --- Cargo.lock | 18 +-- Cargo.toml | 12 +- soroban-env-common/src/convert.rs | 10 +- soroban-env-common/src/env.rs | 2 +- soroban-env-common/src/symbol.rs | 113 +++++++++--------- soroban-env-common/src/val.rs | 25 ++-- soroban-env-guest/src/guest.rs | 2 +- soroban-env-host/src/host.rs | 36 +++--- soroban-env-host/src/host/conversion.rs | 24 ++-- soroban-env-host/src/host/frame.rs | 7 +- soroban-env-host/src/host/mem_helper.rs | 4 +- soroban-env-host/src/host_object.rs | 24 +++- soroban-env-host/src/test/prng.rs | 4 +- soroban-env-host/src/vm.rs | 2 +- .../src/synth_linear_memory_tests.rs | 2 +- 15 files changed, 157 insertions(+), 128 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 34b6ffa2f..33ede1e9c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1333,7 +1333,7 @@ checksum = "4dccd0940a2dcdf68d092b8cbab7dc0ad8fa938bf95787e1b916b0e3d0e8e970" [[package]] name = "soroban-bench-utils" -version = "20.0.1" +version = "20.1.0" dependencies = [ "perf-event", "soroban-env-common", @@ -1342,7 +1342,7 @@ dependencies = [ [[package]] name = "soroban-builtin-sdk-macros" -version = "20.0.1" +version = "20.1.0" dependencies = [ "itertools", "proc-macro2", @@ -1352,7 +1352,7 @@ dependencies = [ [[package]] name = "soroban-env-common" -version = "20.0.1" +version = "20.1.0" dependencies = [ "arbitrary", "crate-git-revision", @@ -1370,7 +1370,7 @@ dependencies = [ [[package]] name = "soroban-env-guest" -version = "20.0.1" +version = "20.1.0" dependencies = [ "soroban-env-common", "static_assertions", @@ -1378,7 +1378,7 @@ dependencies = [ [[package]] name = "soroban-env-host" -version = "20.0.1" +version = "20.1.0" dependencies = [ "arbitrary", "backtrace", @@ -1424,7 +1424,7 @@ dependencies = [ [[package]] name = "soroban-env-macros" -version = "20.0.1" +version = "20.1.0" dependencies = [ "itertools", "proc-macro2", @@ -1437,7 +1437,7 @@ dependencies = [ [[package]] name = "soroban-synth-wasm" -version = "20.0.1" +version = "20.1.0" dependencies = [ "arbitrary", "expect-test", @@ -1450,7 +1450,7 @@ dependencies = [ [[package]] name = "soroban-test-wasms" -version = "20.0.1" +version = "20.1.0" [[package]] name = "soroban-wasmi" @@ -1547,7 +1547,7 @@ dependencies = [ [[package]] name = "test_no_std" -version = "20.0.1" +version = "20.1.0" dependencies = [ "soroban-env-common", ] diff --git a/Cargo.toml b/Cargo.toml index af12cf21c..b26619a2d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,14 +16,14 @@ members = [ exclude = ["soroban-test-wasms/wasm-workspace"] [workspace.package] -version = "20.0.1" +version = "20.1.0" [workspace.dependencies] -soroban-env-common = { version = "=20.0.1", path = "soroban-env-common", default-features = false } -soroban-env-guest = { version = "=20.0.1", path = "soroban-env-guest" } -soroban-env-host = { version = "=20.0.1", path = "soroban-env-host" } -soroban-env-macros = { version = "=20.0.1", path = "soroban-env-macros" } -soroban-builtin-sdk-macros = { version = "=20.0.1", path = "soroban-builtin-sdk-macros" } +soroban-env-common = { version = "=20.1.0", path = "soroban-env-common", default-features = false } +soroban-env-guest = { version = "=20.1.0", path = "soroban-env-guest" } +soroban-env-host = { version = "=20.1.0", path = "soroban-env-host" } +soroban-env-macros = { version = "=20.1.0", path = "soroban-env-macros" } +soroban-builtin-sdk-macros = { version = "=20.1.0", path = "soroban-builtin-sdk-macros" } # NB: When updating, also update the version in rs-soroban-env dev-dependencies [workspace.dependencies.stellar-xdr] diff --git a/soroban-env-common/src/convert.rs b/soroban-env-common/src/convert.rs index c03c0a78a..b13ca2546 100644 --- a/soroban-env-common/src/convert.rs +++ b/soroban-env-common/src/convert.rs @@ -525,11 +525,11 @@ where unsafe { Val::from_body_and_tag(i.lo_lo, Tag::I256Small) } } ScVal::Symbol(bytes) => { - let ss = match std::str::from_utf8(bytes.as_slice()) { - Ok(ss) => ss, - Err(_) => return Err(ConversionError), - }; - SymbolSmall::try_from_str(ss)?.into() + // NB: Long symbols are objects and should have been + // handled before reaching this point. + SymbolSmall::try_from_bytes(bytes.as_slice()) + .map_err(|_| ConversionError {})? + .into() } // These should all have been classified as ScValObjRef above, or are diff --git a/soroban-env-common/src/env.rs b/soroban-env-common/src/env.rs index e9fdac42d..3f9b143f7 100644 --- a/soroban-env-common/src/env.rs +++ b/soroban-env-common/src/env.rs @@ -168,7 +168,7 @@ pub trait EnvBase: Sized + Clone { fn string_new_from_slice(&self, slice: &[u8]) -> Result; /// Form a new `Symbol` host object from a slice of client memory. - fn symbol_new_from_slice(&self, slice: &str) -> Result; + fn symbol_new_from_slice(&self, slice: &[u8]) -> Result; /// Form a new `Map` host object from a slice of symbol-names and a slice of values. /// Keys must be in sorted order. diff --git a/soroban-env-common/src/symbol.rs b/soroban-env-common/src/symbol.rs index 9966acf04..52daa1f4f 100644 --- a/soroban-env-common/src/symbol.rs +++ b/soroban-env-common/src/symbol.rs @@ -20,8 +20,8 @@ //! small maximum size for [`SymbolObject`]s, given by //! [`SCSYMBOL_LIMIT`](crate::xdr::SCSYMBOL_LIMIT) (currently 32 bytes). //! Having such a modest maximum size allows working with `Symbol`s -//! entirely in guest WASM code without a heap allocator, using only -//! fixed-size buffers on the WASM shadow stack. The [`SymbolStr`] type is +//! entirely in guest Wasm code without a heap allocator, using only +//! fixed-size buffers on the Wasm shadow stack. The [`SymbolStr`] type is //! a convenience wrapper around such a buffer, that can be directly //! created from a [`Symbol`], copying its bytes from the host if the //! `Symbol` is a `SymbolObject` or unpacking its 6-bit codes if the @@ -45,9 +45,10 @@ //! `Val` word) with zero padding in the high-order bits rather than low-order //! bits. While this means that lexicographical ordering of `SymbolSmall` values //! does not coincide with simple integer ordering of `Val` bodies, it optimizes -//! the space cost of `SymbolSmall` literals in WASM bytecode, where all integer +//! the space cost of `SymbolSmall` literals in Wasm bytecode, where all integer //! literals are encoded as variable-length little-endian values, using ULEB128. +use crate::xdr::SCSYMBOL_LIMIT; use crate::{ declare_tag_based_small_and_object_wrappers, val::ValConvert, Compare, ConversionError, Env, Tag, TryFromVal, Val, @@ -63,8 +64,8 @@ pub enum SymbolError { /// than 9 characters. TooLong(usize), /// Returned when attempting to form a [SymbolObject] or [SymbolSmall] from - /// a string with characters outside the range `[a-zA-Z0-9_]`. - BadChar(char), + /// a byte string with characters outside the range `[a-zA-Z0-9_]`. + BadChar(u8), /// Malformed small symbol (upper two bits were set). MalformedSmall, } @@ -103,29 +104,27 @@ sa::const_assert!(CODE_MASK == 0x3f); sa::const_assert!(CODE_BITS * MAX_SMALL_CHARS + 2 == BODY_BITS); sa::const_assert!(SMALL_MASK == 0x003f_ffff_ffff_ffff); -impl TryFromVal for Symbol { +impl TryFromVal for Symbol { type Error = crate::Error; - fn try_from_val(env: &E, v: &&str) -> Result { - if let Ok(ss) = SymbolSmall::try_from_str(v) { - Ok(Self(ss.0)) + fn try_from_val(env: &E, v: &&[u8]) -> Result { + if v.len() <= MAX_SMALL_CHARS { + SymbolSmall::try_from_bytes(*v) + .map(Into::into) + .map_err(Into::into) } else { - let sobj = env.symbol_new_from_slice(v).map_err(Into::into)?; - Ok(Self(sobj.0)) + env.symbol_new_from_slice(*v) + .map(Into::into) + .map_err(Into::into) } } } -impl TryFromVal for Symbol { +impl TryFromVal for Symbol { type Error = crate::Error; - fn try_from_val(env: &E, v: &&[u8]) -> Result { - // We don't know this byte-slice is actually utf-8 ... - let s: &str = unsafe { core::str::from_utf8_unchecked(v) }; - // ... but this next conversion step will check that its - // _bytes_ are in the symbol-char range, which is a subset - // of utf-8, so we're only lying harmlessly. - Symbol::try_from_val(env, &s) + fn try_from_val(env: &E, v: &&str) -> Result { + Symbol::try_from_val(env, &v.as_bytes()) } } @@ -167,21 +166,27 @@ impl SymbolSmall { impl Symbol { #[doc(hidden)] - pub const unsafe fn from_small_body(body: u64) -> Self { - // This is a helper that should only be used in macros where it's - // necessary to generate a Symbol constant with no possibility of error: - // it just forces the bits that might be set wrong (54 and 55) to zero. - let body = body & SMALL_MASK; - Symbol(SymbolSmall::from_body(body).0) + pub fn validate_bytes(bytes: &[u8]) -> Result<(), SymbolError> { + if bytes.len() as u64 > SCSYMBOL_LIMIT { + return Err(SymbolError::TooLong(bytes.len())); + } + for b in bytes { + SymbolSmall::validate_byte(*b)?; + } + Ok(()) } - pub const fn try_from_small_str(s: &str) -> Result { - match SymbolSmall::try_from_str(s) { - Ok(ss) => Ok(Symbol(ss.0)), + pub const fn try_from_small_bytes(b: &[u8]) -> Result { + match SymbolSmall::try_from_bytes(b) { + Ok(sym) => Ok(Symbol(sym.0)), Err(e) => Err(e), } } + pub const fn try_from_small_str(s: &str) -> Result { + Self::try_from_small_bytes(s.as_bytes()) + } + // This should not be generally available as it can easily panic. #[cfg(feature = "testutils")] pub const fn from_small_str(s: &str) -> Self { @@ -205,7 +210,6 @@ impl TryFrom<&[u8]> for SymbolSmall { #[cfg(feature = "std")] use crate::xdr::StringM; -use crate::xdr::SCSYMBOL_LIMIT; #[cfg(feature = "std")] impl TryFrom> for SymbolSmall { type Error = SymbolError; @@ -224,40 +228,38 @@ impl TryFrom<&StringM> for SymbolSmall { } impl SymbolSmall { - #[doc(hidden)] - pub const fn validate_char(ch: char) -> Result<(), SymbolError> { - match SymbolSmall::encode_char(ch) { + const fn validate_byte(b: u8) -> Result<(), SymbolError> { + match Self::encode_byte(b) { Ok(_) => Ok(()), Err(e) => Err(e), } } - const fn encode_char(ch: char) -> Result { - let v = match ch { - '_' => 1, - '0'..='9' => 2 + ((ch as u64) - ('0' as u64)), - 'A'..='Z' => 12 + ((ch as u64) - ('A' as u64)), - 'a'..='z' => 38 + ((ch as u64) - ('a' as u64)), - _ => return Err(SymbolError::BadChar(ch)), + const fn encode_byte(b: u8) -> Result { + let v = match b { + b'_' => 1, + b'0'..=b'9' => 2 + (b - b'0'), + b'A'..=b'Z' => 12 + (b - b'A'), + b'a'..=b'z' => 38 + (b - b'a'), + _ => return Err(SymbolError::BadChar(b)), }; Ok(v) } - pub const fn try_from_bytes(b: &[u8]) -> Result { + pub const fn try_from_bytes(bytes: &[u8]) -> Result { + if bytes.len() > MAX_SMALL_CHARS { + return Err(SymbolError::TooLong(bytes.len())); + } let mut n = 0; let mut accum: u64 = 0; - while n < b.len() { - let ch = b[n] as char; - if n >= MAX_SMALL_CHARS { - return Err(SymbolError::TooLong(b.len())); - } - n += 1; - accum <<= CODE_BITS; - let v = match SymbolSmall::encode_char(ch) { + while n < bytes.len() { + let v = match Self::encode_byte(bytes[n]) { Ok(v) => v, Err(e) => return Err(e), }; - accum |= v; + accum <<= CODE_BITS; + accum |= v as u64; + n += 1; } Ok(unsafe { Self::from_body(accum) }) } @@ -274,7 +276,7 @@ impl SymbolSmall { // This should not be generally available as it can easily panic. #[cfg(feature = "testutils")] pub const fn from_str(s: &str) -> SymbolSmall { - match Self::try_from_str(s) { + match Self::try_from_bytes(s.as_bytes()) { Ok(sym) => sym, Err(SymbolError::TooLong(_)) => panic!("symbol too long"), Err(SymbolError::BadChar(_)) => panic!("symbol bad char"), @@ -470,7 +472,7 @@ impl TryFrom for SymbolSmall { impl TryFrom<&ScVal> for SymbolSmall { type Error = crate::Error; fn try_from(v: &ScVal) -> Result { - if let ScVal::Symbol(crate::xdr::ScSymbol(vec)) = v { + if let ScVal::Symbol(ScSymbol(vec)) = v { vec.try_into().map_err(Into::into) } else { Err(ConversionError.into()) @@ -565,8 +567,7 @@ mod test_without_string { let input = "stellar"; let sym = SymbolSmall::try_from_str(input).unwrap(); let sym_str = SymbolStr::from(sym); - let s: &str = sym_str.as_ref(); - assert_eq!(s, input); + assert_eq!(sym_str.to_string(), input); } #[test] @@ -574,8 +575,7 @@ mod test_without_string { let input = ""; let sym = SymbolSmall::try_from_str(input).unwrap(); let sym_str = SymbolStr::from(sym); - let s: &str = sym_str.as_ref(); - assert_eq!(s, input); + assert_eq!(sym_str.to_string(), input); } #[test] @@ -583,8 +583,7 @@ mod test_without_string { let input = "123456789"; let sym = SymbolSmall::try_from_str(input).unwrap(); let sym_str = SymbolStr::from(sym); - let s: &str = sym_str.as_ref(); - assert_eq!(s, input); + assert_eq!(sym_str.to_string(), input); } #[test] diff --git a/soroban-env-common/src/val.rs b/soroban-env-common/src/val.rs index 0ebcef545..9d3450441 100644 --- a/soroban-env-common/src/val.rs +++ b/soroban-env-common/src/val.rs @@ -9,7 +9,7 @@ use crate::{ }; use super::{Env, Error, TryFromVal}; -use core::{cmp::Ordering, convert::Infallible, fmt::Debug}; +use core::{cmp::Ordering, convert::Infallible, fmt::Debug, str}; extern crate static_assertions as sa; @@ -609,18 +609,15 @@ impl Val { } } - pub fn can_represent_scval(scv: &crate::xdr::ScVal) -> bool { + /// *Non-recursively* checks whether `ScVal` can be represented as `Val`. + /// Since conversions from `ScVal` are recursive themselves, this should + /// be called at every recursion level during conversion. + pub fn can_represent_scval(scv: &ScVal) -> bool { match scv { - // Handle recursive types first + // Map/vec can't be validated just based on the discriminant, + // as their contents can't be `None`. ScVal::Vec(None) => return false, ScVal::Map(None) => return false, - ScVal::Vec(Some(v)) => return v.0.iter().all(|x| Val::can_represent_scval(x)), - ScVal::Map(Some(m)) => { - return m - .0 - .iter() - .all(|e| Val::can_represent_scval(&e.key) && Val::can_represent_scval(&e.val)) - } _ => Self::can_represent_scval_type(scv.discriminant()), } } @@ -820,8 +817,12 @@ impl Debug for Val { Tag::SymbolSmall => { let ss: SymbolStr = unsafe { ::unchecked_from_val(*self) }.into(); - let s: &str = ss.as_ref(); - write!(f, "Symbol({s})") + // Even though this may be called for an arbitrary, not necessarily well-formed + // `Val`, this is still safe thanks to `SymbolSmall` iteration implementation that + // only returns valid symbol characters or `\0` even for invalid bit + // representations. + let converted_str = unsafe { str::from_utf8_unchecked(ss.as_ref()) }; + write!(f, "Symbol({})", converted_str) } Tag::U64Object => fmt_obj("U64", self, f), diff --git a/soroban-env-guest/src/guest.rs b/soroban-env-guest/src/guest.rs index 79a578837..adeefc9d8 100644 --- a/soroban-env-guest/src/guest.rs +++ b/soroban-env-guest/src/guest.rs @@ -115,7 +115,7 @@ impl EnvBase for Guest { self.string_new_from_linear_memory(lm_pos, len) } - fn symbol_new_from_slice(&self, slice: &str) -> Result { + fn symbol_new_from_slice(&self, slice: &[u8]) -> Result { sa::assert_eq_size!(u32, *const u8); sa::assert_eq_size!(u32, usize); let lm_pos: U32Val = Val::from_u32(slice.as_ptr() as u32); diff --git a/soroban-env-host/src/host.rs b/soroban-env-host/src/host.rs index 76b526069..e15dd75a8 100644 --- a/soroban-env-host/src/host.rs +++ b/soroban-env-host/src/host.rs @@ -17,9 +17,9 @@ use crate::{ ScSymbol, ScVal, TimePoint, Uint256, }, AddressObject, Bool, BytesObject, Compare, ConversionError, EnvBase, Error, I128Object, - I256Object, MapObject, Object, StorageType, StringObject, Symbol, SymbolObject, SymbolSmall, - TryFromVal, U128Object, U256Object, U32Val, U64Val, Val, VecObject, VmCaller, VmCallerEnv, - Void, I256, U256, + I256Object, MapObject, Object, StorageType, StringObject, Symbol, SymbolObject, TryFromVal, + U128Object, U256Object, U32Val, U64Val, Val, VecObject, VmCaller, VmCallerEnv, Void, I256, + U256, }; mod comparison; @@ -51,6 +51,7 @@ use self::{ prng::Prng, }; +use crate::host_object::MemHostObjectType; #[cfg(any(test, feature = "testutils"))] pub use frame::ContractFunctionSet; pub(crate) use frame::Frame; @@ -804,15 +805,13 @@ impl EnvBase for Host { res } - fn symbol_new_from_slice(&self, s: &str) -> Result { + fn symbol_new_from_slice(&self, s: &[u8]) -> Result { call_env_call_hook!(self, s.len()); self.charge_budget(ContractCostType::MemCmp, Some(s.len() as u64))?; - for ch in s.chars() { - SymbolSmall::validate_char(ch)?; - } - let res = self.add_host_object(ScSymbol( - self.metered_slice_to_vec(s.as_bytes())?.try_into()?, - )); + let res = self.add_host_object(ScSymbol::try_from_bytes( + self, + self.metered_slice_to_vec(s)?.try_into()?, + )?); call_env_ret_hook!(self, res); res } @@ -1515,9 +1514,11 @@ impl VmCallerEnv for Host { keys_pos, len as usize, |_n, slice| { + // Optimization note: this does an unnecessary `ScVal` roundtrip. + // We should just use `Symbol::try_from_val` on the slice instead. self.charge_budget(ContractCostType::MemCpy, Some(slice.len() as u64))?; let scsym = ScSymbol(slice.try_into()?); - let sym = Symbol::try_from(self.to_host_val(&ScVal::Symbol(scsym))?)?; + let sym = Symbol::try_from(self.to_valid_host_val(&ScVal::Symbol(scsym))?)?; key_syms.push(sym); Ok(()) }, @@ -1939,7 +1940,7 @@ impl VmCallerEnv for Host { .get(&key, self.as_budget()) .map_err(|e| self.decorate_contract_data_storage_error(e, k))?; match &entry.data { - LedgerEntryData::ContractData(e) => Ok(self.to_host_val(&e.val)?), + LedgerEntryData::ContractData(e) => Ok(self.to_valid_host_val(&e.val)?), _ => Err(self.err( ScErrorType::Storage, ScErrorCode::InternalError, @@ -2254,16 +2255,7 @@ impl VmCallerEnv for Host { let scv = self.visit_obj(b, |hv: &ScBytes| { self.metered_from_xdr::(hv.as_slice()) })?; - if Val::can_represent_scval(&scv) { - self.to_host_val(&scv) - } else { - Err(self.err( - ScErrorType::Value, - ScErrorCode::UnexpectedType, - "Deserialized ScVal type cannot be represented as Val", - &[(scv.discriminant() as i32).into()], - )) - } + self.to_host_val(&scv) } fn string_copy_to_linear_memory( diff --git a/soroban-env-host/src/host/conversion.rs b/soroban-env-host/src/host/conversion.rs index 1ad750f1d..ddedf3b8a 100644 --- a/soroban-env-host/src/host/conversion.rs +++ b/soroban-env-host/src/host/conversion.rs @@ -158,7 +158,6 @@ impl Host { durability: ContractDataDurability, ) -> Result, HostError> { let key_scval = self.from_host_val(k)?; - self.check_val_representable_scval(&key_scval)?; self.storage_key_from_scval(key_scval, durability) } @@ -332,9 +331,6 @@ impl Host { pub(crate) fn to_host_val(&self, v: &ScVal) -> Result { let _span = tracy_span!("ScVal to Val"); - // This is an internal consistency check: this is an internal method and - // any caller should have previously rejected non-representable ScVals. - self.check_val_representable_scval(&v)?; // This is the depth limit checkpoint for `ScVal`->`Val` conversion. // Metering of val conversion happens only if an object is encountered, // and is done inside `to_host_obj`. @@ -346,6 +342,23 @@ impl Host { }) } + // Version of `to_host_val` for the internal cases where the value has to + // be valid by construction (e.g. read from ledger). + pub(crate) fn to_valid_host_val(&self, v: &ScVal) -> Result { + self.to_host_val(v).map_err(|e| { + if e.error.is_type(ScErrorType::Budget) { + e + } else { + self.err( + ScErrorType::Value, + ScErrorCode::InternalError, + "unexpected non-Val-representable ScVal in internal conversion", + &[], + ) + } + }) + } + pub(crate) fn from_host_obj(&self, ob: impl Into) -> Result { unsafe { let objref: Object = ob.into(); @@ -417,9 +430,6 @@ impl Host { pub(crate) fn to_host_obj(&self, ob: &ScValObjRef<'_>) -> Result { let val: &ScVal = (*ob).into(); - // This is an internal consistency check: this is an internal method and any - // caller should have previously rejected non-representable ScVals. - self.check_val_representable_scval(val)?; match val { // Here we have to make sure host object conversion is charged in each variant // below. There is no otherwise ubiquitous metering for ScVal->Val conversion, diff --git a/soroban-env-host/src/host/frame.rs b/soroban-env-host/src/host/frame.rs index 6d117a787..d56500807 100644 --- a/soroban-env-host/src/host/frame.rs +++ b/soroban-env-host/src/host/frame.rs @@ -922,7 +922,12 @@ impl Host { || Ok(vec![]), |m| { m.iter() - .map(|i| Ok((self.to_host_val(&i.key)?, self.to_host_val(&i.val)?))) + .map(|i| { + Ok(( + self.to_valid_host_val(&i.key)?, + self.to_valid_host_val(&i.val)?, + )) + }) .metered_collect::, HostError>>(self)? }, )?, diff --git a/soroban-env-host/src/host/mem_helper.rs b/soroban-env-host/src/host/mem_helper.rs index 70789326e..2837ff588 100644 --- a/soroban-env-host/src/host/mem_helper.rs +++ b/soroban-env-host/src/host/mem_helper.rs @@ -333,7 +333,7 @@ impl Host { ) })?; copy_bytes_in(obj_buf)?; - self.add_host_object(HOT::try_from(obj_new)?) + self.add_host_object(HOT::try_from_bytes(self, obj_new)?) } pub(crate) fn memobj_copy_from_slice( @@ -372,7 +372,7 @@ impl Host { self.charge_budget(ContractCostType::MemAlloc, Some(len as u64))?; let mut vnew: Vec = vec![0; len as usize]; self.metered_vm_read_bytes_from_linear_memory(vmcaller, &vm, pos, &mut vnew)?; - self.add_host_object::(vnew.try_into()?) + self.add_host_object::(HOT::try_from_bytes(self, vnew)?) } pub(crate) fn symbol_matches(&self, s: &[u8], sym: Symbol) -> Result { diff --git a/soroban-env-host/src/host_object.rs b/soroban-env-host/src/host_object.rs index 91837a65e..346a92fc7 100644 --- a/soroban-env-host/src/host_object.rs +++ b/soroban-env-host/src/host_object.rs @@ -155,6 +155,7 @@ pub(crate) trait HostObjectType: MeteredClone { pub(crate) trait MemHostObjectType: HostObjectType + TryFrom, Error = xdr::Error> + Into> { + fn try_from_bytes(host: &Host, bytes: Vec) -> Result; fn as_byte_slice(&self) -> &[u8]; } @@ -183,6 +184,10 @@ macro_rules! declare_mem_host_object_type { ($TY:ty, $TAG:ident, $CASE:ident) => { declare_host_object_type!($TY, $TAG, $CASE); impl MemHostObjectType for $TY { + fn try_from_bytes(_host: &Host, bytes: Vec) -> Result { + Self::try_from(bytes).map_err(Into::into) + } + fn as_byte_slice(&self) -> &[u8] { self.as_slice() } @@ -203,9 +208,26 @@ declare_host_object_type!(U256, U256Object, U256); declare_host_object_type!(I256, I256Object, I256); declare_mem_host_object_type!(xdr::ScBytes, BytesObject, Bytes); declare_mem_host_object_type!(xdr::ScString, StringObject, String); -declare_mem_host_object_type!(xdr::ScSymbol, SymbolObject, Symbol); +declare_host_object_type!(xdr::ScSymbol, SymbolObject, Symbol); declare_host_object_type!(xdr::ScAddress, AddressObject, Address); +impl MemHostObjectType for xdr::ScSymbol { + fn try_from_bytes(host: &Host, bytes: Vec) -> Result { + soroban_env_common::Symbol::validate_bytes(bytes.as_slice()).map_err(|_| { + host.err( + ScErrorType::Value, + ScErrorCode::InvalidInput, + concat!("byte array can not be represented by Symbol"), + &[], + ) + })?; + Self::try_from(bytes).map_err(Into::into) + } + fn as_byte_slice(&self) -> &[u8] { + self.as_ref() + } +} + // Objects come in two flavors: relative and absolute. They are differentiated // by the low bit of the object handle: relative objects have 0, absolutes have // 1. The remaining bits (left shifted by 1) are the index in a corresponding diff --git a/soroban-env-host/src/test/prng.rs b/soroban-env-host/src/test/prng.rs index b8d2a4067..2087e163c 100644 --- a/soroban-env-host/src/test/prng.rs +++ b/soroban-env-host/src/test/prng.rs @@ -12,8 +12,8 @@ use crate::{ /// prng tests // Copy of SymbolSmall::from_str, but not protected by feature="testutils". -// We know our uses here are test-only. -pub const fn ss_from_str(s: &str) -> SymbolSmall { +#[cfg(test)] +const fn ss_from_str(s: &str) -> SymbolSmall { match SymbolSmall::try_from_str(s) { Ok(sym) => sym, _ => panic!("bad symbol"), diff --git a/soroban-env-host/src/vm.rs b/soroban-env-host/src/vm.rs index 97b005d55..8d6a50c71 100644 --- a/soroban-env-host/src/vm.rs +++ b/soroban-env-host/src/vm.rs @@ -331,7 +331,7 @@ impl Vm { let func_ss: SymbolStr = func_sym.try_into_val(host)?; let ext = match self .instance - .get_export(&*self.store.try_borrow_or_err()?, func_ss.as_ref()) + .get_export(&*self.store.try_borrow_or_err()?, &func_ss.to_string()) { None => { return Err(host.err( diff --git a/soroban-env-macros/src/synth_linear_memory_tests.rs b/soroban-env-macros/src/synth_linear_memory_tests.rs index f59665386..021bf60c5 100644 --- a/soroban-env-macros/src/synth_linear_memory_tests.rs +++ b/soroban-env-macros/src/synth_linear_memory_tests.rs @@ -245,7 +245,7 @@ pub fn generate_wasm_module_with_preloaded_linear_memory( // prefill the last 256 bytes with some values // push in the following order: offset(d), val, length(n) f3.i32_const(#DATA_SECTION_0_START as i32); - f3.i32_const(7); + f3.i32_const(68); f3.i32_const(#DATA_SECTION_LEN as i32); f3.memory_fill(); // call the target host function From fb2a94326a2840dfb9ed4f1820028aee032aa92c Mon Sep 17 00:00:00 2001 From: Dmytro Kozhevin Date: Fri, 5 Jan 2024 13:09:14 -0500 Subject: [PATCH 02/10] Clippy updates --- soroban-env-common/src/symbol.rs | 6 +++--- soroban-env-macros/src/synth_dispatch_host_fn_tests.rs | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/soroban-env-common/src/symbol.rs b/soroban-env-common/src/symbol.rs index 52daa1f4f..28ac17cc9 100644 --- a/soroban-env-common/src/symbol.rs +++ b/soroban-env-common/src/symbol.rs @@ -109,11 +109,11 @@ impl TryFromVal for Symbol { fn try_from_val(env: &E, v: &&[u8]) -> Result { if v.len() <= MAX_SMALL_CHARS { - SymbolSmall::try_from_bytes(*v) + SymbolSmall::try_from_bytes(v) .map(Into::into) .map_err(Into::into) } else { - env.symbol_new_from_slice(*v) + env.symbol_new_from_slice(v) .map(Into::into) .map_err(Into::into) } @@ -276,7 +276,7 @@ impl SymbolSmall { // This should not be generally available as it can easily panic. #[cfg(feature = "testutils")] pub const fn from_str(s: &str) -> SymbolSmall { - match Self::try_from_bytes(s.as_bytes()) { + match Self::try_from_str(s) { Ok(sym) => sym, Err(SymbolError::TooLong(_)) => panic!("symbol too long"), Err(SymbolError::BadChar(_)) => panic!("symbol bad char"), diff --git a/soroban-env-macros/src/synth_dispatch_host_fn_tests.rs b/soroban-env-macros/src/synth_dispatch_host_fn_tests.rs index 8205dcd7f..7976e7466 100644 --- a/soroban-env-macros/src/synth_dispatch_host_fn_tests.rs +++ b/soroban-env-macros/src/synth_dispatch_host_fn_tests.rs @@ -237,7 +237,7 @@ pub fn generate_hostfn_call_with_wrong_types(file_lit: LitStr) -> Result Date: Fri, 5 Jan 2024 13:17:02 -0500 Subject: [PATCH 03/10] More clippy fixes --- soroban-env-host/src/host.rs | 2 +- soroban-env-host/src/vm.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/soroban-env-host/src/host.rs b/soroban-env-host/src/host.rs index e15dd75a8..fd28d3660 100644 --- a/soroban-env-host/src/host.rs +++ b/soroban-env-host/src/host.rs @@ -810,7 +810,7 @@ impl EnvBase for Host { self.charge_budget(ContractCostType::MemCmp, Some(s.len() as u64))?; let res = self.add_host_object(ScSymbol::try_from_bytes( self, - self.metered_slice_to_vec(s)?.try_into()?, + self.metered_slice_to_vec(s)?, )?); call_env_ret_hook!(self, res); res diff --git a/soroban-env-host/src/vm.rs b/soroban-env-host/src/vm.rs index 8d6a50c71..97b005d55 100644 --- a/soroban-env-host/src/vm.rs +++ b/soroban-env-host/src/vm.rs @@ -331,7 +331,7 @@ impl Vm { let func_ss: SymbolStr = func_sym.try_into_val(host)?; let ext = match self .instance - .get_export(&*self.store.try_borrow_or_err()?, &func_ss.to_string()) + .get_export(&*self.store.try_borrow_or_err()?, func_ss.as_ref()) { None => { return Err(host.err( From f2ffeb2a2ca0f2fc798de7318c92ebd386338fe4 Mon Sep 17 00:00:00 2001 From: Dmytro Kozhevin Date: Fri, 5 Jan 2024 17:09:41 -0500 Subject: [PATCH 04/10] Fix unintentional observation diffs and update the intentional ones. --- soroban-env-common/src/symbol.rs | 23 ++++-------- soroban-env-common/src/val.rs | 20 +++++++++++ ...m_linear_memory_with_bytes_length_oob.json | 32 ++++++++--------- ...from_linear_memory_with_bytes_pos_oob.json | 32 ++++++++--------- ...rom_linear_memory_with_good_inputs_ok.json | 34 +++++++++--------- soroban-env-host/src/host.rs | 36 +++++++++++++++---- soroban-env-host/src/host_object.rs | 24 +++++++++---- .../src/synth_linear_memory_tests.rs | 8 ++++- 8 files changed, 130 insertions(+), 79 deletions(-) diff --git a/soroban-env-common/src/symbol.rs b/soroban-env-common/src/symbol.rs index 28ac17cc9..5327e800e 100644 --- a/soroban-env-common/src/symbol.rs +++ b/soroban-env-common/src/symbol.rs @@ -108,10 +108,11 @@ impl TryFromVal for Symbol { type Error = crate::Error; fn try_from_val(env: &E, v: &&[u8]) -> Result { - if v.len() <= MAX_SMALL_CHARS { - SymbolSmall::try_from_bytes(v) - .map(Into::into) - .map_err(Into::into) + // Optimization note: this should only ever call one conversion + // function based on the input slice length, currently slices + // with invalid characters get re-validated. + if let Ok(s) = SymbolSmall::try_from_bytes(v) { + Ok(s.into()) } else { env.symbol_new_from_slice(v) .map(Into::into) @@ -165,17 +166,6 @@ impl SymbolSmall { } impl Symbol { - #[doc(hidden)] - pub fn validate_bytes(bytes: &[u8]) -> Result<(), SymbolError> { - if bytes.len() as u64 > SCSYMBOL_LIMIT { - return Err(SymbolError::TooLong(bytes.len())); - } - for b in bytes { - SymbolSmall::validate_byte(*b)?; - } - Ok(()) - } - pub const fn try_from_small_bytes(b: &[u8]) -> Result { match SymbolSmall::try_from_bytes(b) { Ok(sym) => Ok(Symbol(sym.0)), @@ -228,7 +218,8 @@ impl TryFrom<&StringM> for SymbolSmall { } impl SymbolSmall { - const fn validate_byte(b: u8) -> Result<(), SymbolError> { + #[doc(hidden)] + pub const fn validate_byte(b: u8) -> Result<(), SymbolError> { match Self::encode_byte(b) { Ok(_) => Ok(()), Err(e) => Err(e), diff --git a/soroban-env-common/src/val.rs b/soroban-env-common/src/val.rs index 9d3450441..d167f2bbb 100644 --- a/soroban-env-common/src/val.rs +++ b/soroban-env-common/src/val.rs @@ -622,6 +622,26 @@ impl Val { } } + /// *Recursively* checks whether `ScVal` can be represented as `Val`. + /// This should only be used once per top-level `ScVal`. + pub fn can_represent_scval_recursive(scv: &ScVal) -> bool { + match scv { + // Handle recursive types first + ScVal::Vec(None) => return false, + ScVal::Map(None) => return false, + ScVal::Vec(Some(v)) => { + return v.0.iter().all(|x| Val::can_represent_scval_recursive(x)) + } + ScVal::Map(Some(m)) => { + return m.0.iter().all(|e| { + Val::can_represent_scval_recursive(&e.key) + && Val::can_represent_scval_recursive(&e.val) + }) + } + _ => Self::can_represent_scval_type(scv.discriminant()), + } + } + /// We define a "good" Val as one that has one of the allowed tag values, /// all the defined body-bits for its case set to valid values, and all the /// undefined body-bits set to zero. diff --git a/soroban-env-host/observations/test__linear_memory__test_calling_symbol_new_from_linear_memory_with_bytes_length_oob.json b/soroban-env-host/observations/test__linear_memory__test_calling_symbol_new_from_linear_memory_with_bytes_length_oob.json index 8a2e8dac7..24f3d2759 100644 --- a/soroban-env-host/observations/test__linear_memory__test_calling_symbol_new_from_linear_memory_with_bytes_length_oob.json +++ b/soroban-env-host/observations/test__linear_memory__test_calling_symbol_new_from_linear_memory_with_bytes_length_oob.json @@ -1,22 +1,22 @@ { " 0 begin": "cpu:14488, mem:0, prngs:-/9b4a753, objs:-/-, vm:-/-, evt:-, store:-/-, foot:-, stk:-, auth:-/-", - " 1 call bytes_new_from_slice(7551)": "cpu:14535", - " 2 ret bytes_new_from_slice -> Ok(Bytes(obj#1))": "cpu:17382, mem:7631, objs:-/1@629a418c", + " 1 call bytes_new_from_slice(7552)": "cpu:14535", + " 2 ret bytes_new_from_slice -> Ok(Bytes(obj#1))": "cpu:17384, mem:7632, objs:-/1@93859bdc", " 3 call upload_wasm(Bytes(obj#1))": "", - " 4 ret upload_wasm -> Ok(Bytes(obj#3))": "cpu:3581859, mem:510369, objs:-/2@708e5021, store:-/1@79f89faf, foot:1@e9acbadd", - " 5 call bytes_new_from_slice(32)": "cpu:3582299, mem:510433, objs:-/3@b4afb57c", - " 6 ret bytes_new_from_slice -> Ok(Bytes(obj#7))": "cpu:3583268, mem:510545, objs:-/4@c25d3bbe", + " 4 ret upload_wasm -> Ok(Bytes(obj#3))": "cpu:3582273, mem:510411, objs:-/2@68fd4f8f, store:-/1@562e296b, foot:1@19eb60ff", + " 5 call bytes_new_from_slice(32)": "cpu:3582713, mem:510475, objs:-/3@2c8d28cc", + " 6 ret bytes_new_from_slice -> Ok(Bytes(obj#7))": "cpu:3583682, mem:510587, objs:-/4@1354534b", " 7 call create_contract(Address(obj#5), Bytes(obj#3), Bytes(obj#7))": "", - " 8 call obj_cmp(Address(obj#9), Address(obj#5))": "cpu:3584911, mem:510723, objs:-/5@56ae0670, auth:1@452a553c/-", - " 9 ret obj_cmp -> Ok(0)": "cpu:3585203", - " 10 call get_ledger_network_id()": "cpu:3585253, auth:1@452a553c/1@a3fcf5cb", - " 11 ret get_ledger_network_id -> Ok(Bytes(obj#11))": "cpu:3586283, mem:510835, objs:-/6@a9ee625c", - " 12 ret create_contract -> Ok(Address(obj#13))": "cpu:3604416, mem:513909, objs:-/7@1baf55a7, store:-/2@d9b431c5, foot:2@d85d795e, auth:-/1@282b2dd1", - " 13 call call(Address(obj#13), Symbol(loadmem3), Vec(obj#15))": "cpu:3604856, mem:513973, objs:-/8@25f10781, auth:-/-", - " 14 push VM:7109b628:loadmem3(U32(65505), U32(32))": "cpu:6758088, mem:1017221, objs:-/9@f134dae8, vm:65536@b1cd98b9/4@70bfd778, stk:1@1c72d896, auth:1@762323d1/-", - " 15 call symbol_new_from_linear_memory(U32(65505), U32(32))": "cpu:6760565, mem:1017283, vm:-/-", - " 16 ret symbol_new_from_linear_memory -> Err(Error(WasmVm, IndexBounds))": "cpu:6765803, mem:1017331", - " 17 pop VM:7109b628:loadmem3 -> Err(Error(WasmVm, IndexBounds))": " vm:65536@72ee363d/4@70bfd778", + " 8 call obj_cmp(Address(obj#9), Address(obj#5))": "cpu:3585325, mem:510765, objs:-/5@a0fdcb9, auth:1@a39f9cda/-", + " 9 ret obj_cmp -> Ok(0)": "cpu:3585617", + " 10 call get_ledger_network_id()": "cpu:3585667, auth:1@a39f9cda/1@212966ba", + " 11 ret get_ledger_network_id -> Ok(Bytes(obj#11))": "cpu:3586697, mem:510877, objs:-/6@fdc063", + " 12 ret create_contract -> Ok(Address(obj#13))": "cpu:3604830, mem:513951, objs:-/7@fdb4f0d8, store:-/2@8e32ab34, foot:2@203bf709, auth:-/1@19694c2a", + " 13 call call(Address(obj#13), Symbol(loadmem3), Vec(obj#15))": "cpu:3605270, mem:514015, objs:-/8@bd70bf72, auth:-/-", + " 14 push VM:4f3692f2:loadmem3(U32(65505), U32(32))": "cpu:6758859, mem:1017304, objs:-/9@8a195090, vm:65536@b1cd98b9/4@70bfd778, stk:1@79307d1f, auth:1@762323d1/-", + " 15 call symbol_new_from_linear_memory(U32(65505), U32(32))": "cpu:6761336, mem:1017366, vm:-/-", + " 16 ret symbol_new_from_linear_memory -> Err(Error(WasmVm, IndexBounds))": "cpu:6766574, mem:1017414", + " 17 pop VM:4f3692f2:loadmem3 -> Err(Error(WasmVm, IndexBounds))": " vm:65536@a66e806d/4@70bfd778", " 18 ret call -> Err(Error(WasmVm, IndexBounds))": " vm:-/-, stk:-, auth:-/-", - " 19 end": "cpu:6765803, mem:1017331, prngs:-/9b4a753, objs:-/9@f134dae8, vm:-/-, evt:-, store:-/2@d9b431c5, foot:2@d85d795e, stk:-, auth:-/-" + " 19 end": "cpu:6766574, mem:1017414, prngs:-/9b4a753, objs:-/9@8a195090, vm:-/-, evt:-, store:-/2@8e32ab34, foot:2@203bf709, stk:-, auth:-/-" } \ No newline at end of file diff --git a/soroban-env-host/observations/test__linear_memory__test_calling_symbol_new_from_linear_memory_with_bytes_pos_oob.json b/soroban-env-host/observations/test__linear_memory__test_calling_symbol_new_from_linear_memory_with_bytes_pos_oob.json index 11d13a451..b3a89b4b8 100644 --- a/soroban-env-host/observations/test__linear_memory__test_calling_symbol_new_from_linear_memory_with_bytes_pos_oob.json +++ b/soroban-env-host/observations/test__linear_memory__test_calling_symbol_new_from_linear_memory_with_bytes_pos_oob.json @@ -1,22 +1,22 @@ { " 0 begin": "cpu:14488, mem:0, prngs:-/9b4a753, objs:-/-, vm:-/-, evt:-, store:-/-, foot:-, stk:-, auth:-/-", - " 1 call bytes_new_from_slice(7551)": "cpu:14535", - " 2 ret bytes_new_from_slice -> Ok(Bytes(obj#1))": "cpu:17382, mem:7631, objs:-/1@629a418c", + " 1 call bytes_new_from_slice(7552)": "cpu:14535", + " 2 ret bytes_new_from_slice -> Ok(Bytes(obj#1))": "cpu:17384, mem:7632, objs:-/1@93859bdc", " 3 call upload_wasm(Bytes(obj#1))": "", - " 4 ret upload_wasm -> Ok(Bytes(obj#3))": "cpu:3581859, mem:510369, objs:-/2@708e5021, store:-/1@79f89faf, foot:1@e9acbadd", - " 5 call bytes_new_from_slice(32)": "cpu:3582299, mem:510433, objs:-/3@b4afb57c", - " 6 ret bytes_new_from_slice -> Ok(Bytes(obj#7))": "cpu:3583268, mem:510545, objs:-/4@c25d3bbe", + " 4 ret upload_wasm -> Ok(Bytes(obj#3))": "cpu:3582273, mem:510411, objs:-/2@68fd4f8f, store:-/1@562e296b, foot:1@19eb60ff", + " 5 call bytes_new_from_slice(32)": "cpu:3582713, mem:510475, objs:-/3@2c8d28cc", + " 6 ret bytes_new_from_slice -> Ok(Bytes(obj#7))": "cpu:3583682, mem:510587, objs:-/4@1354534b", " 7 call create_contract(Address(obj#5), Bytes(obj#3), Bytes(obj#7))": "", - " 8 call obj_cmp(Address(obj#9), Address(obj#5))": "cpu:3584911, mem:510723, objs:-/5@56ae0670, auth:1@452a553c/-", - " 9 ret obj_cmp -> Ok(0)": "cpu:3585203", - " 10 call get_ledger_network_id()": "cpu:3585253, auth:1@452a553c/1@a3fcf5cb", - " 11 ret get_ledger_network_id -> Ok(Bytes(obj#11))": "cpu:3586283, mem:510835, objs:-/6@a9ee625c", - " 12 ret create_contract -> Ok(Address(obj#13))": "cpu:3604416, mem:513909, objs:-/7@1baf55a7, store:-/2@d9b431c5, foot:2@d85d795e, auth:-/1@282b2dd1", - " 13 call call(Address(obj#13), Symbol(loadmem3), Vec(obj#15))": "cpu:3604856, mem:513973, objs:-/8@208e7382, auth:-/-", - " 14 push VM:7109b628:loadmem3(U32(65536), U32(32))": "cpu:6758088, mem:1017221, objs:-/9@719aab6e, vm:65536@b1cd98b9/4@70bfd778, stk:1@e630848f, auth:1@762323d1/-", - " 15 call symbol_new_from_linear_memory(U32(65536), U32(32))": "cpu:6760565, mem:1017283, vm:-/-", - " 16 ret symbol_new_from_linear_memory -> Err(Error(WasmVm, IndexBounds))": "cpu:6765803, mem:1017331", - " 17 pop VM:7109b628:loadmem3 -> Err(Error(WasmVm, IndexBounds))": " vm:65536@72ee363d/4@70bfd778", + " 8 call obj_cmp(Address(obj#9), Address(obj#5))": "cpu:3585325, mem:510765, objs:-/5@a0fdcb9, auth:1@a39f9cda/-", + " 9 ret obj_cmp -> Ok(0)": "cpu:3585617", + " 10 call get_ledger_network_id()": "cpu:3585667, auth:1@a39f9cda/1@212966ba", + " 11 ret get_ledger_network_id -> Ok(Bytes(obj#11))": "cpu:3586697, mem:510877, objs:-/6@fdc063", + " 12 ret create_contract -> Ok(Address(obj#13))": "cpu:3604830, mem:513951, objs:-/7@fdb4f0d8, store:-/2@8e32ab34, foot:2@203bf709, auth:-/1@19694c2a", + " 13 call call(Address(obj#13), Symbol(loadmem3), Vec(obj#15))": "cpu:3605270, mem:514015, objs:-/8@2e5eb2ed, auth:-/-", + " 14 push VM:4f3692f2:loadmem3(U32(65536), U32(32))": "cpu:6758859, mem:1017304, objs:-/9@90005a3d, vm:65536@b1cd98b9/4@70bfd778, stk:1@2cdded5e, auth:1@762323d1/-", + " 15 call symbol_new_from_linear_memory(U32(65536), U32(32))": "cpu:6761336, mem:1017366, vm:-/-", + " 16 ret symbol_new_from_linear_memory -> Err(Error(WasmVm, IndexBounds))": "cpu:6766574, mem:1017414", + " 17 pop VM:4f3692f2:loadmem3 -> Err(Error(WasmVm, IndexBounds))": " vm:65536@a66e806d/4@70bfd778", " 18 ret call -> Err(Error(WasmVm, IndexBounds))": " vm:-/-, stk:-, auth:-/-", - " 19 end": "cpu:6765803, mem:1017331, prngs:-/9b4a753, objs:-/9@719aab6e, vm:-/-, evt:-, store:-/2@d9b431c5, foot:2@d85d795e, stk:-, auth:-/-" + " 19 end": "cpu:6766574, mem:1017414, prngs:-/9b4a753, objs:-/9@90005a3d, vm:-/-, evt:-, store:-/2@8e32ab34, foot:2@203bf709, stk:-, auth:-/-" } \ No newline at end of file diff --git a/soroban-env-host/observations/test__linear_memory__test_calling_symbol_new_from_linear_memory_with_good_inputs_ok.json b/soroban-env-host/observations/test__linear_memory__test_calling_symbol_new_from_linear_memory_with_good_inputs_ok.json index 93cd8d37a..07b43e8a9 100644 --- a/soroban-env-host/observations/test__linear_memory__test_calling_symbol_new_from_linear_memory_with_good_inputs_ok.json +++ b/soroban-env-host/observations/test__linear_memory__test_calling_symbol_new_from_linear_memory_with_good_inputs_ok.json @@ -1,22 +1,22 @@ { " 0 begin": "cpu:14488, mem:0, prngs:-/9b4a753, objs:-/-, vm:-/-, evt:-, store:-/-, foot:-, stk:-, auth:-/-", - " 1 call bytes_new_from_slice(7551)": "cpu:14535", - " 2 ret bytes_new_from_slice -> Ok(Bytes(obj#1))": "cpu:17382, mem:7631, objs:-/1@629a418c", + " 1 call bytes_new_from_slice(7552)": "cpu:14535", + " 2 ret bytes_new_from_slice -> Ok(Bytes(obj#1))": "cpu:17384, mem:7632, objs:-/1@93859bdc", " 3 call upload_wasm(Bytes(obj#1))": "", - " 4 ret upload_wasm -> Ok(Bytes(obj#3))": "cpu:3581859, mem:510369, objs:-/2@708e5021, store:-/1@79f89faf, foot:1@e9acbadd", - " 5 call bytes_new_from_slice(32)": "cpu:3582299, mem:510433, objs:-/3@b4afb57c", - " 6 ret bytes_new_from_slice -> Ok(Bytes(obj#7))": "cpu:3583268, mem:510545, objs:-/4@c25d3bbe", + " 4 ret upload_wasm -> Ok(Bytes(obj#3))": "cpu:3582273, mem:510411, objs:-/2@68fd4f8f, store:-/1@562e296b, foot:1@19eb60ff", + " 5 call bytes_new_from_slice(32)": "cpu:3582713, mem:510475, objs:-/3@2c8d28cc", + " 6 ret bytes_new_from_slice -> Ok(Bytes(obj#7))": "cpu:3583682, mem:510587, objs:-/4@1354534b", " 7 call create_contract(Address(obj#5), Bytes(obj#3), Bytes(obj#7))": "", - " 8 call obj_cmp(Address(obj#9), Address(obj#5))": "cpu:3584911, mem:510723, objs:-/5@56ae0670, auth:1@452a553c/-", - " 9 ret obj_cmp -> Ok(0)": "cpu:3585203", - " 10 call get_ledger_network_id()": "cpu:3585253, auth:1@452a553c/1@a3fcf5cb", - " 11 ret get_ledger_network_id -> Ok(Bytes(obj#11))": "cpu:3586283, mem:510835, objs:-/6@a9ee625c", - " 12 ret create_contract -> Ok(Address(obj#13))": "cpu:3604416, mem:513909, objs:-/7@1baf55a7, store:-/2@d9b431c5, foot:2@d85d795e, auth:-/1@282b2dd1", - " 13 call call(Address(obj#13), Symbol(loadmem3), Vec(obj#15))": "cpu:3604856, mem:513973, objs:-/8@e52daca7, auth:-/-", - " 14 push VM:7109b628:loadmem3(U32(65280), U32(32))": "cpu:6758088, mem:1017221, objs:-/9@7576484, vm:65536@b1cd98b9/4@70bfd778, stk:1@f9fb6cd9, auth:1@762323d1/-", - " 15 call symbol_new_from_linear_memory(U32(65280), U32(32))": "cpu:6760565, mem:1017283, vm:-/-", - " 16 ret symbol_new_from_linear_memory -> Ok(Symbol(obj#19))": "cpu:6766243, mem:1017395, objs:-/10@f912210d", - " 17 pop VM:7109b628:loadmem3 -> Ok(Symbol(obj#19))": "cpu:6766739, mem:1017419, objs:1@19be61e5/10@f912210d, vm:65536@72ee363d/4@70bfd778, stk:1@c5a785f4", - " 18 ret call -> Ok(Symbol(obj#19))": "cpu:6766800, objs:-/10@f912210d, vm:-/-, stk:-, auth:-/-", - " 19 end": "cpu:6766800, mem:1017419, prngs:-/9b4a753, objs:-/10@f912210d, vm:-/-, evt:-, store:-/2@d9b431c5, foot:2@d85d795e, stk:-, auth:-/-" + " 8 call obj_cmp(Address(obj#9), Address(obj#5))": "cpu:3585325, mem:510765, objs:-/5@a0fdcb9, auth:1@a39f9cda/-", + " 9 ret obj_cmp -> Ok(0)": "cpu:3585617", + " 10 call get_ledger_network_id()": "cpu:3585667, auth:1@a39f9cda/1@212966ba", + " 11 ret get_ledger_network_id -> Ok(Bytes(obj#11))": "cpu:3586697, mem:510877, objs:-/6@fdc063", + " 12 ret create_contract -> Ok(Address(obj#13))": "cpu:3604830, mem:513951, objs:-/7@fdb4f0d8, store:-/2@8e32ab34, foot:2@203bf709, auth:-/1@19694c2a", + " 13 call call(Address(obj#13), Symbol(loadmem3), Vec(obj#15))": "cpu:3605270, mem:514015, objs:-/8@14981f5b, auth:-/-", + " 14 push VM:4f3692f2:loadmem3(U32(65280), U32(32))": "cpu:6758859, mem:1017304, objs:-/9@6e782621, vm:65536@b1cd98b9/4@70bfd778, stk:1@565c76, auth:1@762323d1/-", + " 15 call symbol_new_from_linear_memory(U32(65280), U32(32))": "cpu:6761336, mem:1017366, vm:-/-", + " 16 ret symbol_new_from_linear_memory -> Ok(Symbol(obj#19))": "cpu:6767014, mem:1017478, objs:-/10@7a2394d1", + " 17 pop VM:4f3692f2:loadmem3 -> Ok(Symbol(obj#19))": "cpu:6767510, mem:1017502, objs:1@19be61e5/10@7a2394d1, vm:65536@a66e806d/4@70bfd778, stk:1@3a060275", + " 18 ret call -> Ok(Symbol(obj#19))": "cpu:6767571, objs:-/10@7a2394d1, vm:-/-, stk:-, auth:-/-", + " 19 end": "cpu:6767571, mem:1017502, prngs:-/9b4a753, objs:-/10@7a2394d1, vm:-/-, evt:-, store:-/2@8e32ab34, foot:2@203bf709, stk:-, auth:-/-" } \ No newline at end of file diff --git a/soroban-env-host/src/host.rs b/soroban-env-host/src/host.rs index fd28d3660..c67d3f58d 100644 --- a/soroban-env-host/src/host.rs +++ b/soroban-env-host/src/host.rs @@ -51,12 +51,12 @@ use self::{ prng::Prng, }; -use crate::host_object::MemHostObjectType; #[cfg(any(test, feature = "testutils"))] pub use frame::ContractFunctionSet; pub(crate) use frame::Frame; #[cfg(any(test, feature = "recording_auth"))] use rand_chacha::ChaCha20Rng; +use soroban_env_common::SymbolSmall; #[derive(Debug, Clone, Default)] pub struct LedgerInfo { @@ -807,11 +807,21 @@ impl EnvBase for Host { fn symbol_new_from_slice(&self, s: &[u8]) -> Result { call_env_call_hook!(self, s.len()); + // Note: this whole function could be replaced by `ScSymbol::try_from_bytes` + // in order to avoid duplication. It is duplicated in order to support + // a slightly different check order for the sake of observation consistency. self.charge_budget(ContractCostType::MemCmp, Some(s.len() as u64))?; - let res = self.add_host_object(ScSymbol::try_from_bytes( - self, - self.metered_slice_to_vec(s)?, - )?); + for b in s { + SymbolSmall::validate_byte(*b).map_err(|_| { + self.err( + ScErrorType::Value, + ScErrorCode::InvalidInput, + "byte is not allowed in Symbol", + &[(*b as u32).into()], + ) + })?; + } + let res = self.add_host_object(ScSymbol(self.metered_slice_to_vec(s)?.try_into()?)); call_env_ret_hook!(self, res); res } @@ -2255,7 +2265,21 @@ impl VmCallerEnv for Host { let scv = self.visit_obj(b, |hv: &ScBytes| { self.metered_from_xdr::(hv.as_slice()) })?; - self.to_host_val(&scv) + // Metering bug: the representation check is not metered, + // so if the value is not valid, we won't charge anything for + // walking the `ScVal`. Since `to_host_val` performs validation + // and has proper metering, next protocol version should just + // call `to_host_val` directly. + if Val::can_represent_scval_recursive(&scv) { + self.to_host_val(&scv) + } else { + Err(self.err( + ScErrorType::Value, + ScErrorCode::UnexpectedType, + "Deserialized ScVal type cannot be represented as Val", + &[(scv.discriminant() as i32).into()], + )) + } } fn string_copy_to_linear_memory( diff --git a/soroban-env-host/src/host_object.rs b/soroban-env-host/src/host_object.rs index 346a92fc7..3c47b20d4 100644 --- a/soroban-env-host/src/host_object.rs +++ b/soroban-env-host/src/host_object.rs @@ -8,7 +8,7 @@ use crate::{ metered_vector::MeteredVector, }, num::{I256, U256}, - xdr::{self, ContractCostType, ScErrorCode, ScErrorType}, + xdr::{self, ContractCostType, ScErrorCode, ScErrorType, SCSYMBOL_LIMIT}, AddressObject, BytesObject, Compare, DurationObject, DurationSmall, Host, HostError, I128Object, I128Small, I256Object, I256Small, I64Object, I64Small, MapObject, Object, StringObject, SymbolObject, SymbolSmall, SymbolStr, TimepointObject, TimepointSmall, @@ -213,14 +213,24 @@ declare_host_object_type!(xdr::ScAddress, AddressObject, Address); impl MemHostObjectType for xdr::ScSymbol { fn try_from_bytes(host: &Host, bytes: Vec) -> Result { - soroban_env_common::Symbol::validate_bytes(bytes.as_slice()).map_err(|_| { - host.err( + if bytes.len() as u64 > SCSYMBOL_LIMIT { + return Err(host.err( ScErrorType::Value, ScErrorCode::InvalidInput, - concat!("byte array can not be represented by Symbol"), - &[], - ) - })?; + "slice is too long to be represented as Symbol", + &[(bytes.len() as u32).into()], + )); + } + for b in &bytes { + SymbolSmall::validate_byte(*b).map_err(|_| { + host.err( + ScErrorType::Value, + ScErrorCode::InvalidInput, + "byte is not allowed in Symbol", + &[(*b as u32).into()], + ) + })?; + } Self::try_from(bytes).map_err(Into::into) } fn as_byte_slice(&self) -> &[u8] { diff --git a/soroban-env-macros/src/synth_linear_memory_tests.rs b/soroban-env-macros/src/synth_linear_memory_tests.rs index 021bf60c5..e294f7f5f 100644 --- a/soroban-env-macros/src/synth_linear_memory_tests.rs +++ b/soroban-env-macros/src/synth_linear_memory_tests.rs @@ -102,6 +102,7 @@ pub fn generate_wasm_module_with_preloaded_linear_memory( let fn_export = hf.export; let arity = hf.args.len() as u32; let wasm_module = format_ident!("wasm_module_calling_{}", hf.name); + let hf_name_str = hf.name; quote! { // define the wasms fn #wasm_module() -> Vec { @@ -245,7 +246,12 @@ pub fn generate_wasm_module_with_preloaded_linear_memory( // prefill the last 256 bytes with some values // push in the following order: offset(d), val, length(n) f3.i32_const(#DATA_SECTION_0_START as i32); - f3.i32_const(68); + if #hf_name_str == "symbol_new_from_linear_memory" { + f3.i32_const(68); + } else { + f3.i32_const(7); + } + f3.i32_const(#DATA_SECTION_LEN as i32); f3.memory_fill(); // call the target host function From 75a838b4780890aa679bea3b6a47a0a029021d90 Mon Sep 17 00:00:00 2001 From: Dmytro Kozhevin Date: Fri, 5 Jan 2024 17:37:30 -0500 Subject: [PATCH 05/10] Test fix --- soroban-env-common/src/symbol.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/soroban-env-common/src/symbol.rs b/soroban-env-common/src/symbol.rs index 5327e800e..29776cea9 100644 --- a/soroban-env-common/src/symbol.rs +++ b/soroban-env-common/src/symbol.rs @@ -558,7 +558,8 @@ mod test_without_string { let input = "stellar"; let sym = SymbolSmall::try_from_str(input).unwrap(); let sym_str = SymbolStr::from(sym); - assert_eq!(sym_str.to_string(), input); + let s: &[u8] = sym_str.as_ref(); + assert_eq!(s, input.as_bytes()); } #[test] @@ -566,7 +567,8 @@ mod test_without_string { let input = ""; let sym = SymbolSmall::try_from_str(input).unwrap(); let sym_str = SymbolStr::from(sym); - assert_eq!(sym_str.to_string(), input); + let s: &[u8] = sym_str.as_ref(); + assert_eq!(s, input.as_bytes()); } #[test] @@ -574,7 +576,8 @@ mod test_without_string { let input = "123456789"; let sym = SymbolSmall::try_from_str(input).unwrap(); let sym_str = SymbolStr::from(sym); - assert_eq!(sym_str.to_string(), input); + let s: &[u8] = sym_str.as_ref(); + assert_eq!(s, input.as_bytes()); } #[test] From 60aa5581d470200f1af965f8f73b5a58af8b95ea Mon Sep 17 00:00:00 2001 From: Dmytro Kozhevin Date: Mon, 8 Jan 2024 12:03:14 -0500 Subject: [PATCH 06/10] Revert AsRef changes. --- soroban-env-common/src/symbol.rs | 15 +++++++++------ soroban-env-common/src/val.rs | 2 +- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/soroban-env-common/src/symbol.rs b/soroban-env-common/src/symbol.rs index 29776cea9..5bdb0bfae 100644 --- a/soroban-env-common/src/symbol.rs +++ b/soroban-env-common/src/symbol.rs @@ -321,6 +321,9 @@ impl AsRef<[u8]> for SymbolStr { } } +// This conversion relies on `SymbolStr` representing a well-formed `Symbol`, +// which in turn relies on `EnvBase` implementation to only produce valid +// `Symbol`s. impl AsRef for SymbolStr { fn as_ref(&self) -> &str { let s: &[u8] = self.as_ref(); @@ -558,8 +561,8 @@ mod test_without_string { let input = "stellar"; let sym = SymbolSmall::try_from_str(input).unwrap(); let sym_str = SymbolStr::from(sym); - let s: &[u8] = sym_str.as_ref(); - assert_eq!(s, input.as_bytes()); + let s: &str = sym_str.as_ref(); + assert_eq!(s, input); } #[test] @@ -567,8 +570,8 @@ mod test_without_string { let input = ""; let sym = SymbolSmall::try_from_str(input).unwrap(); let sym_str = SymbolStr::from(sym); - let s: &[u8] = sym_str.as_ref(); - assert_eq!(s, input.as_bytes()); + let s: &str = sym_str.as_ref(); + assert_eq!(s, input); } #[test] @@ -576,8 +579,8 @@ mod test_without_string { let input = "123456789"; let sym = SymbolSmall::try_from_str(input).unwrap(); let sym_str = SymbolStr::from(sym); - let s: &[u8] = sym_str.as_ref(); - assert_eq!(s, input.as_bytes()); + let s: &str = sym_str.as_ref(); + assert_eq!(s, input); } #[test] diff --git a/soroban-env-common/src/val.rs b/soroban-env-common/src/val.rs index d167f2bbb..71275d923 100644 --- a/soroban-env-common/src/val.rs +++ b/soroban-env-common/src/val.rs @@ -841,7 +841,7 @@ impl Debug for Val { // `Val`, this is still safe thanks to `SymbolSmall` iteration implementation that // only returns valid symbol characters or `\0` even for invalid bit // representations. - let converted_str = unsafe { str::from_utf8_unchecked(ss.as_ref()) }; + let s: &str = ss.as_ref(); write!(f, "Symbol({})", converted_str) } From 31e1a2da7f7f0cd1393d6e2a827e6d054042e6c1 Mon Sep 17 00:00:00 2001 From: Dmytro Kozhevin Date: Mon, 8 Jan 2024 12:17:02 -0500 Subject: [PATCH 07/10] Build fix --- soroban-env-common/src/val.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/soroban-env-common/src/val.rs b/soroban-env-common/src/val.rs index 71275d923..689e8954e 100644 --- a/soroban-env-common/src/val.rs +++ b/soroban-env-common/src/val.rs @@ -842,7 +842,7 @@ impl Debug for Val { // only returns valid symbol characters or `\0` even for invalid bit // representations. let s: &str = ss.as_ref(); - write!(f, "Symbol({})", converted_str) + write!(f, "Symbol({})", s) } Tag::U64Object => fmt_obj("U64", self, f), From 5c1fe5207c1bd7376c5f1d5aac2d8770525ff4a1 Mon Sep 17 00:00:00 2001 From: Dmytro Kozhevin Date: Mon, 8 Jan 2024 15:10:31 -0500 Subject: [PATCH 08/10] Use `ScSymbol::try_from_bytes` in host obj conversion. --- soroban-env-host/src/host/comparison.rs | 4 ++-- soroban-env-host/src/host/conversion.rs | 9 ++++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/soroban-env-host/src/host/comparison.rs b/soroban-env-host/src/host/comparison.rs index 4eded4908..5ebae9854 100644 --- a/soroban-env-host/src/host/comparison.rs +++ b/soroban-env-host/src/host/comparison.rs @@ -534,7 +534,7 @@ mod tests { }), ScVal::Bytes(xdr::ScBytes::try_from(vec![]).unwrap()), ScVal::String(xdr::ScString::try_from(vec![]).unwrap()), - ScVal::Symbol(xdr::ScSymbol::try_from("big-symbol").unwrap()), + ScVal::Symbol(xdr::ScSymbol::try_from("very_big_symbol").unwrap()), ScVal::Vec(Some(xdr::ScVec::try_from((0,)).unwrap())), ScVal::Map(Some(xdr::ScMap::try_from(vec![]).unwrap())), ScVal::Address(xdr::ScAddress::Contract(xdr::Hash([0; 32]))), @@ -702,7 +702,7 @@ mod tests { Tag::StringObject => Val::try_from_val(host, &"foo").unwrap(), Tag::SymbolObject => Val::try_from_val( host, - &ScVal::Symbol(xdr::ScSymbol::try_from("a-big-symbol").unwrap()), + &ScVal::Symbol(xdr::ScSymbol::try_from("a_very_big_symbol").unwrap()), ) .unwrap(), Tag::VecObject => { diff --git a/soroban-env-host/src/host/conversion.rs b/soroban-env-host/src/host/conversion.rs index ddedf3b8a..5e55e0338 100644 --- a/soroban-env-host/src/host/conversion.rs +++ b/soroban-env-host/src/host/conversion.rs @@ -1,5 +1,6 @@ use std::rc::Rc; +use crate::host_object::MemHostObjectType; use crate::{ budget::{AsBudget, DepthLimiter}, err, @@ -500,7 +501,13 @@ impl Host { } ScVal::Bytes(b) => Ok(self.add_host_object(b.metered_clone(self)?)?.into()), ScVal::String(s) => Ok(self.add_host_object(s.metered_clone(self)?)?.into()), - ScVal::Symbol(s) => Ok(self.add_host_object(s.metered_clone(self)?)?.into()), + ScVal::Symbol(s) => Ok(self + .add_host_object(ScSymbol::try_from_bytes( + self, + s.0.metered_clone(self.as_budget())?.into(), + )?)? + .into()), + ScVal::Address(addr) => Ok(self.add_host_object(addr.metered_clone(self)?)?.into()), // None of the following cases should have made it into this function, they From 4d7a10939bc8153fa22e3ec2c7b8f99259cf033f Mon Sep 17 00:00:00 2001 From: Dmytro Kozhevin Date: Mon, 8 Jan 2024 18:17:19 -0500 Subject: [PATCH 09/10] Add comment for SymbolObject --- soroban-env-host/src/host/conversion.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/soroban-env-host/src/host/conversion.rs b/soroban-env-host/src/host/conversion.rs index 5e55e0338..86a830da2 100644 --- a/soroban-env-host/src/host/conversion.rs +++ b/soroban-env-host/src/host/conversion.rs @@ -501,6 +501,8 @@ impl Host { } ScVal::Bytes(b) => Ok(self.add_host_object(b.metered_clone(self)?)?.into()), ScVal::String(s) => Ok(self.add_host_object(s.metered_clone(self)?)?.into()), + // Similarly to `ScMap`, not every `SCSymbol` XDR is valid. Thus it has to be + // created with the respective fallible conversion method. ScVal::Symbol(s) => Ok(self .add_host_object(ScSymbol::try_from_bytes( self, From b6c9adf583a0be832d91c2eaff1b6820218a48d0 Mon Sep 17 00:00:00 2001 From: Dmytro Kozhevin Date: Mon, 8 Jan 2024 18:30:11 -0500 Subject: [PATCH 10/10] Add comment for linear memory tests. --- soroban-env-macros/src/synth_linear_memory_tests.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/soroban-env-macros/src/synth_linear_memory_tests.rs b/soroban-env-macros/src/synth_linear_memory_tests.rs index e294f7f5f..bf0474302 100644 --- a/soroban-env-macros/src/synth_linear_memory_tests.rs +++ b/soroban-env-macros/src/synth_linear_memory_tests.rs @@ -246,8 +246,9 @@ pub fn generate_wasm_module_with_preloaded_linear_memory( // prefill the last 256 bytes with some values // push in the following order: offset(d), val, length(n) f3.i32_const(#DATA_SECTION_0_START as i32); + // Use a valid `Symbol` character for the `Symbol` tests. if #hf_name_str == "symbol_new_from_linear_memory" { - f3.i32_const(68); + f3.i32_const('D' as i32); } else { f3.i32_const(7); }