Improve cost model fitting for better lower-range accuracy (stellar#1293)

### What This PR originated from the fact that the `VarDeser` cost's constant term was previously unrealistically high. It improves the numerical approach to model generation by constraining the fit to the smallest input in the range. This avoids fitting a model that produces a marginally better overall fitness at the expense of lower-range accuracy. It also removes the dependency of the `HostCostMeasurement`'s input size on the linear parameter's scale factor (`COST_MODEL_LIN_TERM_SCALE_BITS`), which previously put a numerical constraint on the smallest input size and minimal gaps. [raw calibration outputs](https://github.com/stellar/rs-soroban-env/files/13621533/output_x86_dec_8.txt) ### Why [TODO: Why this change is being made. Include any context required to understand the why.] ### Known limitations [TODO or N/A]
jayz22 · Dec 9, 2023 · 659067f · 659067f
1 parent d02c500
commit 659067f
Show file tree

Hide file tree

Showing 22 changed files with 491 additions and 236 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/soroban-env-host/Cargo.toml b/soroban-env-host/Cargo.toml
@@ -56,11 +56,12 @@ textplots = "=0.8.4"
 wasmprinter = "=0.2.72"
 expect-test = "=1.4.1"
 more-asserts = "=0.3.1"
-linregress = "=0.5.3"
 pretty_assertions = "=1.4.0"
 backtrace = "=0.3.69"
 serde_json = "=1.0.108"
 arbitrary = "=1.3.2"
+lstsq = "=0.5.0"
+nalgebra = { version = "=0.32.3", default-features = false, features = ["std"]}
 
 [dev-dependencies.stellar-xdr]
 version = "=20.0.0"

diff --git a/soroban-env-host/benches/common/cost_types/compute_ecdsa_secp256k1_sig.rs b/soroban-env-host/benches/common/cost_types/compute_ecdsa_secp256k1_sig.rs
@@ -16,7 +16,7 @@ impl HostCostMeasurement for ComputeEcdsaSecp256k1SigMeasure {
     type Runner = ComputeEcdsaSecp256k1SigRun;
 
     fn new_random_case(_host: &Host, _rng: &mut StdRng, input: u64) -> Vec<u8> {
-        let size = 1 + input * Self::STEP_SIZE;
+        let size = Self::INPUT_BASE_SIZE + input * Self::STEP_SIZE;
 
         // Very awkward: the 'rand' crate has two copies linked in due to
         // divergence between the requirements of k256 and ed25519. The StdRng

diff --git a/soroban-env-host/benches/common/cost_types/compute_keccak256_hash.rs b/soroban-env-host/benches/common/cost_types/compute_keccak256_hash.rs
@@ -11,7 +11,7 @@ impl HostCostMeasurement for ComputeKeccak256HashMeasure {
     type Runner = ComputeKeccak256HashRun;
 
     fn new_random_case(_host: &Host, _rng: &mut StdRng, input: u64) -> Vec<u8> {
-        let size = 1 + input * Self::STEP_SIZE;
+        let size = Self::INPUT_BASE_SIZE + input * Self::STEP_SIZE;
         (0..size).map(|n| n as u8).collect()
     }
 }
diff --git a/soroban-env-host/benches/common/cost_types/compute_sha256_hash.rs b/soroban-env-host/benches/common/cost_types/compute_sha256_hash.rs
@@ -11,7 +11,7 @@ impl HostCostMeasurement for ComputeSha256HashMeasure {
     type Runner = ComputeSha256HashRun;
 
     fn new_random_case(_host: &Host, _rng: &mut StdRng, input: u64) -> Vec<u8> {
-        let size = 1 + input * Self::STEP_SIZE;
+        let size = Self::INPUT_BASE_SIZE + input * Self::STEP_SIZE;
         (0..size).map(|n| n as u8).collect()
     }
 }
diff --git a/soroban-env-host/benches/common/cost_types/host_mem_alloc.rs b/soroban-env-host/benches/common/cost_types/host_mem_alloc.rs
@@ -12,6 +12,6 @@ impl HostCostMeasurement for MemAllocMeasure {
     fn new_random_case(_host: &Host, _rng: &mut StdRng, input: u64) -> u64 {
         // we just pass along the size and let the runner allocate the memory
         // of the given size
-        1 + input * Self::STEP_SIZE
+        Self::INPUT_BASE_SIZE + input * Self::STEP_SIZE
     }
 }
diff --git a/soroban-env-host/benches/common/cost_types/host_mem_cmp.rs b/soroban-env-host/benches/common/cost_types/host_mem_cmp.rs
@@ -13,7 +13,7 @@ impl HostCostMeasurement for MemCmpMeasure {
         rng: &mut StdRng,
         input: u64,
     ) -> <Self::Runner as CostRunner>::SampleType {
-        let len = 1 + input * Self::STEP_SIZE;
+        let len = Self::INPUT_BASE_SIZE + input * Self::STEP_SIZE;
         let a = randvec(rng, len);
         let b = randvec(rng, len);
         (a, b)
@@ -24,7 +24,7 @@ impl HostCostMeasurement for MemCmpMeasure {
         rng: &mut StdRng,
         input: u64,
     ) -> <Self::Runner as CostRunner>::SampleType {
-        let len = 1 + input * Self::STEP_SIZE;
+        let len = Self::INPUT_BASE_SIZE + input * Self::STEP_SIZE;
         let a = randvec(rng, len);
         (a.clone(), a)
     }

diff --git a/soroban-env-host/benches/common/cost_types/host_mem_cpy.rs b/soroban-env-host/benches/common/cost_types/host_mem_cpy.rs
@@ -1,6 +1,6 @@
 use crate::common::HostCostMeasurement;
 use rand::{rngs::StdRng, RngCore};
-use soroban_env_host::{budget::COST_MODEL_LIN_TERM_SCALE_BITS, cost_runner::MemCpyRun, Host};
+use soroban_env_host::{cost_runner::MemCpyRun, Host};
 
 // Measures the cost of copying a chunk of memory in the host (no allocation).
 // The input value is the number of bytes copied.
@@ -14,12 +14,11 @@ impl HostCostMeasurement for MemCpyMeasure {
     // small memcpy, which almost all our memcpys are (they're not even likely
     // to be calls to memcpy, they're just "byte moving in the abstract sense",
     // usually only a few dozen or hundred at a time). So we use the smallest
-    // number here we're allowed to use: the linear scale factor, which
-    // STEP_SIZE literally isn't allowed to be smaller than.
-    const STEP_SIZE: u64 = 1 << COST_MODEL_LIN_TERM_SCALE_BITS;
+    // number here we're allowed to use.
+    const STEP_SIZE: u64 = 1;
 
     fn new_random_case(_host: &Host, rng: &mut StdRng, input: u64) -> (Vec<u8>, Vec<u8>) {
-        let len = 1 + input * Self::STEP_SIZE;
+        let len = Self::INPUT_BASE_SIZE + input * Self::STEP_SIZE;
         let mut a = vec![0; len as usize];
         let mut b = vec![0; len as usize];
         rng.fill_bytes(a.as_mut_slice());

diff --git a/soroban-env-host/benches/common/cost_types/prng.rs b/soroban-env-host/benches/common/cost_types/prng.rs
@@ -9,7 +9,7 @@ impl HostCostMeasurement for ChaCha20DrawBytesMeasure {
     type Runner = ChaCha20DrawBytesRun;
 
     fn new_random_case(_host: &Host, _rng: &mut StdRng, input: u64) -> (ChaCha20Rng, Vec<u8>) {
-        let size = 1 + input * Self::STEP_SIZE;
+        let size = Self::INPUT_BASE_SIZE + input * Self::STEP_SIZE;
         let seed = [0u8; 32];
         let rng = ChaCha20Rng::from_seed(seed);
         let dest = vec![0u8; size as usize];

diff --git a/soroban-env-host/benches/common/cost_types/recover_ecdsa_secp256k1_key.rs b/soroban-env-host/benches/common/cost_types/recover_ecdsa_secp256k1_key.rs
@@ -27,7 +27,7 @@ impl HostCostMeasurement for RecoverEcdsaSecp256k1KeyMeasure {
         // here, from the package k256 wants (and re-exports).
         let mut rng = k256::elliptic_curve::rand_core::OsRng;
 
-        let size = 1 + input * Self::STEP_SIZE;
+        let size = Self::INPUT_BASE_SIZE + input * Self::STEP_SIZE;
         let sec: SecretKey = SecretKey::random(&mut rng);
         let msg: Vec<u8> = (0..size).map(|x| x as u8).collect();
         let hash: Hash = Hash(Keccak256::digest(msg).into());

diff --git a/soroban-env-host/benches/common/cost_types/val_deser.rs b/soroban-env-host/benches/common/cost_types/val_deser.rs
@@ -48,8 +48,8 @@ impl HostCostMeasurement for ValDeserMeasure {
         _rng: &mut rand::prelude::StdRng,
         input: u64,
     ) -> Vec<u8> {
-        let input = 1 + input * Self::STEP_SIZE;
-        let elem_per_level = 1 + input / MAX_DEPTH;
+        let input = Self::INPUT_BASE_SIZE + input * Self::STEP_SIZE;
+        let elem_per_level = (input + MAX_DEPTH) / MAX_DEPTH;
         let mut v = ScVal::U64(0);
         let mut rem = input;
         for _i in 0..MAX_DEPTH {

diff --git a/soroban-env-host/benches/common/cost_types/val_ser.rs b/soroban-env-host/benches/common/cost_types/val_ser.rs
@@ -13,7 +13,7 @@ impl HostCostMeasurement for ValSerMeasure {
     const STEP_SIZE: u64 = 256;
 
     fn new_random_case(_host: &Host, rng: &mut StdRng, input: u64) -> (ScVal, Vec<u8>) {
-        let len = 1 + input * Self::STEP_SIZE;
+        let len = Self::INPUT_BASE_SIZE + input * Self::STEP_SIZE;
         let mut buf = vec![0; len as usize];
         rng.fill_bytes(buf.as_mut_slice());
         let v = ScVal::Bytes(buf.try_into().unwrap());
@@ -43,7 +43,7 @@ impl HostCostMeasurement for ValSerMeasure {
     // interference of u32.
 
     fn new_worst_case(_host: &Host, rng: &mut StdRng, input: u64) -> (ScVal, Vec<u8>) {
-        let len = 1 + input * Self::STEP_SIZE;
+        let len = Self::INPUT_BASE_SIZE + input * Self::STEP_SIZE;
         let mut buf = vec![0; len as usize];
         rng.fill_bytes(buf.as_mut_slice());
         let scv_bytes = ScVal::Bytes(buf.try_into().unwrap());

diff --git a/soroban-env-host/benches/common/cost_types/verify_ed25519_sig.rs b/soroban-env-host/benches/common/cost_types/verify_ed25519_sig.rs
@@ -15,7 +15,7 @@ impl HostCostMeasurement for VerifyEd25519SigMeasure {
     type Runner = VerifyEd25519SigRun;
 
     fn new_random_case(_host: &Host, rng: &mut StdRng, input: u64) -> VerifyEd25519SigSample {
-        let size = 1 + input * Self::STEP_SIZE;
+        let size = Self::INPUT_BASE_SIZE + input * Self::STEP_SIZE;
         let signingkey: SigningKey = SigningKey::generate(rng);
         let key: VerifyingKey = signingkey.verifying_key();
         let msg: Vec<u8> = (0..size).map(|x| x as u8).collect();

diff --git a/soroban-env-host/benches/common/cost_types/visit_object.rs b/soroban-env-host/benches/common/cost_types/visit_object.rs
@@ -9,7 +9,7 @@ impl HostCostMeasurement for VisitObjectMeasure {
     fn new_random_case(host: &Host, _rng: &mut rand::prelude::StdRng, input: u64) -> Vec<Object> {
         // During setup we inject a bunch of copies of the object to make
         // the host object array large.
-        let size = 1 + input * Self::STEP_SIZE;
+        let size = Self::INPUT_BASE_SIZE + input * Self::STEP_SIZE;
         let mut vec: Vec<Object> = Vec::with_capacity(size as usize);
         let val = ScVal::I64(i64::MAX);
         for _ in 0..size {

diff --git a/soroban-env-host/benches/common/cost_types/vm_ops.rs b/soroban-env-host/benches/common/cost_types/vm_ops.rs
@@ -26,7 +26,7 @@ impl HostCostMeasurement for VmInstantiationMeasure {
         let id: xdr::Hash = [0; 32].into();
         // generate a test wasm contract with many trivial internal functions,
         // which represents the worst case in terms of work needed for WASM parsing.
-        let n = (input * 30) as usize;
+        let n = (Self::INPUT_BASE_SIZE + input * 30) as usize;
         let wasm = wasm_module_with_n_internal_funcs(n);
         // replace the above two lines with these below to test with wasm contracts
         // with a single function of many instructions. In both tests the cpu grows