From 659067f7aa01e8c1e57cde03e799f08ad72c33c8 Mon Sep 17 00:00:00 2001
From: Jay Geng <jay@stellar.org>
Date: Fri, 8 Dec 2023 19:53:21 -0500
Subject: [PATCH] Improve cost model fitting for better lower-range accuracy
 (#1293)

### What

This PR originated from the observation that the `ValDeser` cost's
constant term was previously unrealistically high.

It improves the numerical approach to model generation by pinning the
fitted line to the measurement at the smallest input in the range.
This avoids fitting a model that achieves a marginally better overall fit
at the expense of lower-range accuracy.

It also removes the dependency of the `HostCostMeasurement`'s input size
on the linear parameter's scale factor
(`COST_MODEL_LIN_TERM_SCALE_BITS`), which previously put a numerical
constraint on the smallest input size and on the minimal gap between inputs.

[raw calibration
outputs](https://github.com/stellar/rs-soroban-env/files/13621533/output_x86_dec_8.txt)

### Why

[TODO: Why this change is being made. Include any context required to
understand the why.]

### Known limitations

[TODO or N/A]
---
 Cargo.lock                                    |  74 ++---
 soroban-env-host/Cargo.toml                   |   3 +-
 .../cost_types/compute_ecdsa_secp256k1_sig.rs |   2 +-
 .../cost_types/compute_keccak256_hash.rs      |   2 +-
 .../common/cost_types/compute_sha256_hash.rs  |   2 +-
 .../common/cost_types/host_mem_alloc.rs       |   2 +-
 .../benches/common/cost_types/host_mem_cmp.rs |   4 +-
 .../benches/common/cost_types/host_mem_cpy.rs |   9 +-
 .../benches/common/cost_types/prng.rs         |   2 +-
 .../cost_types/recover_ecdsa_secp256k1_key.rs |   2 +-
 .../benches/common/cost_types/val_deser.rs    |   4 +-
 .../benches/common/cost_types/val_ser.rs      |   4 +-
 .../common/cost_types/verify_ed25519_sig.rs   |   2 +-
 .../benches/common/cost_types/visit_object.rs |   2 +-
 .../benches/common/cost_types/vm_ops.rs       |   2 +-
 soroban-env-host/benches/common/measure.rs    |  52 ++-
 soroban-env-host/benches/common/mod.rs        |  32 +-
 soroban-env-host/benches/common/modelfit.rs   | 139 ++++++--
 .../benches/variation_histograms.rs           |   5 +-
 .../benches/worst_case_linear_models.rs       | 300 +++++++++++++-----
 soroban-env-host/src/budget.rs                |  54 +++-
 soroban-env-host/src/budget/model.rs          |  29 +-
 22 files changed, 491 insertions(+), 236 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 2a88e8a3a..a246acdad 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -152,7 +152,7 @@ dependencies = [
  "num-bigint",
  "proc-macro2",
  "quote",
- "syn 2.0.39",
+ "syn",
 ]
 
 [[package]]
@@ -273,7 +273,7 @@ checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.39",
+ "syn",
 ]
 
 [[package]]
@@ -297,7 +297,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "strsim",
- "syn 2.0.39",
+ "syn",
 ]
 
 [[package]]
@@ -308,7 +308,7 @@ checksum = "836a9bbc7ad63342d6d6e7b815ccab164bc77a2d95d84bc3117a8c0d5c98e2d5"
 dependencies = [
  "darling_core",
  "quote",
- "syn 2.0.39",
+ "syn",
 ]
 
 [[package]]
@@ -339,7 +339,7 @@ checksum = "67e77553c4162a157adbf834ebae5b415acbecbeafc7a74b0e886657506a7611"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.39",
+ "syn",
 ]
 
 [[package]]
@@ -740,15 +740,6 @@ version = "0.2.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058"
 
-[[package]]
-name = "linregress"
-version = "0.5.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4de04dcecc58d366391f9920245b85ffa684558a5ef6e7736e754347c3aea9c2"
-dependencies = [
- "nalgebra",
-]
-
 [[package]]
 name = "linux-raw-sys"
 version = "0.4.11"
@@ -774,6 +765,15 @@ dependencies = [
  "tracing-subscriber",
 ]
 
+[[package]]
+name = "lstsq"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8b2591c55069b74283fbdd97167b402b69c534ba6b2c037847cbcae7e14471d8"
+dependencies = [
+ "nalgebra",
+]
+
 [[package]]
 name = "matchers"
 version = "0.1.0"
@@ -822,7 +822,6 @@ checksum = "307ed9b18cc2423f29e83f84fd23a8e73628727990181f18641a8b5dc2ab1caa"
 dependencies = [
  "approx",
  "matrixmultiply",
- "nalgebra-macros",
  "num-complex",
  "num-rational",
  "num-traits",
@@ -830,17 +829,6 @@ dependencies = [
  "typenum",
 ]
 
-[[package]]
-name = "nalgebra-macros"
-version = "0.2.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "91761aed67d03ad966ef783ae962ef9bbaca728d2dd7ceb7939ec110fffad998"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 1.0.109",
-]
-
 [[package]]
 name = "nu-ansi-term"
 version = "0.46.0"
@@ -879,7 +867,7 @@ checksum = "cfb77679af88f8b125209d354a202862602672222e7f2313fdd6dc349bad4712"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.39",
+ "syn",
 ]
 
 [[package]]
@@ -930,7 +918,7 @@ dependencies = [
  "proc-macro-crate",
  "proc-macro2",
  "quote",
- "syn 2.0.39",
+ "syn",
 ]
 
 [[package]]
@@ -1241,7 +1229,7 @@ checksum = "d6c7207fbec9faa48073f3e3074cbe553af6ea512d7c21ba46e434e70ea9fbc1"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.39",
+ "syn",
 ]
 
 [[package]]
@@ -1281,7 +1269,7 @@ dependencies = [
  "darling",
  "proc-macro2",
  "quote",
- "syn 2.0.39",
+ "syn",
 ]
 
 [[package]]
@@ -1359,7 +1347,7 @@ dependencies = [
  "itertools",
  "proc-macro2",
  "quote",
- "syn 2.0.39",
+ "syn",
 ]
 
 [[package]]
@@ -1404,8 +1392,9 @@ dependencies = [
  "hmac",
  "itertools",
  "k256",
- "linregress",
+ "lstsq",
  "more-asserts",
+ "nalgebra",
  "num-derive",
  "num-integer",
  "num-traits",
@@ -1442,7 +1431,7 @@ dependencies = [
  "serde",
  "serde_json",
  "stellar-xdr",
- "syn 2.0.39",
+ "syn",
 ]
 
 [[package]]
@@ -1535,17 +1524,6 @@ version = "2.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc"
 
-[[package]]
-name = "syn"
-version = "1.0.109"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
-dependencies = [
- "proc-macro2",
- "quote",
- "unicode-ident",
-]
-
 [[package]]
 name = "syn"
 version = "2.0.39"
@@ -1600,7 +1578,7 @@ checksum = "266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.39",
+ "syn",
 ]
 
 [[package]]
@@ -1684,7 +1662,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.39",
+ "syn",
 ]
 
 [[package]]
@@ -1813,7 +1791,7 @@ dependencies = [
  "once_cell",
  "proc-macro2",
  "quote",
- "syn 2.0.39",
+ "syn",
  "wasm-bindgen-shared",
 ]
 
@@ -1835,7 +1813,7 @@ checksum = "c5353b8dab669f5e10f5bd76df26a9360c748f054f862ff5f3f8aae0c7fb3907"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.39",
+ "syn",
  "wasm-bindgen-backend",
  "wasm-bindgen-shared",
 ]
diff --git a/soroban-env-host/Cargo.toml b/soroban-env-host/Cargo.toml
index 84abf2296..48fbb5073 100644
--- a/soroban-env-host/Cargo.toml
+++ b/soroban-env-host/Cargo.toml
@@ -56,11 +56,12 @@ textplots = "=0.8.4"
 wasmprinter = "=0.2.72"
 expect-test = "=1.4.1"
 more-asserts = "=0.3.1"
-linregress = "=0.5.3"
 pretty_assertions = "=1.4.0"
 backtrace = "=0.3.69"
 serde_json = "=1.0.108"
 arbitrary = "=1.3.2"
+lstsq = "=0.5.0"
+nalgebra = { version = "=0.32.3", default-features = false, features = ["std"]}
 
 [dev-dependencies.stellar-xdr]
 version = "=20.0.0"
diff --git a/soroban-env-host/benches/common/cost_types/compute_ecdsa_secp256k1_sig.rs b/soroban-env-host/benches/common/cost_types/compute_ecdsa_secp256k1_sig.rs
index 171c67087..d38193b45 100644
--- a/soroban-env-host/benches/common/cost_types/compute_ecdsa_secp256k1_sig.rs
+++ b/soroban-env-host/benches/common/cost_types/compute_ecdsa_secp256k1_sig.rs
@@ -16,7 +16,7 @@ impl HostCostMeasurement for ComputeEcdsaSecp256k1SigMeasure {
     type Runner = ComputeEcdsaSecp256k1SigRun;
 
     fn new_random_case(_host: &Host, _rng: &mut StdRng, input: u64) -> Vec<u8> {
-        let size = 1 + input * Self::STEP_SIZE;
+        let size = Self::INPUT_BASE_SIZE + input * Self::STEP_SIZE;
 
         // Very awkward: the 'rand' crate has two copies linked in due to
         // divergence between the requirements of k256 and ed25519. The StdRng
diff --git a/soroban-env-host/benches/common/cost_types/compute_keccak256_hash.rs b/soroban-env-host/benches/common/cost_types/compute_keccak256_hash.rs
index 4409af885..6536b9f38 100644
--- a/soroban-env-host/benches/common/cost_types/compute_keccak256_hash.rs
+++ b/soroban-env-host/benches/common/cost_types/compute_keccak256_hash.rs
@@ -11,7 +11,7 @@ impl HostCostMeasurement for ComputeKeccak256HashMeasure {
     type Runner = ComputeKeccak256HashRun;
 
     fn new_random_case(_host: &Host, _rng: &mut StdRng, input: u64) -> Vec<u8> {
-        let size = 1 + input * Self::STEP_SIZE;
+        let size = Self::INPUT_BASE_SIZE + input * Self::STEP_SIZE;
         (0..size).map(|n| n as u8).collect()
     }
 }
diff --git a/soroban-env-host/benches/common/cost_types/compute_sha256_hash.rs b/soroban-env-host/benches/common/cost_types/compute_sha256_hash.rs
index f8d8cd492..07ce4ec1a 100644
--- a/soroban-env-host/benches/common/cost_types/compute_sha256_hash.rs
+++ b/soroban-env-host/benches/common/cost_types/compute_sha256_hash.rs
@@ -11,7 +11,7 @@ impl HostCostMeasurement for ComputeSha256HashMeasure {
     type Runner = ComputeSha256HashRun;
 
     fn new_random_case(_host: &Host, _rng: &mut StdRng, input: u64) -> Vec<u8> {
-        let size = 1 + input * Self::STEP_SIZE;
+        let size = Self::INPUT_BASE_SIZE + input * Self::STEP_SIZE;
         (0..size).map(|n| n as u8).collect()
     }
 }
diff --git a/soroban-env-host/benches/common/cost_types/host_mem_alloc.rs b/soroban-env-host/benches/common/cost_types/host_mem_alloc.rs
index 645c8c261..9f9756f11 100644
--- a/soroban-env-host/benches/common/cost_types/host_mem_alloc.rs
+++ b/soroban-env-host/benches/common/cost_types/host_mem_alloc.rs
@@ -12,6 +12,6 @@ impl HostCostMeasurement for MemAllocMeasure {
     fn new_random_case(_host: &Host, _rng: &mut StdRng, input: u64) -> u64 {
         // we just pass along the size and let the runner allocate the memory
         // of the given size
-        1 + input * Self::STEP_SIZE
+        Self::INPUT_BASE_SIZE + input * Self::STEP_SIZE
     }
 }
diff --git a/soroban-env-host/benches/common/cost_types/host_mem_cmp.rs b/soroban-env-host/benches/common/cost_types/host_mem_cmp.rs
index 5d5c91267..acb64a8f3 100644
--- a/soroban-env-host/benches/common/cost_types/host_mem_cmp.rs
+++ b/soroban-env-host/benches/common/cost_types/host_mem_cmp.rs
@@ -13,7 +13,7 @@ impl HostCostMeasurement for MemCmpMeasure {
         rng: &mut StdRng,
         input: u64,
     ) -> <Self::Runner as CostRunner>::SampleType {
-        let len = 1 + input * Self::STEP_SIZE;
+        let len = Self::INPUT_BASE_SIZE + input * Self::STEP_SIZE;
         let a = randvec(rng, len);
         let b = randvec(rng, len);
         (a, b)
@@ -24,7 +24,7 @@ impl HostCostMeasurement for MemCmpMeasure {
         rng: &mut StdRng,
         input: u64,
     ) -> <Self::Runner as CostRunner>::SampleType {
-        let len = 1 + input * Self::STEP_SIZE;
+        let len = Self::INPUT_BASE_SIZE + input * Self::STEP_SIZE;
         let a = randvec(rng, len);
         (a.clone(), a)
     }
diff --git a/soroban-env-host/benches/common/cost_types/host_mem_cpy.rs b/soroban-env-host/benches/common/cost_types/host_mem_cpy.rs
index f294bc300..105c8f43d 100644
--- a/soroban-env-host/benches/common/cost_types/host_mem_cpy.rs
+++ b/soroban-env-host/benches/common/cost_types/host_mem_cpy.rs
@@ -1,6 +1,6 @@
 use crate::common::HostCostMeasurement;
 use rand::{rngs::StdRng, RngCore};
-use soroban_env_host::{budget::COST_MODEL_LIN_TERM_SCALE_BITS, cost_runner::MemCpyRun, Host};
+use soroban_env_host::{cost_runner::MemCpyRun, Host};
 
 // Measures the cost of copying a chunk of memory in the host (no allocation).
 // The input value is the number of bytes copied.
@@ -14,12 +14,11 @@ impl HostCostMeasurement for MemCpyMeasure {
     // small memcpy, which almost all our memcpys are (they're not even likely
     // to be calls to memcpy, they're just "byte moving in the abstract sense",
     // usually only a few dozen or hundred at a time). So we use the smallest
-    // number here we're allowed to use: the linear scale factor, which
-    // STEP_SIZE literally isn't allowed to be smaller than.
-    const STEP_SIZE: u64 = 1 << COST_MODEL_LIN_TERM_SCALE_BITS;
+    // number here we're allowed to use.
+    const STEP_SIZE: u64 = 1;
 
     fn new_random_case(_host: &Host, rng: &mut StdRng, input: u64) -> (Vec<u8>, Vec<u8>) {
-        let len = 1 + input * Self::STEP_SIZE;
+        let len = Self::INPUT_BASE_SIZE + input * Self::STEP_SIZE;
         let mut a = vec![0; len as usize];
         let mut b = vec![0; len as usize];
         rng.fill_bytes(a.as_mut_slice());
diff --git a/soroban-env-host/benches/common/cost_types/prng.rs b/soroban-env-host/benches/common/cost_types/prng.rs
index 4b1e3a677..3a051b580 100644
--- a/soroban-env-host/benches/common/cost_types/prng.rs
+++ b/soroban-env-host/benches/common/cost_types/prng.rs
@@ -9,7 +9,7 @@ impl HostCostMeasurement for ChaCha20DrawBytesMeasure {
     type Runner = ChaCha20DrawBytesRun;
 
     fn new_random_case(_host: &Host, _rng: &mut StdRng, input: u64) -> (ChaCha20Rng, Vec<u8>) {
-        let size = 1 + input * Self::STEP_SIZE;
+        let size = Self::INPUT_BASE_SIZE + input * Self::STEP_SIZE;
         let seed = [0u8; 32];
         let rng = ChaCha20Rng::from_seed(seed);
         let dest = vec![0u8; size as usize];
diff --git a/soroban-env-host/benches/common/cost_types/recover_ecdsa_secp256k1_key.rs b/soroban-env-host/benches/common/cost_types/recover_ecdsa_secp256k1_key.rs
index 4a7d5d9ac..da386300f 100644
--- a/soroban-env-host/benches/common/cost_types/recover_ecdsa_secp256k1_key.rs
+++ b/soroban-env-host/benches/common/cost_types/recover_ecdsa_secp256k1_key.rs
@@ -27,7 +27,7 @@ impl HostCostMeasurement for RecoverEcdsaSecp256k1KeyMeasure {
         // here, from the package k256 wants (and re-exports).
         let mut rng = k256::elliptic_curve::rand_core::OsRng;
 
-        let size = 1 + input * Self::STEP_SIZE;
+        let size = Self::INPUT_BASE_SIZE + input * Self::STEP_SIZE;
         let sec: SecretKey = SecretKey::random(&mut rng);
         let msg: Vec<u8> = (0..size).map(|x| x as u8).collect();
         let hash: Hash = Hash(Keccak256::digest(msg).into());
diff --git a/soroban-env-host/benches/common/cost_types/val_deser.rs b/soroban-env-host/benches/common/cost_types/val_deser.rs
index c83edad20..58a09aa3a 100644
--- a/soroban-env-host/benches/common/cost_types/val_deser.rs
+++ b/soroban-env-host/benches/common/cost_types/val_deser.rs
@@ -48,8 +48,8 @@ impl HostCostMeasurement for ValDeserMeasure {
         _rng: &mut rand::prelude::StdRng,
         input: u64,
     ) -> Vec<u8> {
-        let input = 1 + input * Self::STEP_SIZE;
-        let elem_per_level = 1 + input / MAX_DEPTH;
+        let input = Self::INPUT_BASE_SIZE + input * Self::STEP_SIZE;
+        let elem_per_level = (input + MAX_DEPTH) / MAX_DEPTH;
         let mut v = ScVal::U64(0);
         let mut rem = input;
         for _i in 0..MAX_DEPTH {
diff --git a/soroban-env-host/benches/common/cost_types/val_ser.rs b/soroban-env-host/benches/common/cost_types/val_ser.rs
index d44f5589a..9deae6672 100644
--- a/soroban-env-host/benches/common/cost_types/val_ser.rs
+++ b/soroban-env-host/benches/common/cost_types/val_ser.rs
@@ -13,7 +13,7 @@ impl HostCostMeasurement for ValSerMeasure {
     const STEP_SIZE: u64 = 256;
 
     fn new_random_case(_host: &Host, rng: &mut StdRng, input: u64) -> (ScVal, Vec<u8>) {
-        let len = 1 + input * Self::STEP_SIZE;
+        let len = Self::INPUT_BASE_SIZE + input * Self::STEP_SIZE;
         let mut buf = vec![0; len as usize];
         rng.fill_bytes(buf.as_mut_slice());
         let v = ScVal::Bytes(buf.try_into().unwrap());
@@ -43,7 +43,7 @@ impl HostCostMeasurement for ValSerMeasure {
     // interference of u32.
 
     fn new_worst_case(_host: &Host, rng: &mut StdRng, input: u64) -> (ScVal, Vec<u8>) {
-        let len = 1 + input * Self::STEP_SIZE;
+        let len = Self::INPUT_BASE_SIZE + input * Self::STEP_SIZE;
         let mut buf = vec![0; len as usize];
         rng.fill_bytes(buf.as_mut_slice());
         let scv_bytes = ScVal::Bytes(buf.try_into().unwrap());
diff --git a/soroban-env-host/benches/common/cost_types/verify_ed25519_sig.rs b/soroban-env-host/benches/common/cost_types/verify_ed25519_sig.rs
index f2b8208ef..12110f715 100644
--- a/soroban-env-host/benches/common/cost_types/verify_ed25519_sig.rs
+++ b/soroban-env-host/benches/common/cost_types/verify_ed25519_sig.rs
@@ -15,7 +15,7 @@ impl HostCostMeasurement for VerifyEd25519SigMeasure {
     type Runner = VerifyEd25519SigRun;
 
     fn new_random_case(_host: &Host, rng: &mut StdRng, input: u64) -> VerifyEd25519SigSample {
-        let size = 1 + input * Self::STEP_SIZE;
+        let size = Self::INPUT_BASE_SIZE + input * Self::STEP_SIZE;
         let signingkey: SigningKey = SigningKey::generate(rng);
         let key: VerifyingKey = signingkey.verifying_key();
         let msg: Vec<u8> = (0..size).map(|x| x as u8).collect();
diff --git a/soroban-env-host/benches/common/cost_types/visit_object.rs b/soroban-env-host/benches/common/cost_types/visit_object.rs
index d17718af0..8be2f0cad 100644
--- a/soroban-env-host/benches/common/cost_types/visit_object.rs
+++ b/soroban-env-host/benches/common/cost_types/visit_object.rs
@@ -9,7 +9,7 @@ impl HostCostMeasurement for VisitObjectMeasure {
     fn new_random_case(host: &Host, _rng: &mut rand::prelude::StdRng, input: u64) -> Vec<Object> {
         // During setup we inject a bunch of copies of the object to make
         // the host object array large.
-        let size = 1 + input * Self::STEP_SIZE;
+        let size = Self::INPUT_BASE_SIZE + input * Self::STEP_SIZE;
         let mut vec: Vec<Object> = Vec::with_capacity(size as usize);
         let val = ScVal::I64(i64::MAX);
         for _ in 0..size {
diff --git a/soroban-env-host/benches/common/cost_types/vm_ops.rs b/soroban-env-host/benches/common/cost_types/vm_ops.rs
index 928ac876b..d011ace8f 100644
--- a/soroban-env-host/benches/common/cost_types/vm_ops.rs
+++ b/soroban-env-host/benches/common/cost_types/vm_ops.rs
@@ -26,7 +26,7 @@ impl HostCostMeasurement for VmInstantiationMeasure {
         let id: xdr::Hash = [0; 32].into();
         // generate a test wasm contract with many trivial internal functions,
         // which represents the worst case in terms of work needed for WASM parsing.
-        let n = (input * 30) as usize;
+        let n = (Self::INPUT_BASE_SIZE + input * 30) as usize;
         let wasm = wasm_module_with_n_internal_funcs(n);
         // replace the above two lines with these below to test with wasm contracts
         // with a single function of many instructions. In both tests the cpu grows
diff --git a/soroban-env-host/benches/common/measure.rs b/soroban-env-host/benches/common/measure.rs
index 1af19adcf..f60c51b4e 100644
--- a/soroban-env-host/benches/common/measure.rs
+++ b/soroban-env-host/benches/common/measure.rs
@@ -1,14 +1,14 @@
 use rand::{rngs::StdRng, Rng, SeedableRng};
 use soroban_bench_utils::{tracking_allocator::AllocationGroupToken, HostTracker};
 use soroban_env_host::{
-    budget::{AsBudget, CostTracker, COST_MODEL_LIN_TERM_SCALE_BITS},
+    budget::{AsBudget, CostTracker, MeteredCostComponent},
     cost_runner::{CostRunner, CostType},
     Host,
 };
 use std::{io, ops::Range};
 use tabwriter::{Alignment, TabWriter};
 
-use super::{fit_model, FPCostModel};
+use super::modelfit::fit_model;
 
 #[derive(Clone, Debug, Default)]
 pub struct Measurement {
@@ -171,7 +171,7 @@ impl Measurements {
         eprintln!("{}", String::from_utf8(tw.into_inner().unwrap()).unwrap());
     }
 
-    pub fn fit_model_to_cpu(&self) -> FPCostModel {
+    pub fn fit_model_to_cpu(&self) -> (MeteredCostComponent, f64) {
         // data must be preprocessed
         assert_eq!(
             self.measurements.len(),
@@ -181,20 +181,15 @@ impl Measurements {
         let (x, y): (Vec<_>, Vec<_>) = self
             .averaged_net_measurements
             .iter()
-            .map(|m| {
-                (
-                    // we've made sure the raw inputs have been conflated before via HCM::STEP_SIZE,
-                    // here we can safely scale it back
-                    m.inputs.unwrap_or(0) >> COST_MODEL_LIN_TERM_SCALE_BITS,
-                    m.cpu_insns,
-                )
-            })
+            .map(|m| (m.inputs.unwrap_or(0), m.cpu_insns))
             .unzip();
 
-        fit_model(x, y)
+        let model = fit_model(x, y);
+        let r2 = model.r_squared;
+        (model.into(), r2)
     }
 
-    pub fn fit_model_to_mem(&self) -> FPCostModel {
+    pub fn fit_model_to_mem(&self) -> (MeteredCostComponent, f64) {
         // data must be preprocessed
         assert_eq!(
             self.measurements.len(),
@@ -204,17 +199,12 @@ impl Measurements {
         let (x, y): (Vec<_>, Vec<_>) = self
             .averaged_net_measurements
             .iter()
-            .map(|m| {
-                (
-                    // we've made sure the raw inputs have been conflated before via HCM::STEP_SIZE,
-                    // here we can safely scale it back
-                    m.inputs.unwrap_or(0) >> COST_MODEL_LIN_TERM_SCALE_BITS,
-                    m.mem_bytes,
-                )
-            })
+            .map(|m| (m.inputs.unwrap_or(0), m.mem_bytes))
             .unzip();
 
-        fit_model(x, y)
+        let model = fit_model(x, y);
+        let r2 = model.r_squared;
+        (model.into(), r2)
     }
 }
 
@@ -252,16 +242,17 @@ pub trait HostCostMeasurement: Sized {
     /// The type of host runner we're using. Uniquely identifies a `CostType`.
     type Runner: CostRunner;
 
-    /// The `input: u64` will be multiplied by the `STEP_SIZE` for two reasons:
-    /// 1. for fast-running linear components, setting the step size larger can
-    /// ensure each sample runs for longer (compared to measurement fluctuation),
-    /// thus helps extrapolating the linear coefficient.
-    /// 2. when fitting the linear model, the linear coefficient will be scaled
-    /// up by `factor = 2^COST_MODEL_LIN_TERM_SCALE_BITS`, by scaling down the
-    /// actual input size. Thus `STEP_SIZE` must be `>= factor` to account for
-    /// the input downscaling.
+    /// The `input: u64` will be multiplied by the `STEP_SIZE`. It exists mainly
+    /// for numerical reasons: for fast-running linear components, setting the
+    /// step size larger ensures each sample runs for longer (compared to
+    /// measurement fluctuation), which helps derive a more accurate linear
+    /// coefficient (slope). This is not relevant for const models.
     const STEP_SIZE: u64 = 1024;
 
+    /// Base size of the HCM input, which does not necessarily have the same unit
+    /// as the input to the budget.
+    const INPUT_BASE_SIZE: u64 = 1;
+
     /// Initialize a new instance of a HostMeasurement at a given input _hint_, for
     /// the run; the HostMeasurement can choose a precise input for a given hint
     /// and use it during `run`; the precise input will be extracted at the end
@@ -348,7 +339,6 @@ where
         &mut Vec<<<HCM as HostCostMeasurement>::Runner as CostRunner>::RecycledType>,
     ),
 {
-    assert!(HCM::STEP_SIZE >= (1 << COST_MODEL_LIN_TERM_SCALE_BITS));
     let mut recycled_samples = Vec::with_capacity(samples.len());
     host.as_budget().reset_unlimited().unwrap();
 
diff --git a/soroban-env-host/benches/common/mod.rs b/soroban-env-host/benches/common/mod.rs
index 713f9086b..0d981a384 100644
--- a/soroban-env-host/benches/common/mod.rs
+++ b/soroban-env-host/benches/common/mod.rs
@@ -9,17 +9,18 @@ mod util;
 use cost_types::*;
 use experimental::*;
 pub use measure::*;
-pub use modelfit::*;
 
 use soroban_env_common::xdr::Name;
 use soroban_env_host::{
+    budget::MeteredCostComponent,
     cost_runner::{CostRunner, CostType, WasmInsnType},
     xdr::ContractCostType,
 };
 use std::collections::BTreeMap;
 
 pub(crate) trait Benchmark {
-    fn bench<HCM: HostCostMeasurement>() -> std::io::Result<(FPCostModel, FPCostModel)>;
+    fn bench<HCM: HostCostMeasurement>(
+    ) -> std::io::Result<(MeteredCostComponent, MeteredCostComponent)>;
 }
 
 fn get_explicit_bench_names() -> Option<Vec<String>> {
@@ -40,7 +41,7 @@ fn should_run<HCM: HostCostMeasurement>() -> bool {
 }
 
 fn call_bench<B: Benchmark, HCM: HostCostMeasurement>(
-    params: &mut BTreeMap<CostType, (FPCostModel, FPCostModel)>,
+    params: &mut BTreeMap<CostType, (MeteredCostComponent, MeteredCostComponent)>,
 ) -> std::io::Result<()> {
     if should_run::<HCM>() {
         params.insert(<HCM::Runner as CostRunner>::COST_TYPE, B::bench::<HCM>()?);
@@ -49,8 +50,9 @@ fn call_bench<B: Benchmark, HCM: HostCostMeasurement>(
 }
 
 pub(crate) fn for_each_experimental_cost_measurement<B: Benchmark>(
-) -> std::io::Result<BTreeMap<CostType, (FPCostModel, FPCostModel)>> {
-    let mut params: BTreeMap<CostType, (FPCostModel, FPCostModel)> = BTreeMap::new();
+) -> std::io::Result<BTreeMap<CostType, (MeteredCostComponent, MeteredCostComponent)>> {
+    let mut params: BTreeMap<CostType, (MeteredCostComponent, MeteredCostComponent)> =
+        BTreeMap::new();
     call_bench::<B, Ed25519ScalarMulMeasure>(&mut params)?;
     call_bench::<B, VerifyEd25519SigMeasure>(&mut params)?;
     call_bench::<B, ReadXdrByteArrayMeasure>(&mut params)?;
@@ -58,8 +60,9 @@ pub(crate) fn for_each_experimental_cost_measurement<B: Benchmark>(
 }
 
 pub(crate) fn for_each_host_cost_measurement<B: Benchmark>(
-) -> std::io::Result<BTreeMap<CostType, (FPCostModel, FPCostModel)>> {
-    let mut params: BTreeMap<CostType, (FPCostModel, FPCostModel)> = BTreeMap::new();
+) -> std::io::Result<BTreeMap<CostType, (MeteredCostComponent, MeteredCostComponent)>> {
+    let mut params: BTreeMap<CostType, (MeteredCostComponent, MeteredCostComponent)> =
+        BTreeMap::new();
 
     call_bench::<B, ComputeEcdsaSecp256k1SigMeasure>(&mut params)?;
     call_bench::<B, ComputeEd25519PubKeyMeasure>(&mut params)?;
@@ -69,19 +72,22 @@ pub(crate) fn for_each_host_cost_measurement<B: Benchmark>(
     call_bench::<B, VerifyEd25519SigMeasure>(&mut params)?;
     call_bench::<B, VmInstantiationMeasure>(&mut params)?;
     call_bench::<B, VisitObjectMeasure>(&mut params)?;
-    call_bench::<B, ValSerMeasure>(&mut params)?;
     call_bench::<B, ValDeserMeasure>(&mut params)?;
-    call_bench::<B, MemCmpMeasure>(&mut params)?;
+    call_bench::<B, ValSerMeasure>(&mut params)?;
     call_bench::<B, InvokeVmFunctionMeasure>(&mut params)?;
     call_bench::<B, InvokeHostFunctionMeasure>(&mut params)?;
-    call_bench::<B, MemAllocMeasure>(&mut params)?;
-    call_bench::<B, MemCpyMeasure>(&mut params)?;
     call_bench::<B, Int256AddSubMeasure>(&mut params)?;
     call_bench::<B, Int256MulMeasure>(&mut params)?;
     call_bench::<B, Int256DivMeasure>(&mut params)?;
     call_bench::<B, Int256PowMeasure>(&mut params)?;
     call_bench::<B, Int256ShiftMeasure>(&mut params)?;
     call_bench::<B, ChaCha20DrawBytesMeasure>(&mut params)?;
+    // These three memory cost types are derived analytically; we do not typically calibrate them
+    if std::env::var("INCLUDE_ANALYTICAL_COSTTYPES").is_ok() {
+        call_bench::<B, MemAllocMeasure>(&mut params)?;
+        call_bench::<B, MemCpyMeasure>(&mut params)?;
+        call_bench::<B, MemCmpMeasure>(&mut params)?;
+    }
 
     if get_explicit_bench_names().is_none() {
         for cost in ContractCostType::variants() {
@@ -95,8 +101,8 @@ pub(crate) fn for_each_host_cost_measurement<B: Benchmark>(
 
 macro_rules! run_wasm_insn_measurement {
     ( $($HCM: ident),* ) => {
-        pub(crate) fn for_each_wasm_insn_measurement<B: Benchmark>() -> std::io::Result<BTreeMap<CostType, (FPCostModel, FPCostModel)>> {
-            let mut params: BTreeMap<CostType, (FPCostModel, FPCostModel)> = BTreeMap::new();
+        pub(crate) fn for_each_wasm_insn_measurement<B: Benchmark>() -> std::io::Result<BTreeMap<CostType, (MeteredCostComponent, MeteredCostComponent)>> {
+            let mut params: BTreeMap<CostType, (MeteredCostComponent, MeteredCostComponent)> = BTreeMap::new();
             $(
                 if should_run::<$HCM>() {
                     params.insert(<$HCM as HostCostMeasurement>::Runner::COST_TYPE, B::bench::<$HCM>()?);
diff --git a/soroban-env-host/benches/common/modelfit.rs b/soroban-env-host/benches/common/modelfit.rs
index 6e076e9b7..e22c7ae96 100644
--- a/soroban-env-host/benches/common/modelfit.rs
+++ b/soroban-env-host/benches/common/modelfit.rs
@@ -1,13 +1,23 @@
+use nalgebra::{self as na, OMatrix, OVector, U1};
+use num_traits::Pow;
+use soroban_env_host::budget::MeteredCostComponent;
 use std::collections::HashSet;
-use std::str::FromStr;
-
-use linregress::{FormulaRegressionBuilder, RegressionDataBuilder};
 
 #[derive(Debug, Default, Clone, PartialEq, PartialOrd)]
-pub struct FPCostModel {
-    pub const_param: f64,
-    pub lin_param: f64,
-    pub r_squared: f64,
+pub(crate) struct FPCostModel {
+    const_param: f64,
+    lin_param: f64,
+    pub(crate) r_squared: f64,
+}
+
+impl From<FPCostModel> for MeteredCostComponent {
+    fn from(mut model: FPCostModel) -> Self {
+        model.truncate_noise_digits();
+        MeteredCostComponent {
+            const_term: model.const_param.ceil() as u64,
+            lin_term: model.lin_param.into(),
+        }
+    }
 }
 
 // We have to use a floating-point cost model in order to interface with the
@@ -21,6 +31,7 @@ impl FPCostModel {
         fcm.r_squared = r2;
         fcm
     }
+
     // This is the same as the 'evaluate' function in the integral cost model,
     // just using f64 ops rather than saturating integer ops.
     pub fn evaluate(&self, input: f64) -> f64 {
@@ -30,38 +41,38 @@ impl FPCostModel {
         }
         res
     }
-    // Extract the parameters from FPs to integers
-    pub fn params_as_u64(&self) -> (u64, u64) {
-        let extract_param = |f: f64| -> u64 {
-            // clamp the float to 1 digit (to filter noise) then take the ceil
-            let f = f64::from_str(format!("{:.1}", f).as_str()).unwrap();
-            f.ceil() as u64
+
+    // We truncate the floating point values to 6 decimal digits, which should
+    // retain enough precision to apply the scale factor to. This prevents
+    // numerical noise from being rounded up as a non-zero linear term.
+    fn truncate_noise_digits(&mut self) {
+        let round_to_decimal_places = |num: f64, decimal_places: u32| -> f64 {
+            let factor = 10f64.powi(decimal_places as i32);
+            (num * factor).ceil() / factor
         };
-        (
-            extract_param(self.const_param),
-            extract_param(self.lin_param),
-        )
+        self.const_param = round_to_decimal_places(self.const_param, 6);
+        self.lin_param = round_to_decimal_places(self.lin_param, 6);
     }
 }
 
-fn fit_linear_regression(x: Vec<f64>, y: Vec<f64>) -> FPCostModel {
+fn compute_rsquared(x: Vec<f64>, y: Vec<f64>, const_param: f64, lin_param: f64) -> f64 {
     assert_eq!(x.len(), y.len());
-    let data = vec![("Y", y), ("X", x)];
-    let data = RegressionDataBuilder::new().build_from(data).unwrap();
-    let model = FormulaRegressionBuilder::new()
-        .data(&data)
-        .formula("Y ~ X")
-        .fit()
-        .unwrap();
-    let r2 = model.rsquared();
-    FPCostModel::new(model.parameters(), r2)
+    let pred_y: Vec<f64> = x.iter().map(|x| const_param + lin_param * x).collect();
+    let y_mean = y.iter().sum::<f64>() / y.len() as f64;
+    let ss_res = y
+        .iter()
+        .zip(pred_y.iter())
+        .map(|(y, y_pred)| (y - y_pred).pow(2i32))
+        .sum::<f64>();
+    let ss_tot = y.iter().map(|y| (y - y_mean).pow(2)).sum::<f64>();
+    1f64 - ss_res / ss_tot
 }
 
-pub fn fit_model(x: Vec<u64>, y: Vec<u64>) -> FPCostModel {
-    assert_eq!(x.len(), y.len());
-    let const_model = x.iter().collect::<HashSet<_>>().len() == 1;
+pub(crate) fn fit_model(inputs: Vec<u64>, outputs: Vec<u64>) -> FPCostModel {
+    assert_eq!(inputs.len(), outputs.len());
+    let const_model = inputs.iter().collect::<HashSet<_>>().len() == 1;
     if const_model {
-        let const_param = y.iter().sum::<u64>() as f64 / y.len() as f64;
+        let const_param = outputs.iter().sum::<u64>() as f64 / outputs.len() as f64;
         return FPCostModel {
             const_param,
             lin_param: 0.0,
@@ -69,7 +80,67 @@ pub fn fit_model(x: Vec<u64>, y: Vec<u64>) -> FPCostModel {
         };
     }
 
-    let x = x.iter().map(|i| *i as f64).collect::<Vec<_>>();
-    let y = y.iter().map(|i| *i as f64).collect::<Vec<_>>();
-    fit_linear_regression(x, y)
+    let (x, y): (Vec<f64>, Vec<f64>) = inputs
+        .into_iter()
+        .zip(outputs)
+        .map(|(x, y)| (x as f64, y as f64))
+        .unzip();
+
+    // First pass: try to pin the solution to (x0, y(x=x0)), where x0 is the
+    // smallest input in the input range X, assuming X is monotonically increasing.
+    // x0 is not necessarily equal to 0. Oftentimes it is unrealistic to build a
+    // sample with input at exactly zero (e.g. you can't deserialize a zero-byte
+    // blob to XDR). Here we try to pin it to the lowest point to ensure the
+    // y-intercept of the produced curve is sane.
+    assert!(y.len() > 1 && x.len() > 1);
+    let x0 = x.get(0).unwrap();
+    let y0 = y.get(0).unwrap();
+    // we build the matrix a and b, the independent and dependent variables in
+    // the equation to be optimized
+    let a: Vec<f64> = x.iter().map(|x| x - x0).collect();
+    let a = OMatrix::<f64, na::Dyn, U1>::from_column_slice(&a);
+    let b: Vec<f64> = y.iter().map(|y| y - y0).collect();
+    let b = OVector::<f64, na::Dyn>::from_row_slice(&b);
+    // computes the least-square solution with a small tolerance
+    let lsq_res = lstsq::lstsq(&a, &b, 1e-14).unwrap();
+    assert_eq!(lsq_res.solution.len(), 1);
+
+    let lin_param = *lsq_res.solution.get(0).unwrap();
+    assert!(
+        lin_param >= 0.0,
+        "{}",
+        format!(
+            "negative slope {} detected, examine your data, or choose a constant model",
+            lin_param
+        )
+    );
+    let const_param = y0 - lin_param * x0;
+    if const_param >= 0.0 {
+        // we have found our solution: the line is least-square minimal, **and**
+        // the intercept is non-negative
+        let r_squared = compute_rsquared(x, y, const_param, lin_param);
+        return FPCostModel {
+            const_param,
+            lin_param,
+            r_squared,
+        };
+    }
+
+    // negative intercept means that extrapolating our solution to the range of
+    // [0, x0) will produce a negative y for some values. This is unacceptable
+    // because someone can pass in an input that produces negative cost.
+    println!(
+        "negative intercept detected, will constrain the solution to pass through (0,0) and rerun"
+    );
+    let a = OMatrix::<f64, na::Dyn, U1>::from_column_slice(&x);
+    let b = OVector::<f64, na::Dyn>::from_row_slice(&y);
+    let lsq_res = lstsq::lstsq(&a, &b, 1e-14).unwrap();
+    assert_eq!(lsq_res.solution.len(), 1);
+    let lin_param = *lsq_res.solution.get(0).unwrap();
+    let r_squared = compute_rsquared(x, y, 0.0, lin_param);
+    FPCostModel {
+        const_param: 0.0,
+        lin_param,
+        r_squared,
+    }
 }
diff --git a/soroban-env-host/benches/variation_histograms.rs b/soroban-env-host/benches/variation_histograms.rs
index 221013280..a624b741e 100644
--- a/soroban-env-host/benches/variation_histograms.rs
+++ b/soroban-env-host/benches/variation_histograms.rs
@@ -2,11 +2,12 @@
 // $ cargo bench --features testutils --bench variation_histograms -- --nocapture
 mod common;
 use common::*;
-use soroban_env_host::cost_runner::CostRunner;
+use soroban_env_host::{budget::MeteredCostComponent, cost_runner::CostRunner};
 
 struct LinearModelTables;
 impl Benchmark for LinearModelTables {
-    fn bench<HCM: HostCostMeasurement>() -> std::io::Result<(FPCostModel, FPCostModel)> {
+    fn bench<HCM: HostCostMeasurement>(
+    ) -> std::io::Result<(MeteredCostComponent, MeteredCostComponent)> {
         let mut measurements = measure_cost_variation::<HCM>(100, 1000, false, false)?;
         measurements.check_range_against_baseline(&HCM::Runner::COST_TYPE)?;
         measurements.preprocess();
diff --git a/soroban-env-host/benches/worst_case_linear_models.rs b/soroban-env-host/benches/worst_case_linear_models.rs
index d23c47b5e..00c06337f 100644
--- a/soroban-env-host/benches/worst_case_linear_models.rs
+++ b/soroban-env-host/benches/worst_case_linear_models.rs
@@ -6,6 +6,7 @@
 mod common;
 use common::*;
 use soroban_env_host::{
+    budget::MeteredCostComponent,
     cost_runner::{CostRunner, CostType, WasmInsnType},
     xdr::ContractCostType,
 };
@@ -14,91 +15,226 @@ use tabwriter::{Alignment, TabWriter};
 
 struct WorstCaseLinearModels;
 impl Benchmark for WorstCaseLinearModels {
-    fn bench<HCM: HostCostMeasurement>() -> std::io::Result<(FPCostModel, FPCostModel)> {
-        let mut measurements = measure_worst_case_costs::<HCM>(1..20)?;
+    fn bench<HCM: HostCostMeasurement>(
+    ) -> std::io::Result<(MeteredCostComponent, MeteredCostComponent)> {
+        let floor = std::env::var("FLOOR")
+            .ok()
+            .map(|v| v.parse::<u64>().ok())
+            .flatten()
+            .unwrap_or(0);
+        let range = std::env::var("RANGE")
+            .ok()
+            .map(|v| v.parse::<u64>().ok())
+            .flatten()
+            .unwrap_or(20);
+        let mut measurements = measure_worst_case_costs::<HCM>(floor..range)?;
         measurements.check_range_against_baseline(&HCM::Runner::COST_TYPE)?;
         measurements.preprocess();
         measurements.report_table();
-        let cpu_model = measurements.fit_model_to_cpu();
-        let mem_model = measurements.fit_model_to_mem();
-        println!("cpu model params: {:?}", cpu_model);
-        println!("mem model params: {:?}", mem_model);
+        let (cpu_model, cpu_r2) = measurements.fit_model_to_cpu();
+        let (mem_model, mem_r2) = measurements.fit_model_to_mem();
+        println!(
+            "{:?} cpu: {:?}, R2 score: {}",
+            HCM::Runner::COST_TYPE,
+            cpu_model,
+            cpu_r2
+        );
+        println!(
+            "{:?} mem: {:?}, R2 score: {}",
+            HCM::Runner::COST_TYPE,
+            mem_model,
+            mem_r2
+        );
         Ok((cpu_model, mem_model))
     }
 }
 
 fn write_cost_params_table<T: Display>(
     tw: &mut TabWriter<Vec<u8>>,
-    params: &BTreeMap<T, (FPCostModel, FPCostModel)>,
+    params: &BTreeMap<T, (MeteredCostComponent, MeteredCostComponent)>,
 ) -> std::io::Result<()> {
     writeln!(tw, "").unwrap();
     writeln!(tw, "").unwrap();
     writeln!(tw, "cost_type\tcpu_model_const_param\tcpu_model_lin_param\tmem_model_const_param\tmem_model_lin_param").unwrap();
-    for (ty, (cpu, mem)) in params
-        .iter()
-        .map(|(ty, (cpu, mem))| (ty, (cpu.params_as_u64(), mem.params_as_u64())))
-    {
-        writeln!(tw, "{}\t{}\t{}\t{}\t{}", ty, cpu.0, cpu.1, mem.0, mem.1).unwrap();
+
+    for (ty, (cpu, mem)) in params.iter() {
+        writeln!(
+            tw,
+            "{}\t{}\t{}\t{}\t{}",
+            ty, cpu.const_term, cpu.lin_term, mem.const_term, mem.lin_term
+        )
+        .unwrap();
     }
     tw.flush()
 }
 
 fn write_budget_params_code(
-    params: &BTreeMap<CostType, (FPCostModel, FPCostModel)>,
-    wasm_tier_cost: &BTreeMap<WasmInsnTier, f64>,
+    params: &BTreeMap<CostType, (MeteredCostComponent, MeteredCostComponent)>,
+    wasm_tier_cost: &BTreeMap<WasmInsnTier, u64>,
 ) {
     println!("");
     println!("");
 
-    let base_cpu_per_fuel = wasm_tier_cost[&WasmInsnTier::BASE] as u64;
-    let entity_cpu_per_fuel = wasm_tier_cost[&WasmInsnTier::ENTITY] as u64;
-    let load_cpu_per_fuel = wasm_tier_cost[&WasmInsnTier::LOAD] as u64;
-    let store_cpu_per_fuel = wasm_tier_cost[&WasmInsnTier::STORE] as u64;
-    let call_cpu_per_fuel = wasm_tier_cost[&WasmInsnTier::CALL] as u64;
-    println!(
-        "
-        // This is the host cpu insn cost per wasm \"fuel\". Every \"base\" wasm
-        // instruction costs 1 fuel (by default), and some particular types of
-        // instructions may cost additional amount of fuel based on
-        // wasmi's config setting. \n
-        ContractCostType::{:?} => {{ cpu.const_term = {}; cpu.lin_term = ScaledU64({}); }}",
-        ContractCostType::WasmInsnExec,
-        base_cpu_per_fuel,
-        0
-    );
+    let base_cpu_per_fuel = wasm_tier_cost[&WasmInsnTier::BASE];
+    let entity_cpu_per_fuel = wasm_tier_cost[&WasmInsnTier::ENTITY];
+    let load_cpu_per_fuel = wasm_tier_cost[&WasmInsnTier::LOAD];
+    let store_cpu_per_fuel = wasm_tier_cost[&WasmInsnTier::STORE];
+    let call_cpu_per_fuel = wasm_tier_cost[&WasmInsnTier::CALL];
 
-    for (ty, (cpu, _)) in params
-        .iter()
-        .map(|(ty, (cpu, mem))| (ty, (cpu.params_as_u64(), mem.params_as_u64())))
-    {
-        if let CostType::Contract(ty) = ty {
-            println!(
-                "ContractCostType::{:?} => {{ cpu.const_term = {}; cpu.lin_term = ScaledU64({}); }}",
-                ty, cpu.0, cpu.1
-            );
+    // first print the cpu part
+
+    for ty in ContractCostType::VARIANTS.iter() {
+        match ty {
+            ContractCostType::WasmInsnExec => {
+                println!(
+                    "
+                    // This is the host cpu insn cost per wasm \"fuel\". Every \"base\" wasm
+                    // instruction costs 1 fuel (by default), and some particular types of
+                    // instructions may cost additional amount of fuel based on
+                    // wasmi's config setting. \n
+                    ContractCostType::{:?} => {{ cpu.const_term = {}; cpu.lin_term = ScaledU64({}); }}",
+                    ty,
+                    base_cpu_per_fuel,
+                    0
+                );
+            }
+            ContractCostType::MemAlloc => {
+                println!(
+                    "
+                    // We don't have a clear way of modeling the linear term of
+                    // memalloc cost thus we choose a reasonable upperbound which is
+                    // same as other mem ops.\n
+                    ContractCostType::{:?} => {{ cpu.const_term = 434; cpu.lin_term = ScaledU64::from_unscaled_u64(1).safe_div(8); }}",
+                    ty,
+                );
+            }
+            ContractCostType::MemCpy => {
+                println!(
+                    "
+                    // We don't use a calibrated number for this because sending a
+                    // large calibration-buffer to memcpy hits an optimized
+                    // large-memcpy path in the stdlib, which has both a large
+                    // overhead and a small per-byte cost. But large buffers aren't
+                    // really how byte-copies usually get used in metered code. Most
+                    // calls have to do with small copies of a few tens or hundreds
+                    // of bytes. So instead we just \"reason it out\": we can probably
+                    // copy 8 bytes per instruction on a 64-bit machine, and that
+                    // therefore a 1-byte copy is considered 1/8th of an
+                    // instruction. We also add in a nonzero constant overhead, to
+                    // avoid having anything that can be zero cost and approximate
+                    // whatever function call, arg-shuffling, spills, reloads or
+                    // other flotsam accumulates around a typical memory copy.\n
+                    ContractCostType::{:?} => {{ cpu.const_term = 42; cpu.lin_term = ScaledU64::from_unscaled_u64(1).safe_div(8); }}",
+                    ty,
+                );
+            }
+            ContractCostType::MemCmp => {
+                println!(
+                    "
+                    // This is analytical.
+                    ContractCostType::{:?} => {{ cpu.const_term = 44; cpu.lin_term = ScaledU64::from_unscaled_u64(1).safe_div(8); }}",
+                    ty,
+                );
+            }
+            ContractCostType::VmCachedInstantiation => {
+                println!(
+                    " 
+                    // `VmCachedInstantiation` has not been calibrated, it is copied
+                    // from `VmInstantiation`.\n
+                    "
+                );
+                match params.get(&CostType::Contract(ContractCostType::VmInstantiation)) {
+                    Some((cpu, _)) => println!(
+                        "ContractCostType::VmCachedInstantiation => {{ cpu.const_term = {}; cpu.lin_term = {:?}; }}",
+                        cpu.const_term, cpu.lin_term
+                    ),
+                    None => println!(
+                        "ContractCostType::VmCachedInstantiation => todo!()"
+                    ),
+                }
+            }
+            _ => match params.get(&CostType::Contract(*ty)) {
+                Some((cpu, _)) => println!(
+                    "ContractCostType::{:?} => {{ cpu.const_term = {}; cpu.lin_term = {:?}; }}",
+                    ty, cpu.const_term, cpu.lin_term
+                ),
+                None => println!("ContractCostType::{:?} => todo!()", ty),
+            },
         }
     }
+
     println!("");
     println!("");
 
-    println!(
-        "
-        // This type is designated to the cpu cost. By definition, the memory cost\n
-        // of a (cpu) fuel is zero.\n
-        ContractCostType::{:?} => {{ mem.const_term = {}; mem.lin_term = ScaledU64({}); }}",
-        ContractCostType::WasmInsnExec,
-        0,
-        0
-    );
-    for (ty, (_, mem)) in params
-        .iter()
-        .map(|(ty, (cpu, mem))| (ty, (cpu.params_as_u64(), mem.params_as_u64())))
-    {
-        if let CostType::Contract(ty) = ty {
-            println!(
-                "ContractCostType::{:?} => {{ mem.const_term = {}; mem.lin_term = ScaledU64({}); }}",
-                ty, mem.0, mem.1
-            );
+    // next print the mem part
+
+    for ty in ContractCostType::VARIANTS.iter() {
+        match ty {
+            ContractCostType::WasmInsnExec => {
+                println!(
+                    "
+                    // This type is designated to the cpu cost. By definition, the
+                    // memory cost of a (cpu) fuel is zero.\n
+                    ContractCostType::{:?} => {{ mem.const_term = {}; mem.lin_term = ScaledU64({}); }}",
+                    ty, 0, 0
+                )
+            }
+            ContractCostType::MemAlloc => {
+                println!(
+                    "// This is analytical.\n
+                    ContractCostType::{:?} => {{ mem.const_term = 16; mem.lin_term = ScaledU64::from_unscaled_u64(1); }}",
+                    ty
+                )
+            }
+            ContractCostType::MemCmp | ContractCostType::MemCpy => {
+                println!(
+                    "// This is analytical.\n
+                    ContractCostType::{:?} => {{ mem.const_term = 0; mem.lin_term = ScaledU64(0); }}",
+                    ty
+                )
+            }
+            ContractCostType::ValSer => {
+                println!(
+                    "
+                    // This is analytically derived from calibration on highly nested
+                    // xdr structures.\n
+                    ContractCostType::{:?} => {{ mem.const_term = 242; mem.lin_term = ScaledU64::from_unscaled_u64(3); }}",
+                    ty
+                )
+            }
+            ContractCostType::ValDeser => {
+                println!(
+                    "
+                    // This is analytically derived from calibration on highly nested
+                    // xdr structures.\n
+                    ContractCostType::{:?} => {{ mem.const_term = 0; mem.lin_term = ScaledU64::from_unscaled_u64(3); }}",
+                    ty
+                )
+            }
+            ContractCostType::VmCachedInstantiation => {
+                println!(
+                    " 
+                    // `VmCachedInstantiation` has not been calibrated, it is copied
+                    // from `VmInstantiation`.\n
+                    "
+                );
+                match params.get(&CostType::Contract(ContractCostType::VmInstantiation)) {
+                    Some((_, mem)) => println!(
+                        "ContractCostType::VmCachedInstantiation => {{ mem.const_term = {}; mem.lin_term = {:?}; }}",
+                        mem.const_term, mem.lin_term
+                    ),
+                    None => println!(
+                        "ContractCostType::VmCachedInstantiation => todo!()"
+                    ),
+                }
+            }
+            _ => match params.get(&CostType::Contract(*ty)) {
+                Some((_, mem)) => println!(
+                    "ContractCostType::{:?} => {{ mem.const_term = {}; mem.lin_term = {:?}; }}",
+                    ty, mem.const_term, mem.lin_term
+                ),
+                None => println!("ContractCostType::{:?} => todo!()", ty),
+            },
         }
     }
 
@@ -108,37 +244,57 @@ fn write_budget_params_code(
         "
         FuelConfig {{base: {}, entity: {}, load: {}, store: {}, call: {}}}",
         1,
-        (entity_cpu_per_fuel / base_cpu_per_fuel).max(1),
-        (load_cpu_per_fuel / base_cpu_per_fuel).max(1),
-        (store_cpu_per_fuel / base_cpu_per_fuel).max(1),
-        (call_cpu_per_fuel / base_cpu_per_fuel).max(1)
+        (entity_cpu_per_fuel
+            .checked_div(base_cpu_per_fuel)
+            .unwrap_or(0))
+        .max(1),
+        (load_cpu_per_fuel
+            .checked_div(base_cpu_per_fuel)
+            .unwrap_or(0))
+        .max(1),
+        (store_cpu_per_fuel
+            .checked_div(base_cpu_per_fuel)
+            .unwrap_or(0))
+        .max(1),
+        (call_cpu_per_fuel
+            .checked_div(base_cpu_per_fuel)
+            .unwrap_or(0))
+        .max(1)
     )
 }
 
 fn extract_tier(
-    params_wasm: &BTreeMap<CostType, (FPCostModel, FPCostModel)>,
+    params_wasm: &BTreeMap<CostType, (MeteredCostComponent, MeteredCostComponent)>,
     insn_tier: &[WasmInsnType],
-) -> (BTreeMap<WasmInsnType, (FPCostModel, FPCostModel)>, f64) {
-    let mut params_tier: BTreeMap<WasmInsnType, (FPCostModel, FPCostModel)> = BTreeMap::new();
+) -> (
+    BTreeMap<WasmInsnType, (MeteredCostComponent, MeteredCostComponent)>,
+    u64,
+) {
+    let mut params_tier: BTreeMap<WasmInsnType, (MeteredCostComponent, MeteredCostComponent)> =
+        BTreeMap::new();
     for ty in insn_tier {
         if let Some(res) = params_wasm.get(&CostType::Wasm(*ty)) {
             params_tier.insert(ty.clone(), res.clone());
         }
     }
 
-    let cpu_per_fuel: Vec<f64> = params_tier
+    let cpu_per_fuel: Vec<u64> = params_tier
         .iter()
-        .map(|(_, (cpu, _))| cpu.const_param)
+        .map(|(_, (cpu, _))| cpu.const_term)
         .collect();
-    let ave_cpu_per_fuel = cpu_per_fuel.iter().sum::<f64>() / cpu_per_fuel.len() as f64;
+    let ave_cpu_per_fuel = cpu_per_fuel
+        .iter()
+        .sum::<u64>()
+        .checked_div(cpu_per_fuel.len() as u64)
+        .unwrap_or(0);
     (params_tier, ave_cpu_per_fuel)
 }
 
 fn process_tier(
     tier: WasmInsnTier,
-    params_wasm: &BTreeMap<CostType, (FPCostModel, FPCostModel)>,
+    params_wasm: &BTreeMap<CostType, (MeteredCostComponent, MeteredCostComponent)>,
     insn_tier: &[WasmInsnType],
-) -> f64 {
+) -> u64 {
     println!("\n");
     println!("\n{:=<100}", "");
     println!("\"{:?}\" tier", tier);
@@ -159,14 +315,14 @@ fn process_tier(
 }
 
 fn extract_wasmi_fuel_costs(
-    params_wasm: &BTreeMap<CostType, (FPCostModel, FPCostModel)>,
-) -> BTreeMap<WasmInsnTier, f64> {
+    params_wasm: &BTreeMap<CostType, (MeteredCostComponent, MeteredCostComponent)>,
+) -> BTreeMap<WasmInsnTier, u64> {
     let base_cost = process_tier(WasmInsnTier::BASE, params_wasm, &WASM_INSN_BASE);
     let entity_cost = process_tier(WasmInsnTier::ENTITY, params_wasm, &WASM_INSN_ENTITY);
     let load_cost = process_tier(WasmInsnTier::LOAD, params_wasm, &WASM_INSN_LOAD);
     let store_cost = process_tier(WasmInsnTier::STORE, params_wasm, &WASM_INSN_STORE);
     let call_cost = process_tier(WasmInsnTier::CALL, params_wasm, &WASM_INSN_CALL);
-    let mut res: BTreeMap<WasmInsnTier, f64> = BTreeMap::new();
+    let mut res: BTreeMap<WasmInsnTier, u64> = BTreeMap::new();
     res.insert(WasmInsnTier::BASE, base_cost);
     res.insert(WasmInsnTier::ENTITY, entity_cost);
     res.insert(WasmInsnTier::LOAD, load_cost);
diff --git a/soroban-env-host/src/budget.rs b/soroban-env-host/src/budget.rs
index e628df901..116a9c228 100644
--- a/soroban-env-host/src/budget.rs
+++ b/soroban-env-host/src/budget.rs
@@ -6,7 +6,7 @@ mod wasmi_helper;
 
 pub(crate) use limits::DepthLimiter;
 pub use limits::{DEFAULT_HOST_DEPTH_LIMIT, DEFAULT_XDR_RW_LIMITS};
-pub use model::COST_MODEL_LIN_TERM_SCALE_BITS;
+pub use model::{MeteredCostComponent, ScaledU64};
 
 use std::{
     cell::{RefCell, RefMut},
@@ -21,7 +21,6 @@ use crate::{
 };
 
 use dimension::{BudgetDimension, IsCpu, IsShadowMode};
-use model::ScaledU64;
 use wasmi_helper::FuelConfig;
 
 #[derive(Debug, Clone, Default, PartialEq, Eq, PartialOrd, Ord)]
@@ -584,6 +583,57 @@ impl Display for BudgetImpl {
     }
 }
 
+#[allow(unused)]
+#[cfg(test)]
+impl BudgetImpl {
+    // Utility function for printing default budget cost parameters in cpp format
+    // so that it can be ported into stellar-core.
+    // When needing it, copy and run the following test
+    // ```
+    // #[test]
+    // fn test() {
+    //     let bi = BudgetImpl::default();
+    //     bi.print_default_params_in_cpp();
+    // }
+    // ```
+    // and copy the screen output.
+    fn print_default_params_in_cpp(&self) {
+        // cpu
+        println!();
+        println!();
+        println!();
+        for ct in ContractCostType::variants() {
+            let Some(cpu) = self.cpu_insns.get_cost_model(ct) else {
+                continue;
+            };
+            println!("case {}:", ct.name());
+            println!(
+                "params[val] = ContractCostParamEntry{{ExtensionPoint{{0}}, {}, {}}};",
+                cpu.const_term, cpu.lin_term.0
+            );
+            println!("break;");
+        }
+        // mem
+        println!();
+        println!();
+        println!();
+        for ct in ContractCostType::variants() {
+            let Some(mem) = self.mem_bytes.get_cost_model(ct) else {
+                continue;
+            };
+            println!("case {}:", ct.name());
+            println!(
+                "params[val] = ContractCostParamEntry{{ExtensionPoint{{0}}, {}, {}}};",
+                mem.const_term, mem.lin_term.0
+            );
+            println!("break;");
+        }
+        println!();
+        println!();
+        println!();
+    }
+}
+
 #[derive(Clone)]
 pub struct Budget(pub(crate) Rc<RefCell<BudgetImpl>>);
 
diff --git a/soroban-env-host/src/budget/model.rs b/soroban-env-host/src/budget/model.rs
index 8047c0805..39b9c20df 100644
--- a/soroban-env-host/src/budget/model.rs
+++ b/soroban-env-host/src/budget/model.rs
@@ -35,21 +35,21 @@ pub trait HostCostModel {
 /// been scaled by this factor during parameter fitting to retain more significant
 /// digits. Thus to get the cost from the raw input, we need to scale the result
 /// back by the same factor.
-pub const COST_MODEL_LIN_TERM_SCALE_BITS: u32 = 7;
+const COST_MODEL_LIN_TERM_SCALE_BITS: u32 = 7;
 
 /// A helper type that wraps an u64 to signify the wrapped value have been scaled.
-#[derive(Clone, Default)]
-pub(crate) struct ScaledU64(pub(crate) u64);
+#[derive(Clone, Default, Debug)]
+pub struct ScaledU64(pub(crate) u64);
 
 impl ScaledU64 {
-    pub const fn unscale(self) -> u64 {
-        self.0 >> COST_MODEL_LIN_TERM_SCALE_BITS
-    }
-
     pub const fn from_unscaled_u64(u: u64) -> Self {
         ScaledU64(u << COST_MODEL_LIN_TERM_SCALE_BITS)
     }
 
+    pub const fn unscale(self) -> u64 {
+        self.0 >> COST_MODEL_LIN_TERM_SCALE_BITS
+    }
+
     pub const fn is_zero(&self) -> bool {
         self.0 == 0
     }
@@ -72,16 +72,19 @@ impl Display for ScaledU64 {
     }
 }
 
-impl Debug for ScaledU64 {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "Scaled({})", self.0)
+#[cfg(feature = "bench")]
+impl From<f64> for ScaledU64 {
+    fn from(unscaled: f64) -> Self {
+        let scaled = unscaled * ((1 << COST_MODEL_LIN_TERM_SCALE_BITS) as f64);
+        // We err on the side of overestimation by applying `ceil` to the input.
+        ScaledU64(scaled.ceil() as u64)
     }
 }
 
 #[derive(Clone, Debug, Default)]
-pub(crate) struct MeteredCostComponent {
-    pub(crate) const_term: u64,
-    pub(crate) lin_term: ScaledU64,
+pub struct MeteredCostComponent {
+    pub const_term: u64,
+    pub lin_term: ScaledU64,
 }
 
 impl TryFrom<&ContractCostParamEntry> for MeteredCostComponent {