From b3d26d749d97241b896e07cfcd54b2e0050e7995 Mon Sep 17 00:00:00 2001 From: Noa Date: Tue, 8 Oct 2024 17:32:23 -0500 Subject: [PATCH 1/2] Update hashbrown to 0.15 --- Cargo.lock | 50 +++++++++++-------- Cargo.toml | 2 +- ...ps__spacetimedb_bindings_dependencies.snap | 15 +++--- crates/client-api/src/routes/prometheus.rs | 2 +- crates/core/src/db/relational_operators.rs | 2 +- .../module_subscription_manager.rs | 2 +- crates/core/src/subscription/subscription.rs | 2 +- crates/data-structures/Cargo.toml | 1 + crates/data-structures/src/map.rs | 9 ++-- crates/lib/src/relation.rs | 2 +- crates/schema/src/type_for_generate.rs | 2 +- crates/sdk/src/spacetime_module.rs | 2 +- crates/table/src/btree_index.rs | 2 +- crates/table/src/table.rs | 2 +- crates/testing/src/sdk.rs | 2 +- crates/vm/src/expr.rs | 2 +- crates/vm/src/rel_ops.rs | 2 +- 17 files changed, 58 insertions(+), 43 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d63d715467..24854371e3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1751,7 +1751,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" dependencies = [ "fallible-iterator 0.3.0", - "indexmap 2.5.0", + "indexmap 2.6.0", "stable_deref_trait", ] @@ -1779,7 +1779,7 @@ dependencies = [ "futures-core", "futures-sink", "http", - "indexmap 2.5.0", + "indexmap 2.6.0", "slab", "tokio", "tokio-util", @@ -1822,6 +1822,15 @@ checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" dependencies = [ "ahash 0.8.11", "allocator-api2", +] + +[[package]] +name = "hashbrown" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb" +dependencies = [ + "equivalent", "rayon", "serde", ] @@ -2176,12 +2185,12 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.5.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68b900aa2f7301e21c36462b170ee99994de34dff39a4a6a528e80e7376d07e5" +checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" dependencies = [ "equivalent", - "hashbrown 0.14.5", + "hashbrown 0.15.0", "serde", ] @@ -2740,7 +2749,7 @@ checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" dependencies = [ "crc32fast", "hashbrown 0.14.5", - "indexmap 2.5.0", + "indexmap 2.6.0", "memchr", ] @@ -2957,7 +2966,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" dependencies = [ "fixedbitset", - "indexmap 2.5.0", + "indexmap 2.6.0", ] [[package]] @@ -3034,7 +3043,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42cf17e9a1800f5f396bc67d193dc9411b59012a5876445ef450d449881e1016" dependencies = [ "base64 0.22.1", - "indexmap 2.5.0", + "indexmap 2.6.0", "quick-xml 0.32.0", "serde", "time", @@ -3272,7 +3281,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d1a341ae463320e9f8f34adda49c8a85d81d4e8f34cce4397fb0350481552224" dependencies = [ "chrono", - "indexmap 2.5.0", + "indexmap 2.6.0", "quick-xml 0.31.0", "strip-ansi-escapes", "thiserror", @@ -3968,7 +3977,7 @@ version = "1.0.128" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" dependencies = [ - "indexmap 2.5.0", + "indexmap 2.6.0", "itoa", "memchr", "ryu", @@ -4016,7 +4025,7 @@ dependencies = [ "chrono", "hex", "indexmap 1.9.3", - "indexmap 2.5.0", + "indexmap 2.6.0", "serde", "serde_derive", "serde_json", @@ -4484,12 +4493,12 @@ dependencies = [ "flate2", "fs2", "futures", - "hashbrown 0.14.5", + "hashbrown 0.15.0", "hex", "hostname", "hyper", "imara-diff", - "indexmap 2.5.0", + "indexmap 2.6.0", "itertools 0.12.1", "jsonwebtoken", "lazy_static", @@ -4559,7 +4568,8 @@ dependencies = [ name = "spacetimedb-data-structures" version = "1.0.0-rc1" dependencies = [ - "hashbrown 0.14.5", + "ahash 0.8.11", + "hashbrown 0.15.0", "nohash-hasher", "serde", "smallvec", @@ -4695,7 +4705,7 @@ version = "1.0.0-rc1" dependencies = [ "anyhow", "enum-as-inner", - "hashbrown 0.14.5", + "hashbrown 0.15.0", "itertools 0.12.1", "lazy_static", "petgraph", @@ -5553,7 +5563,7 @@ version = "0.22.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b072cee73c449a636ffd6f32bd8de3a9f7119139aff882f44943ce2986dc5cf" dependencies = [ - "indexmap 2.5.0", + "indexmap 2.6.0", "serde", "serde_spanned", "toml_datetime", @@ -6107,7 +6117,7 @@ version = "0.116.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a58e28b80dd8340cb07b8242ae654756161f6fc8d0038123d679b7b99964fa50" dependencies = [ - "indexmap 2.5.0", + "indexmap 2.6.0", "semver", ] @@ -6121,7 +6131,7 @@ dependencies = [ "bincode", "bumpalo", "cfg-if", - "indexmap 2.5.0", + "indexmap 2.6.0", "libc", "log", "object 0.32.2", @@ -6220,7 +6230,7 @@ dependencies = [ "anyhow", "cranelift-entity", "gimli 0.28.1", - "indexmap 2.5.0", + "indexmap 2.6.0", "log", "object 0.32.2", "serde", @@ -6286,7 +6296,7 @@ dependencies = [ "anyhow", "cc", "cfg-if", - "indexmap 2.5.0", + "indexmap 2.6.0", "libc", "log", "mach", diff --git a/Cargo.toml b/Cargo.toml index 2de5800e6f..4dbf536732 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -152,7 +152,7 @@ futures = "0.3" futures-channel = "0.3" getrandom = "0.2.7" glob = "0.3.1" -hashbrown = "0.14" +hashbrown = { version = "0.15", default-features = false, features = ["equivalent", "inline-more"] } headers = "0.4" heck = "0.4" hex = "0.4.3" diff --git a/crates/bindings/tests/snapshots/deps__spacetimedb_bindings_dependencies.snap b/crates/bindings/tests/snapshots/deps__spacetimedb_bindings_dependencies.snap index b855db6931..8019dd6c39 100644 --- a/crates/bindings/tests/snapshots/deps__spacetimedb_bindings_dependencies.snap +++ b/crates/bindings/tests/snapshots/deps__spacetimedb_bindings_dependencies.snap @@ -64,14 +64,15 @@ spacetimedb │ ├── itertools (*) │ ├── spacetimedb_bindings_macro (*) │ ├── spacetimedb_data_structures +│ │ ├── ahash +│ │ │ ├── cfg_if +│ │ │ ├── getrandom (*) +│ │ │ ├── once_cell +│ │ │ └── zerocopy (*) +│ │ │ [build-dependencies] +│ │ │ └── version_check │ │ ├── hashbrown -│ │ │ ├── ahash -│ │ │ │ ├── cfg_if -│ │ │ │ ├── once_cell -│ │ │ │ └── zerocopy (*) -│ │ │ │ [build-dependencies] -│ │ │ │ └── version_check -│ │ │ └── allocator_api2 +│ │ │ └── equivalent │ │ ├── nohash_hasher │ │ ├── smallvec │ │ └── thiserror diff --git a/crates/client-api/src/routes/prometheus.rs b/crates/client-api/src/routes/prometheus.rs index 8fe819cf41..d8bece8b76 100644 --- a/crates/client-api/src/routes/prometheus.rs +++ b/crates/client-api/src/routes/prometheus.rs @@ -2,7 +2,7 @@ use crate::{log_and_500, ControlStateReadAccess}; use axum::extract::State; use axum::response::IntoResponse; use serde::{Deserialize, Serialize}; -use spacetimedb_data_structures::map::HashMap; +use spacetimedb_data_structures::map::{HashCollectionExt, HashMap}; #[derive(Serialize, Deserialize)] struct SDConfig { diff --git a/crates/core/src/db/relational_operators.rs b/crates/core/src/db/relational_operators.rs index 3e1fe11034..0f57441fdc 100644 --- a/crates/core/src/db/relational_operators.rs +++ b/crates/core/src/db/relational_operators.rs @@ -1,5 +1,5 @@ use core::marker::PhantomData; -use spacetimedb_data_structures::map::HashSet; +use spacetimedb_data_structures::map::{HashCollectionExt, HashSet}; use spacetimedb_sats::ProductValue; // NOTE diff --git a/crates/core/src/subscription/module_subscription_manager.rs b/crates/core/src/subscription/module_subscription_manager.rs index 2ed27f812a..f94c597834 100644 --- a/crates/core/src/subscription/module_subscription_manager.rs +++ b/crates/core/src/subscription/module_subscription_manager.rs @@ -10,7 +10,7 @@ use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; use spacetimedb_client_api_messages::websocket::{ BsatnFormat, CompressableQueryUpdate, FormatSwitch, JsonFormat, QueryUpdate, }; -use spacetimedb_data_structures::map::{Entry, HashMap, HashSet, IntMap}; +use spacetimedb_data_structures::map::{Entry, HashCollectionExt, HashMap, HashSet, IntMap}; use spacetimedb_lib::{Address, Identity}; use spacetimedb_primitives::TableId; use std::sync::Arc; diff --git a/crates/core/src/subscription/subscription.rs b/crates/core/src/subscription/subscription.rs index 3fa6bfecd4..6ea2d87cb5 100644 --- a/crates/core/src/subscription/subscription.rs +++ b/crates/core/src/subscription/subscription.rs @@ -33,7 +33,7 @@ use anyhow::Context; use itertools::Either; use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; use spacetimedb_client_api_messages::websocket::{Compression, WebsocketFormat}; -use spacetimedb_data_structures::map::HashSet; +use spacetimedb_data_structures::map::{HashCollectionExt, HashSet}; use spacetimedb_lib::db::auth::{StAccess, StTableType}; use spacetimedb_lib::db::error::AuthError; use spacetimedb_lib::identity::AuthCtx; diff --git a/crates/data-structures/Cargo.toml b/crates/data-structures/Cargo.toml index 5d9fcd8101..f432dc024a 100644 --- a/crates/data-structures/Cargo.toml +++ b/crates/data-structures/Cargo.toml @@ -9,6 +9,7 @@ description = "Assorted data structures used in spacetimedb" serde = ["dep:serde"] [dependencies] +ahash.workspace = true hashbrown.workspace = true nohash-hasher.workspace = true serde = { workspace = true, optional = true } diff --git a/crates/data-structures/src/map.rs b/crates/data-structures/src/map.rs index 521b1f8f58..50085752fb 100644 --- a/crates/data-structures/src/map.rs +++ b/crates/data-structures/src/map.rs @@ -1,9 +1,12 @@ -use core::hash::BuildHasher; -pub use hashbrown::hash_map::{DefaultHashBuilder, Entry, RawEntryMut}; -pub use hashbrown::{HashMap, HashSet}; +use core::hash::{BuildHasher, BuildHasherDefault}; +pub use hashbrown::hash_map::Entry; use nohash_hasher::BuildNoHashHasher; pub use nohash_hasher::IsEnabled as ValidAsIdentityHash; +pub type DefaultHashBuilder = BuildHasherDefault; +pub type HashMap = hashbrown::HashMap; +pub type HashSet = hashbrown::HashSet; + /// A version of [`HashMap`] using the identity hash function, /// which is valid for any key type that can be converted to a `u64` without truncation. pub type IntMap = HashMap>; diff --git a/crates/lib/src/relation.rs b/crates/lib/src/relation.rs index c8cca67666..1c85596aa6 100644 --- a/crates/lib/src/relation.rs +++ b/crates/lib/src/relation.rs @@ -3,7 +3,7 @@ use crate::db::error::{RelationError, TypeError}; use core::fmt; use core::hash::Hash; use derive_more::From; -use spacetimedb_data_structures::map::HashSet; +use spacetimedb_data_structures::map::{HashCollectionExt, HashSet}; use spacetimedb_primitives::{ColId, ColList, ColSet, Constraints, TableId}; use spacetimedb_sats::algebraic_value::AlgebraicValue; use spacetimedb_sats::satn::Satn; diff --git a/crates/schema/src/type_for_generate.rs b/crates/schema/src/type_for_generate.rs index 75c7ee60cc..706be6e554 100644 --- a/crates/schema/src/type_for_generate.rs +++ b/crates/schema/src/type_for_generate.rs @@ -8,7 +8,7 @@ use petgraph::{ use smallvec::SmallVec; use spacetimedb_data_structures::{ error_stream::{CollectAllErrors, CombineErrors, ErrorStream}, - map::{HashMap, HashSet}, + map::{HashCollectionExt, HashMap, HashSet}, }; use spacetimedb_lib::{AlgebraicType, ProductTypeElement}; use spacetimedb_sats::{typespace::TypeRefError, AlgebraicTypeRef, ArrayType, SumTypeVariant, Typespace}; diff --git a/crates/sdk/src/spacetime_module.rs b/crates/sdk/src/spacetime_module.rs index 1712a188f8..feb6083dd3 100644 --- a/crates/sdk/src/spacetime_module.rs +++ b/crates/sdk/src/spacetime_module.rs @@ -10,7 +10,7 @@ use crate::{ }; use anyhow::Context; use bytes::Bytes; -use spacetimedb_data_structures::map::HashMap; +use spacetimedb_data_structures::map::{HashCollectionExt, HashMap}; use spacetimedb_lib::{bsatn, de::DeserializeOwned}; use std::{any::Any, fmt::Debug, hash::Hash}; diff --git a/crates/table/src/btree_index.rs b/crates/table/src/btree_index.rs index a35b50081c..6ab256d07d 100644 --- a/crates/table/src/btree_index.rs +++ b/crates/table/src/btree_index.rs @@ -445,7 +445,7 @@ mod test { use core::ops::Bound::*; use proptest::prelude::*; use proptest::{collection::vec, test_runner::TestCaseResult}; - use spacetimedb_data_structures::map::HashMap; + use spacetimedb_data_structures::map::{HashCollectionExt, HashMap}; use spacetimedb_primitives::ColId; use spacetimedb_sats::{ product, diff --git a/crates/table/src/table.rs b/crates/table/src/table.rs index f0f3e7d199..c9ed1a0c99 100644 --- a/crates/table/src/table.rs +++ b/crates/table/src/table.rs @@ -22,7 +22,7 @@ use core::hash::{Hash, Hasher}; use core::ops::RangeBounds; use core::{fmt, ptr}; use derive_more::{Add, AddAssign, From, Sub}; -use spacetimedb_data_structures::map::HashMap; +use spacetimedb_data_structures::map::{HashCollectionExt, HashMap}; use spacetimedb_lib::{bsatn::DecodeError, de::DeserializeOwned}; use spacetimedb_primitives::{ColId, ColList, IndexId}; use spacetimedb_sats::{ diff --git a/crates/testing/src/sdk.rs b/crates/testing/src/sdk.rs index f561d1cb7a..f1bf110824 100644 --- a/crates/testing/src/sdk.rs +++ b/crates/testing/src/sdk.rs @@ -1,7 +1,7 @@ use duct::cmd; use lazy_static::lazy_static; use rand::distributions::{Alphanumeric, DistString}; -use spacetimedb_data_structures::map::HashMap; +use spacetimedb_data_structures::map::{HashCollectionExt, HashMap}; use std::fs::create_dir_all; use std::sync::Mutex; use std::thread::JoinHandle; diff --git a/crates/vm/src/expr.rs b/crates/vm/src/expr.rs index b9685da4df..c138545866 100644 --- a/crates/vm/src/expr.rs +++ b/crates/vm/src/expr.rs @@ -6,7 +6,7 @@ use core::slice::from_ref; use derive_more::From; use itertools::Itertools; use smallvec::SmallVec; -use spacetimedb_data_structures::map::{HashSet, IntMap}; +use spacetimedb_data_structures::map::{HashCollectionExt, HashSet, IntMap}; use spacetimedb_lib::db::auth::{StAccess, StTableType}; use spacetimedb_lib::db::error::{AuthError, RelationError}; use spacetimedb_lib::relation::{ColExpr, DbTable, FieldName, Header}; diff --git a/crates/vm/src/rel_ops.rs b/crates/vm/src/rel_ops.rs index c7a0837e3d..2665e325b5 100644 --- a/crates/vm/src/rel_ops.rs +++ b/crates/vm/src/rel_ops.rs @@ -1,7 +1,7 @@ use core::iter; use crate::relation::RelValue; -use spacetimedb_data_structures::map::HashMap; +use spacetimedb_data_structures::map::{HashCollectionExt, HashMap}; use spacetimedb_lib::relation::ColExpr; use spacetimedb_sats::AlgebraicValue; From d8970044efafe4e9c4f9af2951b933a627ce7a6c Mon Sep 17 00:00:00 2001 From: Noa Date: Fri, 26 Jan 2024 13:54:17 -0600 Subject: [PATCH 2/2] Energy metering for persistent memory usage --- crates/client-api-messages/src/energy.rs | 7 + .../locking_tx_datastore/committed_state.rs | 13 + .../locking_tx_datastore/datastore.rs | 16 ++ .../locking_tx_datastore/sequence.rs | 16 ++ crates/core/src/db/relational_db.rs | 6 + crates/core/src/energy.rs | 3 + crates/core/src/host/host_controller.rs | 14 +- crates/core/src/replica_context.rs | 5 + crates/primitives/src/col_list.rs | 8 + crates/standalone/src/energy_monitor.rs | 5 + crates/table/src/bflatn_to_bsatn_fast_path.rs | 11 + crates/table/src/blob_store.rs | 18 ++ crates/table/src/btree_index.rs | 36 ++- crates/table/src/btree_index/multimap.rs | 9 + crates/table/src/fixed_bit_set.rs | 8 + crates/table/src/indexes.rs | 14 +- crates/table/src/layout.rs | 66 +++++ crates/table/src/lib.rs | 3 + crates/table/src/memory_usage.rs | 247 ++++++++++++++++++ crates/table/src/page.rs | 53 +++- crates/table/src/pages.rs | 9 + crates/table/src/pointer_map.rs | 17 +- crates/table/src/row_type_visitor.rs | 11 + crates/table/src/table.rs | 37 ++- 24 files changed, 621 insertions(+), 11 deletions(-) create mode 100644 crates/table/src/memory_usage.rs diff --git a/crates/client-api-messages/src/energy.rs b/crates/client-api-messages/src/energy.rs index 9b82ef69b1..f4f47491f3 100644 --- a/crates/client-api-messages/src/energy.rs +++ b/crates/client-api-messages/src/energy.rs @@ -36,6 +36,13 @@ impl EnergyQuanta { let energy = bytes_stored * sec + (bytes_stored * nsec) / 1_000_000_000; Self::new(energy) } + + const ENERGY_PER_MEM_BYTE_SEC: u128 = 100; + + pub fn from_memory_usage(bytes_stored: u64, storage_period: Duration) -> Self { + let byte_seconds = Self::from_disk_usage(bytes_stored, storage_period).get(); + Self::new(byte_seconds * Self::ENERGY_PER_MEM_BYTE_SEC) + } } impl fmt::Display for EnergyQuanta { diff --git a/crates/core/src/db/datastore/locking_tx_datastore/committed_state.rs b/crates/core/src/db/datastore/locking_tx_datastore/committed_state.rs index 1098530a9e..e6f1a53e40 100644 --- a/crates/core/src/db/datastore/locking_tx_datastore/committed_state.rs +++ b/crates/core/src/db/datastore/locking_tx_datastore/committed_state.rs @@ -37,6 +37,7 @@ use spacetimedb_table::{ blob_store::{BlobStore, HashMapBlobStore}, indexes::{RowPointer, SquashedOffset}, table::{IndexScanIter, InsertError, RowRef, Table}, + MemoryUsage, }; use std::collections::BTreeMap; use std::sync::Arc; @@ -55,6 +56,18 @@ pub struct CommittedState { pub(super) index_id_map: IndexIdMap, } +impl MemoryUsage for CommittedState { + fn heap_usage(&self) -> usize { + let Self { + next_tx_offset, + tables, + blob_store, + index_id_map, + } = self; + next_tx_offset.heap_usage() + tables.heap_usage() + blob_store.heap_usage() + index_id_map.heap_usage() + } +} + impl StateView for CommittedState { fn get_schema(&self, table_id: TableId) -> Option<&Arc> { self.tables.get(&table_id).map(|table| table.get_schema()) diff --git a/crates/core/src/db/datastore/locking_tx_datastore/datastore.rs b/crates/core/src/db/datastore/locking_tx_datastore/datastore.rs index b158e7b99c..f21b231771 100644 --- a/crates/core/src/db/datastore/locking_tx_datastore/datastore.rs +++ b/crates/core/src/db/datastore/locking_tx_datastore/datastore.rs @@ -37,6 +37,7 @@ use spacetimedb_snapshot::ReconstructedSnapshot; use spacetimedb_table::{ indexes::RowPointer, table::{RowRef, Table}, + MemoryUsage, }; use std::time::{Duration, Instant}; use std::{borrow::Cow, sync::Arc}; @@ -64,6 +65,21 @@ pub struct Locking { pub(crate) database_address: Address, } +impl MemoryUsage for Locking { + fn heap_usage(&self) -> usize { + let Self { + committed_state, + sequence_state, + database_address, + } = self; + std::mem::size_of_val(&**committed_state) + + committed_state.read().heap_usage() + + std::mem::size_of_val(&**sequence_state) + + sequence_state.lock().heap_usage() + + database_address.heap_usage() + } +} + impl Locking { pub fn new(database_address: Address) -> Self { Self { diff --git a/crates/core/src/db/datastore/locking_tx_datastore/sequence.rs b/crates/core/src/db/datastore/locking_tx_datastore/sequence.rs index 4331354263..369780fe2c 100644 --- a/crates/core/src/db/datastore/locking_tx_datastore/sequence.rs +++ b/crates/core/src/db/datastore/locking_tx_datastore/sequence.rs @@ -1,12 +1,21 @@ use spacetimedb_data_structures::map::IntMap; use spacetimedb_primitives::SequenceId; use spacetimedb_schema::schema::SequenceSchema; +use spacetimedb_table::MemoryUsage; pub(super) struct Sequence { schema: SequenceSchema, pub(super) value: i128, } +impl MemoryUsage for Sequence { + fn heap_usage(&self) -> usize { + // MEMUSE: intentionally ignoring schema + let Self { schema: _, value } = self; + value.heap_usage() + } +} + impl Sequence { pub(super) fn new(schema: SequenceSchema) -> Self { Self { @@ -102,6 +111,13 @@ pub(super) struct SequencesState { sequences: IntMap, } +impl MemoryUsage for SequencesState { + fn heap_usage(&self) -> usize { + let Self { sequences } = self; + sequences.heap_usage() + } +} + impl SequencesState { pub(super) fn get_sequence_mut(&mut self, seq_id: SequenceId) -> Option<&mut Sequence> { self.sequences.get_mut(&seq_id) diff --git a/crates/core/src/db/relational_db.rs b/crates/core/src/db/relational_db.rs index ef493c0aa8..58e431f11d 100644 --- a/crates/core/src/db/relational_db.rs +++ b/crates/core/src/db/relational_db.rs @@ -36,6 +36,7 @@ use spacetimedb_schema::schema::{IndexSchema, RowLevelSecuritySchema, Schema, Se use spacetimedb_snapshot::{SnapshotError, SnapshotRepository}; use spacetimedb_table::indexes::RowPointer; use spacetimedb_table::table::RowRef; +use spacetimedb_table::MemoryUsage; use std::borrow::Cow; use std::collections::HashSet; use std::fmt; @@ -490,6 +491,11 @@ impl RelationalDB { self.disk_size_fn.as_ref().map_or(Ok(0), |f| f()) } + /// The size in bytes of all of the in-memory data in this database. + pub fn size_in_memory(&self) -> usize { + self.inner.heap_usage() + } + pub fn encode_row(row: &ProductValue, bytes: &mut Vec) { // TODO: large file storage of the row elements row.encode(bytes); diff --git a/crates/core/src/energy.rs b/crates/core/src/energy.rs index 9fe3fd82f5..0e796e1bb2 100644 --- a/crates/core/src/energy.rs +++ b/crates/core/src/energy.rs @@ -21,6 +21,7 @@ pub trait EnergyMonitor: Send + Sync + 'static { execution_duration: Duration, ); fn record_disk_usage(&self, database: &Database, replica_id: u64, disk_usage: u64, period: Duration); + fn record_memory_usage(&self, database: &Database, replica_id: u64, mem_usage: u64, period: Duration); } #[derive(Default)] @@ -40,4 +41,6 @@ impl EnergyMonitor for NullEnergyMonitor { } fn record_disk_usage(&self, _database: &Database, _replica_id: u64, _disk_usage: u64, _period: Duration) {} + + fn record_memory_usage(&self, _database: &Database, _replica_id: u64, _mem_usage: u64, _period: Duration) {} } diff --git a/crates/core/src/host/host_controller.rs b/crates/core/src/host/host_controller.rs index 692beda21a..316e3afbc1 100644 --- a/crates/core/src/host/host_controller.rs +++ b/crates/core/src/host/host_controller.rs @@ -755,7 +755,7 @@ impl Host { } scheduler_starter.start(&module_host)?; - let metrics_task = tokio::spawn(disk_monitor(replica_ctx.clone(), energy_monitor.clone())).abort_handle(); + let metrics_task = tokio::spawn(storage_monitor(replica_ctx.clone(), energy_monitor.clone())).abort_handle(); Ok(Host { module: watch::Sender::new(module_host), @@ -826,22 +826,23 @@ impl Drop for Host { } } -const DISK_METERING_INTERVAL: Duration = Duration::from_secs(5); +const STORAGE_METERING_INTERVAL: Duration = Duration::from_secs(5); /// Periodically collect the disk usage of `replica_ctx` and update metrics as well as /// the `energy_monitor` accordingly. -async fn disk_monitor(replica_ctx: Arc, energy_monitor: Arc) { - let mut interval = tokio::time::interval(DISK_METERING_INTERVAL); +async fn storage_monitor(replica_ctx: Arc, energy_monitor: Arc) { + let mut interval = tokio::time::interval(STORAGE_METERING_INTERVAL); // We don't care about happening precisely every 5 seconds - it just matters // that the time between ticks is accurate. interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay); - let mut prev_disk_usage = replica_ctx.total_disk_usage(); + let mut prev_disk_usage = tokio::task::block_in_place(|| replica_ctx.total_disk_usage()); let mut prev_tick = interval.tick().await; loop { let tick = interval.tick().await; let dt = tick - prev_tick; - let disk_usage = tokio::task::block_in_place(|| replica_ctx.total_disk_usage()); + let (disk_usage, mem_usage) = + tokio::task::block_in_place(|| (replica_ctx.total_disk_usage(), replica_ctx.mem_usage())); if let Some(num_bytes) = disk_usage.durability { DB_METRICS .message_log_size @@ -856,6 +857,7 @@ async fn disk_monitor(replica_ctx: Arc, energy_monitor: Arc usize { + self.relational_db.size_in_memory() + } } impl Deref for ReplicaContext { diff --git a/crates/primitives/src/col_list.rs b/crates/primitives/src/col_list.rs index cc1ecd7cb7..42d0933155 100644 --- a/crates/primitives/src/col_list.rs +++ b/crates/primitives/src/col_list.rs @@ -276,6 +276,14 @@ impl ColList { let addr = unsafe { self.check }; addr & 1 != 0 } + + #[doc(hidden)] + pub fn heap_size(&self) -> usize { + match self.as_inline() { + Ok(_) => 0, + Err(heap) => heap.capacity() as usize, + } + } } impl Drop for ColList { diff --git a/crates/standalone/src/energy_monitor.rs b/crates/standalone/src/energy_monitor.rs index 74ed2e4d6a..dd3be4a027 100644 --- a/crates/standalone/src/energy_monitor.rs +++ b/crates/standalone/src/energy_monitor.rs @@ -40,6 +40,11 @@ impl EnergyMonitor for StandaloneEnergyMonitor { let amount = EnergyQuanta::from_disk_usage(disk_usage, period); self.withdraw_energy(database.owner_identity, amount) } + + fn record_memory_usage(&self, database: &Database, _instance_id: u64, mem_usage: u64, period: Duration) { + let amount = EnergyQuanta::from_memory_usage(mem_usage, period); + self.withdraw_energy(database.owner_identity, amount) + } } impl StandaloneEnergyMonitor { diff --git a/crates/table/src/bflatn_to_bsatn_fast_path.rs b/crates/table/src/bflatn_to_bsatn_fast_path.rs index c953ba2654..023ec2c6a0 100644 --- a/crates/table/src/bflatn_to_bsatn_fast_path.rs +++ b/crates/table/src/bflatn_to_bsatn_fast_path.rs @@ -20,6 +20,8 @@ //! one of 20 bytes to copy the leading `(u64, u64, u32)`, which contains no padding, //! and then one of 8 bytes to copy the trailing `u64`, skipping over 4 bytes of padding in between. +use crate::MemoryUsage; + use super::{ indexes::{Byte, Bytes}, layout::{ @@ -47,6 +49,13 @@ pub(crate) struct StaticBsatnLayout { fields: Box<[MemcpyField]>, } +impl MemoryUsage for StaticBsatnLayout { + fn heap_usage(&self) -> usize { + let Self { bsatn_length, fields } = self; + bsatn_length.heap_usage() + fields.heap_usage() + } +} + impl StaticBsatnLayout { /// Serialize `row` from BFLATN to BSATN into `buf`. /// @@ -156,6 +165,8 @@ struct MemcpyField { length: u16, } +impl MemoryUsage for MemcpyField {} + impl MemcpyField { /// Copies the bytes at `row[self.bflatn_offset .. self.bflatn_offset + self.length]` /// into `buf[self.bsatn_offset + self.length]`. diff --git a/crates/table/src/blob_store.rs b/crates/table/src/blob_store.rs index b0883a8a30..9fb0611e20 100644 --- a/crates/table/src/blob_store.rs +++ b/crates/table/src/blob_store.rs @@ -15,6 +15,8 @@ use blake3::hash; use spacetimedb_data_structures::map::{Entry, HashMap}; use spacetimedb_lib::{de::Deserialize, ser::Serialize}; +use crate::MemoryUsage; + /// The content address of a blob-stored object. #[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash, Debug, Serialize, Deserialize)] pub struct BlobHash { @@ -24,6 +26,8 @@ pub struct BlobHash { pub data: [u8; BlobHash::SIZE], } +impl MemoryUsage for BlobHash {} + impl BlobHash { /// The size of the hash function's output in bytes. pub const SIZE: usize = 32; @@ -142,6 +146,13 @@ pub struct HashMapBlobStore { map: HashMap, } +impl MemoryUsage for HashMapBlobStore { + fn heap_usage(&self) -> usize { + let Self { map } = self; + map.heap_usage() + } +} + /// A blob object including a reference count and the data. struct BlobObject { /// Reference count of the blob. @@ -150,6 +161,13 @@ struct BlobObject { blob: Box<[u8]>, } +impl MemoryUsage for BlobObject { + fn heap_usage(&self) -> usize { + let Self { uses, blob } = self; + uses.heap_usage() + blob.heap_usage() + } +} + impl BlobStore for HashMapBlobStore { fn clone_blob(&mut self, hash: &BlobHash) -> Result<(), NoSuchBlobError> { self.map.get_mut(hash).ok_or(NoSuchBlobError)?.uses += 1; diff --git a/crates/table/src/btree_index.rs b/crates/table/src/btree_index.rs index 6ab256d07d..42efea96ed 100644 --- a/crates/table/src/btree_index.rs +++ b/crates/table/src/btree_index.rs @@ -23,7 +23,7 @@ use super::indexes::RowPointer; use super::table::RowRef; -use crate::{read_column::ReadColumn, static_assert_size}; +use crate::{read_column::ReadColumn, static_assert_size, MemoryUsage}; use core::ops::RangeBounds; use spacetimedb_primitives::{ColList, IndexId}; use spacetimedb_sats::{ @@ -127,6 +127,28 @@ enum TypedIndex { AlgebraicValue(Index), } +impl MemoryUsage for TypedIndex { + fn heap_usage(&self) -> usize { + match self { + TypedIndex::Bool(this) => this.heap_usage(), + TypedIndex::U8(this) => this.heap_usage(), + TypedIndex::I8(this) => this.heap_usage(), + TypedIndex::U16(this) => this.heap_usage(), + TypedIndex::I16(this) => this.heap_usage(), + TypedIndex::U32(this) => this.heap_usage(), + TypedIndex::I32(this) => this.heap_usage(), + TypedIndex::U64(this) => this.heap_usage(), + TypedIndex::I64(this) => this.heap_usage(), + TypedIndex::U128(this) => this.heap_usage(), + TypedIndex::I128(this) => this.heap_usage(), + TypedIndex::U256(this) => this.heap_usage(), + TypedIndex::I256(this) => this.heap_usage(), + TypedIndex::String(this) => this.heap_usage(), + TypedIndex::AlgebraicValue(this) => this.heap_usage(), + } + } +} + impl TypedIndex { /// Add the row referred to by `row_ref` to the index `self`, /// which must be keyed at `cols`. @@ -329,6 +351,18 @@ pub struct BTreeIndex { pub key_type: AlgebraicType, } +impl MemoryUsage for BTreeIndex { + fn heap_usage(&self) -> usize { + let Self { + index_id, + is_unique, + idx, + key_type, + } = self; + index_id.heap_usage() + is_unique.heap_usage() + idx.heap_usage() + key_type.heap_usage() + } +} + static_assert_size!(BTreeIndex, 64); impl BTreeIndex { diff --git a/crates/table/src/btree_index/multimap.rs b/crates/table/src/btree_index/multimap.rs index c232f8b44a..881a287455 100644 --- a/crates/table/src/btree_index/multimap.rs +++ b/crates/table/src/btree_index/multimap.rs @@ -3,6 +3,8 @@ use core::slice; use smallvec::SmallVec; use std::collections::btree_map::{BTreeMap, Range}; +use crate::MemoryUsage; + /// A multi map that relates a `K` to a *set* of `V`s. #[derive(Default)] pub struct MultiMap { @@ -15,6 +17,13 @@ pub struct MultiMap { map: BTreeMap>, } +impl MemoryUsage for MultiMap { + fn heap_usage(&self) -> usize { + let Self { map } = self; + map.heap_usage() + } +} + impl MultiMap { /// Returns an empty multi map. pub fn new() -> Self { diff --git a/crates/table/src/fixed_bit_set.rs b/crates/table/src/fixed_bit_set.rs index 431735dfaa..76792dcc3a 100644 --- a/crates/table/src/fixed_bit_set.rs +++ b/crates/table/src/fixed_bit_set.rs @@ -5,6 +5,8 @@ use core::{ pub use internal_unsafe::FixedBitSet; use internal_unsafe::Len; +use crate::MemoryUsage; + /// A type used to represent blocks in a bit set. /// A smaller type, compared to usize, /// means taking less advantage of native operations. @@ -243,6 +245,12 @@ impl FixedBitSet { } } +impl MemoryUsage for FixedBitSet { + fn heap_usage(&self) -> usize { + std::mem::size_of_val(self.storage()) + } +} + /// An iterator that yields the set indices of a [`FixedBitSet`]. pub struct IterSet<'a, B = DefaultBitBlock> { /// The block iterator. diff --git a/crates/table/src/indexes.rs b/crates/table/src/indexes.rs index 2dd4ec8106..27e97a455d 100644 --- a/crates/table/src/indexes.rs +++ b/crates/table/src/indexes.rs @@ -2,7 +2,7 @@ //! bytes, row hashes, (page) sizes, offsets, and indices. use super::util::range_move; -use crate::static_assert_size; +use crate::{static_assert_size, MemoryUsage}; use ahash::RandomState; use core::fmt; use core::ops::{AddAssign, Div, Mul, Range, SubAssign}; @@ -53,6 +53,8 @@ pub const PAGE_DATA_SIZE: usize = PAGE_SIZE - PAGE_HEADER_SIZE; #[cfg_attr(any(test, feature = "proptest"), derive(proptest_derive::Arbitrary))] pub struct RowHash(pub u64); +impl MemoryUsage for RowHash {} + static_assert_size!(RowHash, 8); /// `RowHash` is already a hash, so no need to hash again. @@ -70,6 +72,8 @@ impl RowHash { #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Hash, Add, Sub)] pub struct Size(pub u16); +impl MemoryUsage for Size {} + // We need to be able to serialize and deserialize `Size` because they appear in the `PageHeader`. impl_serialize!([] Size, (self, ser) => self.0.serialize(ser)); impl_deserialize!([] Size, de => u16::deserialize(de).map(Size)); @@ -100,6 +104,8 @@ pub struct PageOffset( #[cfg_attr(any(test, feature = "proptest"), proptest(strategy = "0..PageOffset::PAGE_END.0"))] pub u16, ); +impl MemoryUsage for PageOffset {} + static_assert_size!(PageOffset, 2); // We need to ser/de `PageOffset`s because they appear within the `PageHeader`. @@ -201,6 +207,8 @@ impl fmt::LowerHex for PageOffset { #[derive(Clone, Copy, PartialEq, Eq, Debug)] pub struct PageIndex(#[cfg_attr(any(test, feature = "proptest"), proptest(strategy = "0..MASK_PI"))] pub u64); +impl MemoryUsage for PageIndex {} + static_assert_size!(PageIndex, 8); impl PageIndex { @@ -230,6 +238,8 @@ impl PageIndex { #[cfg_attr(any(test, feature = "proptest"), derive(proptest_derive::Arbitrary))] pub struct SquashedOffset(pub u8); +impl MemoryUsage for SquashedOffset {} + static_assert_size!(SquashedOffset, 1); impl SquashedOffset { @@ -258,6 +268,8 @@ impl SquashedOffset { #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct RowPointer(pub u64); +impl MemoryUsage for RowPointer {} + static_assert_size!(RowPointer, 8); // Offsets and bits for the various components of `RowPointer`. diff --git a/crates/table/src/layout.rs b/crates/table/src/layout.rs index 2628cd4688..08cde9aa1d 100644 --- a/crates/table/src/layout.rs +++ b/crates/table/src/layout.rs @@ -6,6 +6,8 @@ //! These, and others, determine what the layout of objects typed at those types are. //! They also implement [`HasLayout`] which generalizes over layout annotated types. +use crate::MemoryUsage; + use super::{ indexes::Size, var_len::{VarLenGranule, VarLenRef}, @@ -55,6 +57,8 @@ pub struct Layout { pub align: u16, } +impl MemoryUsage for Layout {} + /// A type which knows what its layout is. /// /// This does not refer to layout in Rust. @@ -107,6 +111,17 @@ pub enum AlgebraicTypeLayout { VarLen(VarLenType), } +impl MemoryUsage for AlgebraicTypeLayout { + fn heap_usage(&self) -> usize { + match self { + AlgebraicTypeLayout::Sum(x) => x.heap_usage(), + AlgebraicTypeLayout::Product(x) => x.heap_usage(), + AlgebraicTypeLayout::Primitive(x) => x.heap_usage(), + AlgebraicTypeLayout::VarLen(x) => x.heap_usage(), + } + } +} + impl HasLayout for AlgebraicTypeLayout { fn layout(&self) -> &Layout { match self { @@ -170,6 +185,13 @@ pub const fn row_size_for_type() -> Size { #[derive(Debug, PartialEq, Eq, Clone)] pub struct RowTypeLayout(ProductTypeLayout); +impl MemoryUsage for RowTypeLayout { + fn heap_usage(&self) -> usize { + let Self(layout) = self; + layout.heap_usage() + } +} + impl RowTypeLayout { /// Returns a view of this row type as a product type. pub fn product(&self) -> &ProductTypeLayout { @@ -217,6 +239,13 @@ pub struct ProductTypeLayout { pub elements: Collection, } +impl MemoryUsage for ProductTypeLayout { + fn heap_usage(&self) -> usize { + let Self { layout, elements } = self; + layout.heap_usage() + elements.heap_usage() + } +} + impl HasLayout for ProductTypeLayout { fn layout(&self) -> &Layout { &self.layout @@ -239,6 +268,13 @@ pub struct ProductTypeElementLayout { pub name: Option>, } +impl MemoryUsage for ProductTypeElementLayout { + fn heap_usage(&self) -> usize { + let Self { offset, ty, name } = self; + offset.heap_usage() + ty.heap_usage() + name.heap_usage() + } +} + /// A mirrior of [`SumType`] annotated with a [`Layout`]. #[derive(Debug, PartialEq, Eq, Clone)] pub struct SumTypeLayout { @@ -251,6 +287,17 @@ pub struct SumTypeLayout { pub payload_offset: u16, } +impl MemoryUsage for SumTypeLayout { + fn heap_usage(&self) -> usize { + let Self { + layout, + variants, + payload_offset, + } = self; + layout.heap_usage() + variants.heap_usage() + payload_offset.heap_usage() + } +} + impl HasLayout for SumTypeLayout { fn layout(&self) -> &Layout { &self.layout @@ -270,6 +317,15 @@ pub struct SumTypeVariantLayout { pub name: Option>, } +impl MemoryUsage for SumTypeVariantLayout { + fn heap_usage(&self) -> usize { + let Self { ty, name } = self; + ty.heap_usage() + name.heap_usage() + } +} + +impl MemoryUsage for PrimitiveType {} + impl HasLayout for PrimitiveType { fn layout(&self) -> &'static Layout { match self { @@ -301,6 +357,16 @@ pub enum VarLenType { Map(Box), } +impl MemoryUsage for VarLenType { + fn heap_usage(&self) -> usize { + match self { + VarLenType::String => 0, + VarLenType::Array(x) => x.heap_usage(), + VarLenType::Map(x) => x.heap_usage(), + } + } +} + /// The layout of var-len objects. Aligned at a `u16` which it has 2 of. const VAR_LEN_REF_LAYOUT: Layout = Layout { size: 4, align: 2 }; const _: () = assert!(VAR_LEN_REF_LAYOUT.size as usize == mem::size_of::()); diff --git a/crates/table/src/lib.rs b/crates/table/src/lib.rs index 3f25cc6530..b702af487e 100644 --- a/crates/table/src/lib.rs +++ b/crates/table/src/lib.rs @@ -25,5 +25,8 @@ pub mod row_type_visitor; pub mod table; pub mod var_len; +mod memory_usage; +pub use memory_usage::MemoryUsage; + #[doc(hidden)] // Used in tests and benchmarks. pub mod util; diff --git a/crates/table/src/memory_usage.rs b/crates/table/src/memory_usage.rs new file mode 100644 index 0000000000..be8e519e1b --- /dev/null +++ b/crates/table/src/memory_usage.rs @@ -0,0 +1,247 @@ +use std::hash::{BuildHasher, Hash}; +use std::mem; + +use spacetimedb_sats::{ + algebraic_value::Packed, i256, u256, AlgebraicType, AlgebraicValue, ArrayType, ArrayValue, MapType, ProductType, + ProductTypeElement, ProductValue, SumType, SumTypeVariant, SumValue, +}; + +/// For inspecting how much memory a value is using. +/// +/// This trait specifically measures heap memory. If you want to measure stack memory too, add +/// `mem::size_of_val()` to it. (This only really matters for the outermost type in a hierarchy.) +pub trait MemoryUsage { + /// The **heap** memory usage of this type. The default implementation returns 0. + #[inline(always)] + fn heap_usage(&self) -> usize { + 0 + } +} + +impl MemoryUsage for bool {} +impl MemoryUsage for u8 {} +impl MemoryUsage for u16 {} +impl MemoryUsage for u32 {} +impl MemoryUsage for u64 {} +impl MemoryUsage for u128 {} +impl MemoryUsage for u256 {} +impl MemoryUsage for usize {} +impl MemoryUsage for i8 {} +impl MemoryUsage for i16 {} +impl MemoryUsage for i32 {} +impl MemoryUsage for i64 {} +impl MemoryUsage for i128 {} +impl MemoryUsage for i256 {} +impl MemoryUsage for isize {} +impl MemoryUsage for f32 {} +impl MemoryUsage for f64 {} + +impl MemoryUsage for spacetimedb_sats::F32 {} +impl MemoryUsage for spacetimedb_sats::F64 {} + +impl MemoryUsage for Box { + fn heap_usage(&self) -> usize { + mem::size_of_val::(self) + T::heap_usage(self) + } +} + +impl MemoryUsage for std::sync::Arc { + fn heap_usage(&self) -> usize { + let refcounts = mem::size_of::() * 2; + refcounts + mem::size_of_val::(self) + T::heap_usage(self) + } +} + +impl MemoryUsage for std::rc::Rc { + fn heap_usage(&self) -> usize { + let refcounts = mem::size_of::() * 2; + refcounts + mem::size_of_val::(self) + T::heap_usage(self) + } +} + +impl MemoryUsage for [T] { + fn heap_usage(&self) -> usize { + self.iter().map(T::heap_usage).sum() + } +} + +impl MemoryUsage for str {} + +impl MemoryUsage for Option { + fn heap_usage(&self) -> usize { + self.as_ref().map_or(0, T::heap_usage) + } +} + +impl MemoryUsage for (A, B) { + fn heap_usage(&self) -> usize { + self.0.heap_usage() + self.1.heap_usage() + } +} + +impl MemoryUsage for String { + fn heap_usage(&self) -> usize { + self.capacity() + } +} + +impl MemoryUsage for Vec { + fn heap_usage(&self) -> usize { + self.capacity() * mem::size_of::() + self.iter().map(T::heap_usage).sum::() + } +} + +impl MemoryUsage + for spacetimedb_data_structures::map::HashMap +{ + fn heap_usage(&self) -> usize { + self.allocation_size() + self.iter().map(|(k, v)| k.heap_usage() + v.heap_usage()).sum::() + } +} + +impl MemoryUsage for std::collections::BTreeMap { + fn heap_usage(&self) -> usize { + // NB: this is best-effort, since we don't have a `capacity()` method on `BTreeMap`. + self.len() * mem::size_of::<(K, V)>() + self.iter().map(|(k, v)| k.heap_usage() + v.heap_usage()).sum::() + } +} + +impl MemoryUsage for smallvec::SmallVec +where + A::Item: MemoryUsage, +{ + fn heap_usage(&self) -> usize { + self.as_slice().heap_usage() + + if self.spilled() { + self.capacity() * mem::size_of::() + } else { + 0 + } + } +} + +impl MemoryUsage for spacetimedb_primitives::TableId {} +impl MemoryUsage for spacetimedb_primitives::SequenceId {} +impl MemoryUsage for spacetimedb_primitives::ConstraintId {} +impl MemoryUsage for spacetimedb_primitives::IndexId {} +impl MemoryUsage for spacetimedb_primitives::ColId {} +impl MemoryUsage for spacetimedb_primitives::ColList { + fn heap_usage(&self) -> usize { + self.heap_size() + } +} + +impl MemoryUsage for AlgebraicValue { + fn heap_usage(&self) -> usize { + match self { + AlgebraicValue::Sum(x) => x.heap_usage(), + AlgebraicValue::Product(x) => x.heap_usage(), + AlgebraicValue::Array(x) => x.heap_usage(), + AlgebraicValue::Map(x) => x.heap_usage(), + AlgebraicValue::String(x) => x.heap_usage(), + _ => 0, + } + } +} +impl MemoryUsage for SumValue { + fn heap_usage(&self) -> usize { + self.value.heap_usage() + } +} +impl MemoryUsage for ProductValue { + fn heap_usage(&self) -> usize { + self.elements.heap_usage() + } +} +impl MemoryUsage for ArrayValue { + fn heap_usage(&self) -> usize { + match self { + ArrayValue::Sum(v) => v.heap_usage(), + ArrayValue::Product(v) => v.heap_usage(), + ArrayValue::Bool(v) => v.heap_usage(), + ArrayValue::I8(v) => v.heap_usage(), + ArrayValue::U8(v) => v.heap_usage(), + ArrayValue::I16(v) => v.heap_usage(), + ArrayValue::U16(v) => v.heap_usage(), + ArrayValue::I32(v) => v.heap_usage(), + ArrayValue::U32(v) => v.heap_usage(), + ArrayValue::I64(v) => v.heap_usage(), + ArrayValue::U64(v) => v.heap_usage(), + ArrayValue::I128(v) => v.heap_usage(), + ArrayValue::U128(v) => v.heap_usage(), + ArrayValue::I256(v) => v.heap_usage(), + ArrayValue::U256(v) => v.heap_usage(), + ArrayValue::F32(v) => v.heap_usage(), + ArrayValue::F64(v) => v.heap_usage(), + ArrayValue::String(v) => v.heap_usage(), + ArrayValue::Array(v) => v.heap_usage(), + ArrayValue::Map(v) => v.heap_usage(), + } + } +} +impl MemoryUsage for AlgebraicType { + fn heap_usage(&self) -> usize { + match self { + AlgebraicType::Ref(_) => 0, + AlgebraicType::Sum(x) => x.heap_usage(), + AlgebraicType::Product(x) => x.heap_usage(), + AlgebraicType::Array(x) => x.heap_usage(), + AlgebraicType::Map(x) => x.heap_usage(), + AlgebraicType::String + | AlgebraicType::Bool + | AlgebraicType::I8 + | AlgebraicType::U8 + | AlgebraicType::I16 + | AlgebraicType::U16 + | AlgebraicType::I32 + | AlgebraicType::U32 + | AlgebraicType::I64 + | AlgebraicType::U64 + | AlgebraicType::I128 + | AlgebraicType::U128 + | AlgebraicType::I256 + | AlgebraicType::U256 + | AlgebraicType::F32 + | AlgebraicType::F64 => 0, + } + } +} +impl MemoryUsage for SumType { + fn heap_usage(&self) -> usize { + self.variants.heap_usage() + } +} +impl MemoryUsage for SumTypeVariant { + fn heap_usage(&self) -> usize { + self.name.heap_usage() + self.algebraic_type.heap_usage() + } +} +impl MemoryUsage for ProductType { + fn heap_usage(&self) -> usize { + self.elements.heap_usage() + } +} +impl MemoryUsage for ProductTypeElement { + fn heap_usage(&self) -> usize { + self.name.heap_usage() + self.algebraic_type.heap_usage() + } +} +impl MemoryUsage for ArrayType { + fn heap_usage(&self) -> usize { + self.elem_ty.heap_usage() + } +} +impl MemoryUsage for MapType { + fn heap_usage(&self) -> usize { + self.key_ty.heap_usage() + self.ty.heap_usage() + } +} + +impl MemoryUsage for Packed { + fn heap_usage(&self) -> usize { + { self.0 }.heap_usage() + } +} + +impl MemoryUsage for spacetimedb_lib::Address {} +impl MemoryUsage for spacetimedb_lib::Identity {} diff --git a/crates/table/src/page.rs b/crates/table/src/page.rs index 1b915a69ba..310333ce07 100644 --- a/crates/table/src/page.rs +++ b/crates/table/src/page.rs @@ -39,7 +39,7 @@ use super::{ layout::MIN_ROW_SIZE, var_len::{is_granule_offset_aligned, VarLenGranule, VarLenGranuleHeader, VarLenMembers, VarLenRef}, }; -use crate::{fixed_bit_set::IterSet, static_assert_size, table::BlobNumBytes}; +use crate::{fixed_bit_set::IterSet, static_assert_size, table::BlobNumBytes, MemoryUsage}; use core::{mem, ops::ControlFlow, ptr}; use spacetimedb_lib::{de::Deserialize, ser::Serialize}; use thiserror::Error; @@ -63,6 +63,13 @@ struct FreeCellRef { next: PageOffset, } +impl MemoryUsage for FreeCellRef { + fn heap_usage(&self) -> usize { + let Self { next } = self; + next.heap_usage() + } +} + impl FreeCellRef { /// The sentinel for NULL cell references. const NIL: Self = Self { @@ -157,6 +164,21 @@ struct FixedHeader { fixed_row_size: Size, } +impl MemoryUsage for FixedHeader { + fn heap_usage(&self) -> usize { + let Self { + next_free, + last, + num_rows, + present_rows, + // MEMUSE: it's just a u16, ok to ignore + #[cfg(debug_assertions)] + fixed_row_size: _, + } = self; + next_free.heap_usage() + last.heap_usage() + num_rows.heap_usage() + present_rows.heap_usage() + } +} + #[cfg(debug_assertions)] static_assert_size!(FixedHeader, 18); @@ -250,6 +272,17 @@ struct VarHeader { first: PageOffset, } +impl MemoryUsage for VarHeader { + fn heap_usage(&self) -> usize { + let Self { + next_free, + freelist_len, + first, + } = self; + next_free.heap_usage() + freelist_len.heap_usage() + first.heap_usage() + } +} + static_assert_size!(VarHeader, 6); impl Default for VarHeader { @@ -293,6 +326,18 @@ struct PageHeader { unmodified_hash: Option, } +impl MemoryUsage for PageHeader { + fn heap_usage(&self) -> usize { + let Self { + fixed, + var, + // MEMUSE: no allocation, ok to ignore + unmodified_hash: _, + } = self; + fixed.heap_usage() + var.heap_usage() + } +} + static_assert_size!(PageHeader, PAGE_HEADER_SIZE); impl PageHeader { @@ -372,6 +417,12 @@ pub struct Page { row_data: [Byte; PageOffset::PAGE_END.idx()], } +impl MemoryUsage for Page { + fn heap_usage(&self) -> usize { + self.header.heap_usage() + } +} + static_assert_size!(Page, PAGE_SIZE); /// A mutable view of the fixed-len section of a [`Page`]. diff --git a/crates/table/src/pages.rs b/crates/table/src/pages.rs index 87333d9b49..c434f91eaf 100644 --- a/crates/table/src/pages.rs +++ b/crates/table/src/pages.rs @@ -1,5 +1,7 @@ //! Provides [`Pages`], a page manager dealing with [`Page`]s as a collection. +use crate::MemoryUsage; + use super::blob_store::BlobStore; use super::indexes::{Bytes, PageIndex, PageOffset, RowPointer, Size}; use super::page::Page; @@ -40,6 +42,13 @@ pub struct Pages { non_full_pages: Vec, } +impl MemoryUsage for Pages { + fn heap_usage(&self) -> usize { + let Self { pages, non_full_pages } = self; + pages.heap_usage() + non_full_pages.heap_usage() + } +} + impl Pages { /// Is there space to allocate another page? pub fn can_allocate_new_page(&self) -> Result { diff --git a/crates/table/src/pointer_map.rs b/crates/table/src/pointer_map.rs index 15972555e5..fbe5879e89 100644 --- a/crates/table/src/pointer_map.rs +++ b/crates/table/src/pointer_map.rs @@ -14,7 +14,7 @@ //! retrieval is probably no more than 100% slower. use super::indexes::{PageIndex, PageOffset, RowHash, RowPointer, SquashedOffset}; -use crate::static_assert_size; +use crate::{static_assert_size, MemoryUsage}; use core::{hint, slice}; use spacetimedb_data_structures::map::{ Entry, @@ -25,6 +25,8 @@ use spacetimedb_data_structures::map::{ #[derive(Clone, Copy, PartialEq, Eq, Debug)] struct ColliderSlotIndex(u32); +impl MemoryUsage for ColliderSlotIndex {} + impl ColliderSlotIndex { /// Returns a new slot index based on `idx`. fn new(idx: usize) -> Self { @@ -43,6 +45,8 @@ impl ColliderSlotIndex { #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] struct PtrOrCollider(RowPointer); +impl MemoryUsage for PtrOrCollider {} + /// An unpacked representation of [`&mut PtrOrCollider`](PtrOrCollider). enum MapSlotRef<'map> { /// The hash has no collision and is associated to a single row pointer. @@ -149,6 +153,17 @@ pub struct PointerMap { emptied_collider_slots: Vec, } +impl MemoryUsage for PointerMap { + fn heap_usage(&self) -> usize { + let Self { + map, + colliders, + emptied_collider_slots, + } = self; + map.heap_usage() + colliders.heap_usage() + emptied_collider_slots.heap_usage() + } +} + static_assert_size!(PointerMap, 80); // Provides some type invariant checks. diff --git a/crates/table/src/row_type_visitor.rs b/crates/table/src/row_type_visitor.rs index 55b9da5bf9..126476284b 100644 --- a/crates/table/src/row_type_visitor.rs +++ b/crates/table/src/row_type_visitor.rs @@ -27,6 +27,8 @@ //! The `VarLenMembers` impl for `VarLenVisitorProgram` //! implements a simple interpreter loop for the var-len visitor bytecode. +use crate::MemoryUsage; + use super::{ indexes::{Byte, Bytes, PageOffset}, layout::{align_to, AlgebraicTypeLayout, HasLayout, ProductTypeLayout, RowTypeLayout, SumTypeLayout}, @@ -315,6 +317,8 @@ impl Insn { const FIXUP: Self = Self::Goto(u16::MAX); } +impl MemoryUsage for Insn {} + #[allow(clippy::disallowed_macros)] // This is for test code. pub fn dump_visitor_program(program: &VarLenVisitorProgram) { for (idx, insn) in program.insns.iter().enumerate() { @@ -354,6 +358,13 @@ pub struct VarLenVisitorProgram { insns: Arc<[Insn]>, } +impl MemoryUsage for VarLenVisitorProgram { + fn heap_usage(&self) -> usize { + let Self { insns } = self; + insns.heap_usage() + } +} + /// Evalutes the `program`, /// provided the `instr_ptr` as its program counter / intruction pointer, /// and a callback `read_tag` to extract a tag at the given offset, diff --git a/crates/table/src/table.rs b/crates/table/src/table.rs index c9ed1a0c99..bc5fd0917f 100644 --- a/crates/table/src/table.rs +++ b/crates/table/src/table.rs @@ -16,7 +16,7 @@ use super::{ read_column::{ReadColumn, TypeError}, row_hash::hash_row_in_page, row_type_visitor::{row_type_visitor, VarLenVisitorProgram}, - static_assert_size, + static_assert_size, MemoryUsage, }; use core::hash::{Hash, Hasher}; use core::ops::RangeBounds; @@ -41,6 +41,8 @@ use thiserror::Error; #[derive(Copy, Clone, PartialEq, Eq, Debug, Default, From, Add, Sub, AddAssign)] pub struct BlobNumBytes(usize); +impl MemoryUsage for BlobNumBytes {} + /// A database table containing the row schema, the rows, and indices. /// /// The table stores the rows into a page manager @@ -125,6 +127,39 @@ impl TableInner { static_assert_size!(Table, 256); +impl MemoryUsage for Table { + fn heap_usage(&self) -> usize { + let Self { + inner, + pointer_map, + indexes, + // MEMUSE: intentionally ignoring schema + schema: _, + squashed_offset, + row_count, + blob_store_bytes, + } = self; + inner.heap_usage() + + pointer_map.heap_usage() + + indexes.heap_usage() + + squashed_offset.heap_usage() + + row_count.heap_usage() + + blob_store_bytes.heap_usage() + } +} + +impl MemoryUsage for TableInner { + fn heap_usage(&self) -> usize { + let Self { + row_layout, + static_bsatn_layout, + visitor_prog, + pages, + } = self; + row_layout.heap_usage() + static_bsatn_layout.heap_usage() + visitor_prog.heap_usage() + pages.heap_usage() + } +} + /// Various error that can happen on table insertion. #[derive(Error, Debug)] pub enum InsertError {