Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

change(scan): Create a scanner storage database, but don't use it yet #8031

Merged
merged 11 commits into from
Nov 30, 2023
1 change: 1 addition & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -5804,6 +5804,7 @@ dependencies = [
"indexmap 2.1.0",
"jubjub",
"rand 0.8.5",
"semver 1.0.20",
"serde",
"tokio",
"tower",
Expand Down
2 changes: 1 addition & 1 deletion zebra-consensus/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use serde::{Deserialize, Serialize};

/// Configuration for parallel semantic verification:
/// <https://zebra.zfnd.org/dev/rfcs/0002-parallel-verification.html#definitions>
#[derive(Clone, Debug, Deserialize, Serialize)]
#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)]
#[serde(
deny_unknown_fields,
default,
Expand Down
2 changes: 1 addition & 1 deletion zebra-network/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ pub use cache_dir::CacheDir;
const MAX_SINGLE_SEED_PEER_DNS_RETRIES: usize = 0;

/// Configuration for networking code.
#[derive(Clone, Debug, Serialize)]
#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
#[serde(deny_unknown_fields, default)]
pub struct Config {
/// The address on which this node should listen for connections.
Expand Down
2 changes: 1 addition & 1 deletion zebra-rpc/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use serde::{Deserialize, Serialize};
pub mod mining;

/// RPC configuration section.
#[derive(Clone, Debug, Deserialize, Serialize)]
#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)]
#[serde(deny_unknown_fields, default)]
pub struct Config {
/// IP address and port for the RPC server.
Expand Down
3 changes: 2 additions & 1 deletion zebra-scan/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ categories = ["cryptography::cryptocurrencies"]

color-eyre = "0.6.2"
indexmap = { version = "2.0.1", features = ["serde"] }
semver = "1.0.20"
serde = { version = "1.0.193", features = ["serde_derive"] }
tokio = "1.34.0"
tower = "0.4.13"
Expand All @@ -31,7 +32,7 @@ zcash_client_backend = "0.10.0-rc.1"
zcash_primitives = "0.13.0-rc.1"

zebra-chain = { path = "../zebra-chain", version = "1.0.0-beta.31" }
zebra-state = { path = "../zebra-state", version = "1.0.0-beta.31" }
zebra-state = { path = "../zebra-state", version = "1.0.0-beta.31", features = ["shielded-scan"] }

[dev-dependencies]

Expand Down
27 changes: 26 additions & 1 deletion zebra-scan/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,46 @@
use indexmap::IndexMap;
use serde::{Deserialize, Serialize};

use zebra_state::Config as DbConfig;

use crate::storage::SaplingScanningKey;

/// Configuration for scanning.
///
/// Holds the Sapling keys to scan for, and the config of the database that stores the
/// scanner results.
#[derive(Clone, Eq, PartialEq, Deserialize, Serialize)]
#[serde(deny_unknown_fields, default)]
pub struct Config {
    /// The sapling keys to scan for and the birthday height of each of them.
    //
    // TODO: any value below sapling activation as the birthday height should default to sapling activation.
    //
    // TODO: allow keys without birthdays
    pub sapling_keys_to_scan: IndexMap<SaplingScanningKey, u32>,

    /// The scanner results database config.
    //
    // TODO: Remove fields that are only used by the state to create a common database config.
    //
    // NOTE(review): the serde documentation says `flatten` is not supported in combination
    // with `deny_unknown_fields` - confirm unknown fields are still handled as intended.
    #[serde(flatten)]
    db_config: DbConfig,
}

impl std::fmt::Debug for Config {
    /// Formats the config without revealing the configured scanning keys.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("Config")
            // Security: scanning keys are private data, so only the number of keys is
            // written to logs and error reports, never the keys or their birthdays.
            .field("sapling_keys_to_scan", &self.sapling_keys_to_scan.len())
            .field("db_config", &self.db_config)
            .finish()
    }
}

impl Default for Config {
    /// Returns a config with no keys to scan and the default database config.
    fn default() -> Self {
        let sapling_keys_to_scan = IndexMap::new();
        let db_config = DbConfig::default();

        Self {
            sapling_keys_to_scan,
            db_config,
        }
    }
}

impl Config {
    /// Builds a config whose database is temporary: it is deleted when it is dropped.
    ///
    /// Every other setting keeps its default value.
    pub fn ephemeral() -> Self {
        let db_config = DbConfig::ephemeral();

        Self {
            db_config,
            ..Self::default()
        }
    }

    /// Borrows the database-specific part of this config.
    pub fn db_config(&self) -> &DbConfig {
        let Self { db_config, .. } = self;

        db_config
    }
}
21 changes: 21 additions & 0 deletions zebra-scan/src/init.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
//! Initializing the scanner.

use color_eyre::Report;
use tokio::task::JoinHandle;
use tracing::Instrument;

use zebra_chain::parameters::Network;

use crate::{scan, storage::Storage, Config};

/// Starts the scanner for `config` and `network`, using `state` as its state service.
///
/// Opens the scanner storage, then spawns [`scan::start`] as a tokio task that runs in the
/// caller's current tracing span.
///
/// Returns the handle of the spawned task, which resolves to the result of [`scan::start`].
//
// NOTE(review): `tokio::spawn` needs a running tokio runtime, so this presumably must be
// called from inside one - confirm against the callers.
pub fn init(
    config: &Config,
    network: Network,
    state: scan::State,
) -> JoinHandle<Result<(), Report>> {
    let storage = Storage::new(config, network);
    let scan_task = scan::start(state, storage).in_current_span();

    // TODO: add more tasks here?
    tokio::spawn(scan_task)
}
4 changes: 4 additions & 0 deletions zebra-scan/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,12 @@
#![doc(html_root_url = "https://docs.rs/zebra_scan")]

pub mod config;
pub mod init;
pub mod scan;
pub mod storage;

#[cfg(test)]
mod tests;

pub use config::Config;
pub use init::init;
5 changes: 3 additions & 2 deletions zebra-scan/src/scan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ use zebra_chain::{

use crate::storage::Storage;

type State = Buffer<
/// The generic state type used by the scanner.
pub type State = Buffer<
BoxService<zebra_state::Request, zebra_state::Response, zebra_state::BoxError>,
zebra_state::Request,
>;
Expand All @@ -35,7 +36,7 @@ const CHECK_INTERVAL: Duration = Duration::from_secs(10);
/// Start the scan task given state and storage.
///
/// - This function is dummy at the moment. It just makes sure we can read the storage and the state.
/// - Modificatiuons here might have an impact in the `scan_task_starts` test.
/// - Modifications here might have an impact in the `scan_task_starts` test.
/// - Real scanning code functionality will be added in the future here.
pub async fn start(mut state: State, storage: Storage) -> Result<(), Report> {
// We want to make sure the state has a tip height available before we start scanning.
Expand Down
68 changes: 54 additions & 14 deletions zebra-scan/src/storage.rs
Original file line number Diff line number Diff line change
@@ -1,31 +1,71 @@
//! Store viewing keys and results of the scan.

#![allow(dead_code)]

use std::collections::HashMap;

use zebra_chain::{block::Height, transaction::Hash};
use zebra_chain::{block::Height, parameters::Network, transaction::Hash};

use crate::config::Config;

pub mod db;

/// The type used in Zebra to store Sapling scanning keys.
/// It can represent a full viewing key or an individual viewing key.
pub type SaplingScanningKey = String;

/// Store key info and results of the scan.
#[allow(dead_code)]
///
/// `rocksdb` allows concurrent writes through a shared reference,
/// so clones of the scanner storage represent the same database instance.
/// When the final clone is dropped, the database is closed.
//
// NOTE(review): the derived `Clone` copies the `sapling_keys` and `sapling_results` maps,
// so each clone gets its own independent copy of them. Only the `db` field is described
// as shared between clones - confirm nothing relies on the maps being shared.
//
// NOTE(review): the derived `Debug` includes the `sapling_keys` map, whose keys are
// scanning keys - confirm it is acceptable for them to appear in debug output.
#[derive(Clone, Debug)]
pub struct Storage {
// Configuration
//
// This configuration cannot be modified after the database is initialized,
// because some clones would have different values.
//
// TODO: add config if needed?

// Owned State
//
// Everything contained in this state must be shared by all clones, or read-only.
//
/// The underlying database.
///
/// `rocksdb` allows reads and writes via a shared reference,
/// so this database object can be freely cloned.
/// The last instance that is dropped will close the underlying database.
//
// This database is created but not actually used for results.
// TODO: replace the fields below with a database instance.
db: db::ScannerDb,

/// The sapling key and an optional birthday for it.
///
/// Held in memory only: this map is a plain `HashMap` field, not part of `db`.
sapling_keys: HashMap<SaplingScanningKey, Option<Height>>,

/// The sapling key and the related transaction id.
///
/// Held in memory only: this map is a plain `HashMap` field, not part of `db`.
sapling_results: HashMap<SaplingScanningKey, Vec<Hash>>,
}

#[allow(dead_code)]
impl Storage {
/// Create a new storage.
pub fn new() -> Self {
Self {
sapling_keys: HashMap::new(),
sapling_results: HashMap::new(),
/// Opens and returns the on-disk scanner results storage for `config` and `network`.
/// If there is no existing storage, creates a new storage on disk.
///
/// TODO:
/// New keys in `config` are inserted into the database with their birthday heights. Shielded
/// activation is the minimum birthday height.
///
/// Birthdays and scanner progress are marked by inserting an empty result for that height.
pub fn new(config: &Config, network: Network) -> Self {
let mut storage = Self::new_db(config, network);

for (key, birthday) in config.sapling_keys_to_scan.iter() {
storage.add_sapling_key(key.clone(), Some(zebra_chain::block::Height(*birthday)));
}

storage
}

/// Add a sapling key to the storage.
Expand All @@ -43,18 +83,18 @@ impl Storage {
}

/// Get the results of a sapling key.
//
// TODO: Rust style - remove "get_" from these names
pub fn get_sapling_results(&self, key: &str) -> Vec<Hash> {
self.sapling_results.get(key).cloned().unwrap_or_default()
}

/// Get all keys and their birthdays.
//
// TODO: any value below sapling activation as the birthday height, or `None`, should default
// to sapling activation. This requires the configured network.
// Return Height not Option<Height>.
pub fn get_sapling_keys(&self) -> HashMap<String, Option<Height>> {
self.sapling_keys.clone()
}
}

impl Default for Storage {
fn default() -> Self {
Self::new()
}
}
104 changes: 104 additions & 0 deletions zebra-scan/src/storage/db.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
//! Persistent storage for scanner results.

use std::{collections::HashMap, path::Path};

use semver::Version;

use zebra_chain::parameters::Network;

use crate::Config;

use super::Storage;

// Public types and APIs
pub use zebra_state::ZebraDb as ScannerDb;

/// The directory name used to distinguish the scanner database from Zebra's other databases or
/// flat files.
///
/// We use "private" in the name to warn users not to share this data.
///
/// Passed to `ScannerDb::new` as the database kind when the scanner storage is opened.
pub const SCANNER_DATABASE_KIND: &str = "private-scan";

/// The column families supported by the running `zebra-scan` database code.
///
/// Existing column families that aren't listed here are preserved when the database is opened.
///
/// These names are passed to `ScannerDb::new` when the scanner storage is opened.
pub const SCANNER_COLUMN_FAMILIES_IN_CODE: &[&str] = &[
// Sapling
"sapling_tx_ids",
// Orchard
// TODO
];

impl Storage {
    /// Opens the on-disk scanner results database for `config` and `network`, and returns a
    /// storage backed by it. A new database is created on disk if none exists yet.
    ///
    /// This method does not insert the keys in `config` into the database.
    pub(crate) fn new_db(config: &Config, network: Network) -> Self {
        // TODO: make format upgrades work with any database, then change this to `false`
        let debug_skip_format_upgrades = true;

        Self::new_with_debug(config, network, debug_skip_format_upgrades)
    }

    /// Opens an on-disk database instance using the supplied production and debug settings,
    /// creating a new database on disk if there is no existing one.
    ///
    /// This method does not insert the keys in `config` into the database:
    /// the returned storage starts with empty in-memory key and result maps.
    ///
    /// This method is intended for use in tests.
    pub(crate) fn new_with_debug(
        config: &Config,
        network: Network,
        debug_skip_format_upgrades: bool,
    ) -> Self {
        let format_version = Self::database_format_version_in_code();
        let column_families = SCANNER_COLUMN_FAMILIES_IN_CODE
            .iter()
            .map(ToString::to_string);

        let db = ScannerDb::new(
            config.db_config(),
            SCANNER_DATABASE_KIND,
            &format_version,
            network,
            debug_skip_format_upgrades,
            column_families,
        );

        // TODO: report the last scanned height here?
        tracing::info!("loaded Zebra scanner cache");

        Self {
            db,
            sapling_keys: HashMap::new(),
            sapling_results: HashMap::new(),
        }
    }

    /// The scanner database format version that the running code implements.
    pub fn database_format_version_in_code() -> Version {
        // TODO: implement scanner database versioning
        let (major, minor, patch) = (0, 0, 0);

        Version::new(major, minor, patch)
    }

    /// The network this database was configured for.
    pub fn network(&self) -> Network {
        let Self { db, .. } = self;

        db.network()
    }

    /// The `Path` of the location that holds this database's files.
    pub fn path(&self) -> &Path {
        let Self { db, .. } = self;

        db.path()
    }

    /// Checks for panics in code running in spawned threads, and resumes the panic of any
    /// thread that exited with one.
    ///
    /// Call this method regularly, so that panics are detected as soon as possible.
    //
    // TODO: when we implement format changes, call this method regularly
    pub fn check_for_panics(&mut self) {
        let Self { db, .. } = self;

        db.check_for_panics()
    }
}
10 changes: 7 additions & 3 deletions zebra-scan/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,14 @@ use zcash_primitives::{
};

use zebra_chain::{
block::Block, chain_tip::ChainTip, serialization::ZcashDeserializeInto, transaction::Hash,
block::Block, chain_tip::ChainTip, parameters::Network, serialization::ZcashDeserializeInto,
transaction::Hash,
};

use crate::scan::{block_to_compact, scan_block};
use crate::{
config::Config,
scan::{block_to_compact, scan_block},
};

/// Prove that we can create fake blocks with fake notes and scan them using the
/// `zcash_client_backend::scanning::scan_block` function:
Expand Down Expand Up @@ -177,7 +181,7 @@ fn scanning_fake_blocks_store_key_and_results() -> Result<()> {
zcash_client_backend::encoding::encode_extended_full_viewing_key("zxviews", &extfvk);

// Create a database
let mut s = crate::storage::Storage::new();
let mut s = crate::storage::Storage::new(&Config::ephemeral(), Network::Mainnet);

// Insert the generated key to the database
s.add_sapling_key(key_to_be_stored.clone(), None);
Expand Down
Loading
Loading