From 6b1d1bb9a4af461f2bd428287989f35429ecb45b Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Wed, 21 Aug 2024 06:39:18 -0700 Subject: [PATCH 01/42] make error catchable --- src/core/src/index/revindex/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/src/index/revindex/mod.rs b/src/core/src/index/revindex/mod.rs index aac0d47c14..3345424fdf 100644 --- a/src/core/src/index/revindex/mod.rs +++ b/src/core/src/index/revindex/mod.rs @@ -186,7 +186,7 @@ impl RevIndex { pub fn open>(index: P, read_only: bool, spec: Option<&str>) -> Result { let opts = db_options(); - let cfs = DB::list_cf(&opts, index.as_ref()).unwrap(); + let cfs = DB::list_cf(&opts, index.as_ref())?; if cfs.into_iter().any(|c| c == COLORS) { // TODO: ColorRevIndex can't be read-only for now, From 7e73e8ffd9caabb29085e192740d5d8761a1b38c Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Wed, 21 Aug 2024 07:03:04 -0700 Subject: [PATCH 02/42] flag another problematic unwrap --- src/core/src/storage/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/core/src/storage/mod.rs b/src/core/src/storage/mod.rs index 12f456fc22..6791b7219b 100644 --- a/src/core/src/storage/mod.rs +++ b/src/core/src/storage/mod.rs @@ -657,7 +657,8 @@ impl Storage for MemStorage { } fn load_sig(&self, path: &str) -> Result { - Ok(self.sigs.read().unwrap().get(path).unwrap().clone()) + let x = self.sigs.read().unwrap(); // @CTB this is causing a problem + Ok(x.get(path).unwrap().clone()) } fn save_sig(&self, path: &str, sig: Signature) -> Result { From 2896c496231801f2d53a525e914396be529b256f Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Wed, 21 Aug 2024 07:11:14 -0700 Subject: [PATCH 03/42] more --- src/core/src/storage/mod.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/core/src/storage/mod.rs b/src/core/src/storage/mod.rs index 6791b7219b..714fb63162 100644 --- a/src/core/src/storage/mod.rs +++ b/src/core/src/storage/mod.rs @@ -657,8 +657,11 @@ impl Storage for MemStorage { } fn load_sig(&self, path: &str) -> Result { - let x = self.sigs.read().unwrap(); // @CTB this is causing a problem - Ok(x.get(path).unwrap().clone()) + let x = self.sigs.read().unwrap(); + match x.get(path) { + Some(path) => Ok(path.clone()), + None => panic!("cannot get path") + } } fn save_sig(&self, path: &str, sig: Signature) -> Result { From 45b1a8ff2f24e4742c9457bc388b5e9a1fd637f9 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Wed, 21 Aug 2024 07:12:58 -0700 Subject: [PATCH 04/42] more --- src/core/src/storage/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/src/storage/mod.rs b/src/core/src/storage/mod.rs index 714fb63162..856f8ff210 100644 --- a/src/core/src/storage/mod.rs +++ b/src/core/src/storage/mod.rs @@ -660,7 +660,7 @@ impl Storage for MemStorage { let x = self.sigs.read().unwrap(); match x.get(path) { Some(path) => Ok(path.clone()), - None => panic!("cannot get path") + None => panic!("cannot get path '{path}'") } } From e8d79994745186b3be01c86fbf3a9a2e1e953aa6 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Wed, 21 Aug 2024 07:43:29 -0700 Subject: [PATCH 05/42] upd log message --- src/core/src/index/revindex/disk_revindex.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/src/index/revindex/disk_revindex.rs b/src/core/src/index/revindex/disk_revindex.rs index 511bdbcc4c..ac36c5b626 100644 --- a/src/core/src/index/revindex/disk_revindex.rs +++ b/src/core/src/index/revindex/disk_revindex.rs @@ -114,7 +114,7 @@ impl RevIndex { info!("Compact SSTs"); index.compact(); - info!("Processed {} reference sigs", processed_sigs.into_inner()); + info!("Done! Processed {} reference sigs", processed_sigs.into_inner()); Ok(module::RevIndex::Plain(index)) } From 53bcf0274f4b7bdec81077b44591af3a932a0270 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Wed, 21 Aug 2024 08:10:04 -0700 Subject: [PATCH 06/42] provide correct error --- src/core/src/collection.rs | 3 +-- src/core/src/errors.rs | 5 +++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/core/src/collection.rs b/src/core/src/collection.rs index bc03274c41..38312f6de1 100644 --- a/src/core/src/collection.rs +++ b/src/core/src/collection.rs @@ -121,8 +121,7 @@ impl Collection { .zip(other.iter()) .all(|((id1, rec1), (id2, rec2))| id1 == id2 && rec1 == rec2) .then(|| self.len()) - // TODO: right error here - .ok_or(Error::MismatchKSizes) + .ok_or(Error::CollectionNotSuperset) } pub fn from_zipfile>(zipfile: P) -> Result { diff --git a/src/core/src/errors.rs b/src/core/src/errors.rs index 90c028eb38..1a61a07041 100644 --- a/src/core/src/errors.rs +++ b/src/core/src/errors.rs @@ -49,6 +49,9 @@ pub enum SourmashError { #[error("error while calculating ANI confidence intervals: {message}")] ANIEstimationError { message: String }, + #[error("collection is not a superset")] + CollectionNotSuperset, + #[error(transparent)] ReadDataError(#[from] ReadDataError), @@ -104,6 +107,7 @@ pub enum SourmashErrorCode { NonEmptyMinHash = 1_06, MismatchNum = 1_07, NeedsAbundanceTracking = 1_08, + CollectionNotSuperset = 1_09, // Input sequence errors InvalidDNA = 11_01, InvalidProt = 11_02, @@ -155,6 +159,7 @@ impl SourmashErrorCode { SourmashError::NifflerError { .. } => SourmashErrorCode::NifflerError, SourmashError::Utf8Error { .. } => SourmashErrorCode::Utf8Error, SourmashError::CsvError { .. } => SourmashErrorCode::CsvError, + SourmashError::CollectionNotSuperset { .. } => SourmashErrorCode::CollectionNotSuperset, #[cfg(not(target_arch = "wasm32"))] #[cfg(feature = "branchwater")] From d467d9f16a75134d5b3d9c0683ebc52df5659690 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Thu, 22 Aug 2024 08:28:36 -0700 Subject: [PATCH 07/42] switch Manifest from paths to TryFrom --- src/core/src/collection.rs | 4 +++- src/core/src/manifest.rs | 29 ++++++++++++++++++++--------- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/src/core/src/collection.rs b/src/core/src/collection.rs index 38312f6de1..5d72f5b619 100644 --- a/src/core/src/collection.rs +++ b/src/core/src/collection.rs @@ -177,8 +177,10 @@ impl Collection { // TODO: // - figure out if there is a common path between sigs for FSStorage? + let manifest: Manifest = paths.try_into()?; + Ok(Self { - manifest: paths.into(), + manifest, storage: InnerStorage::new( FSStorage::builder() .fullpath("".into()) diff --git a/src/core/src/manifest.rs b/src/core/src/manifest.rs index 0a84a3e495..b79308a67c 100644 --- a/src/core/src/manifest.rs +++ b/src/core/src/manifest.rs @@ -288,8 +288,10 @@ impl From> for Manifest { } } -impl From<&[PathBuf]> for Manifest { - fn from(paths: &[PathBuf]) -> Self { +impl TryFrom<&[PathBuf]> for Manifest { + type Error = crate::Error; + + fn try_from(paths: &[PathBuf]) -> Result { #[cfg(feature = "parallel")] let iter = paths.par_iter(); @@ -298,21 +300,30 @@ impl From<&[PathBuf]> for Manifest { let records: Vec = iter .flat_map(|p| { - let recs: Vec = Signature::from_path(p) - .unwrap_or_else(|_| panic!("Error processing {:?}", p)) - .into_iter() + let sigs: Result> = Signature::from_path(p); + // @CTB need to get this error out :think: + let sigs: Vec = sigs.unwrap(); + let recs: Vec = sigs + .iter() .flat_map(|v| Record::from_sig(&v, p.as_str())) .collect(); recs }) .collect(); - Manifest { records } + let records: Result> = Ok(records); + + match records { + Ok(records) => Ok(Manifest { records }), + Err(x) => Err(crate::Error::MismatchKSizes) + } } } -impl From<&PathBuf> for Manifest { - fn from(pathlist: &PathBuf) -> Self { +impl TryFrom<&PathBuf> for Manifest { + type Error = crate::Error; + + fn try_from(pathlist: &PathBuf) -> Result { let file = File::open(pathlist).unwrap_or_else(|_| panic!("Failed to open {:?}", pathlist)); let reader = BufReader::new(file); @@ -322,7 +333,7 @@ impl From<&PathBuf> for Manifest { .map(PathBuf::from) .collect(); - paths.as_slice().into() + paths.as_slice().try_into() } } From 5f1eef619ddaede316a1b809fa767c47e0c23d89 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Thu, 22 Aug 2024 09:30:15 -0700 Subject: [PATCH 08/42] Revert "provide correct error" This reverts commit 53bcf0274f4b7bdec81077b44591af3a932a0270. --- src/core/src/collection.rs | 3 ++- src/core/src/errors.rs | 5 ----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/src/core/src/collection.rs b/src/core/src/collection.rs index 5d72f5b619..cf73cdc8d3 100644 --- a/src/core/src/collection.rs +++ b/src/core/src/collection.rs @@ -121,7 +121,8 @@ impl Collection { .zip(other.iter()) .all(|((id1, rec1), (id2, rec2))| id1 == id2 && rec1 == rec2) .then(|| self.len()) - .ok_or(Error::CollectionNotSuperset) + // TODO: right error here + .ok_or(Error::MismatchKSizes) } pub fn from_zipfile>(zipfile: P) -> Result { diff --git a/src/core/src/errors.rs b/src/core/src/errors.rs index 1a61a07041..90c028eb38 100644 --- a/src/core/src/errors.rs +++ b/src/core/src/errors.rs @@ -49,9 +49,6 @@ pub enum SourmashError { #[error("error while calculating ANI confidence intervals: {message}")] ANIEstimationError { message: String }, - #[error("collection is not a superset")] - CollectionNotSuperset, - #[error(transparent)] ReadDataError(#[from] ReadDataError), @@ -107,7 +104,6 @@ pub enum SourmashErrorCode { NonEmptyMinHash = 1_06, MismatchNum = 1_07, NeedsAbundanceTracking = 1_08, - CollectionNotSuperset = 1_09, // Input sequence errors InvalidDNA = 11_01, InvalidProt = 11_02, @@ -159,7 +155,6 @@ impl SourmashErrorCode { SourmashError::NifflerError { .. } => SourmashErrorCode::NifflerError, SourmashError::Utf8Error { .. } => SourmashErrorCode::Utf8Error, SourmashError::CsvError { .. } => SourmashErrorCode::CsvError, - SourmashError::CollectionNotSuperset { .. } => SourmashErrorCode::CollectionNotSuperset, #[cfg(not(target_arch = "wasm32"))] #[cfg(feature = "branchwater")] From eb46ecde66d5d6b6e6802f54bf99fff72c143038 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Thu, 22 Aug 2024 09:32:40 -0700 Subject: [PATCH 09/42] Revert "switch Manifest from paths to TryFrom" This reverts commit d467d9f16a75134d5b3d9c0683ebc52df5659690. --- src/core/src/collection.rs | 4 +--- src/core/src/manifest.rs | 29 +++++++++-------------------- 2 files changed, 10 insertions(+), 23 deletions(-) diff --git a/src/core/src/collection.rs b/src/core/src/collection.rs index cf73cdc8d3..bc03274c41 100644 --- a/src/core/src/collection.rs +++ b/src/core/src/collection.rs @@ -178,10 +178,8 @@ impl Collection { // TODO: // - figure out if there is a common path between sigs for FSStorage? - let manifest: Manifest = paths.try_into()?; - Ok(Self { - manifest, + manifest: paths.into(), storage: InnerStorage::new( FSStorage::builder() .fullpath("".into()) diff --git a/src/core/src/manifest.rs b/src/core/src/manifest.rs index b79308a67c..0a84a3e495 100644 --- a/src/core/src/manifest.rs +++ b/src/core/src/manifest.rs @@ -288,10 +288,8 @@ impl From> for Manifest { } } -impl TryFrom<&[PathBuf]> for Manifest { - type Error = crate::Error; - - fn try_from(paths: &[PathBuf]) -> Result { +impl From<&[PathBuf]> for Manifest { + fn from(paths: &[PathBuf]) -> Self { #[cfg(feature = "parallel")] let iter = paths.par_iter(); @@ -300,30 +298,21 @@ impl TryFrom<&[PathBuf]> for Manifest { let records: Vec = iter .flat_map(|p| { - let sigs: Result> = Signature::from_path(p); - // @CTB need to get this error out :think: - let sigs: Vec = sigs.unwrap(); - let recs: Vec = sigs - .iter() + let recs: Vec = Signature::from_path(p) + .unwrap_or_else(|_| panic!("Error processing {:?}", p)) + .into_iter() .flat_map(|v| Record::from_sig(&v, p.as_str())) .collect(); recs }) .collect(); - let records: Result> = Ok(records); - - match records { - Ok(records) => Ok(Manifest { records }), - Err(x) => Err(crate::Error::MismatchKSizes) - } + Manifest { records } } } -impl TryFrom<&PathBuf> for Manifest { - type Error = crate::Error; - - fn try_from(pathlist: &PathBuf) -> Result { +impl From<&PathBuf> for Manifest { + fn from(pathlist: &PathBuf) -> Self { let file = File::open(pathlist).unwrap_or_else(|_| panic!("Failed to open {:?}", pathlist)); let reader = BufReader::new(file); @@ -333,7 +322,7 @@ impl TryFrom<&PathBuf> for Manifest { .map(PathBuf::from) .collect(); - paths.as_slice().try_into() + paths.as_slice().into() } } From 923af44f713b0d7f61449de1df0911035e9d129c Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Fri, 23 Aug 2024 15:07:56 -0700 Subject: [PATCH 10/42] poor person's picklist? --- src/core/src/manifest.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/core/src/manifest.rs b/src/core/src/manifest.rs index 0a84a3e495..e09c178e4b 100644 --- a/src/core/src/manifest.rs +++ b/src/core/src/manifest.rs @@ -8,6 +8,7 @@ use getset::{CopyGetters, Getters, Setters}; use rayon::prelude::*; use serde::de; use serde::{Deserialize, Serialize}; +use std::collections::HashSet; use crate::encodings::HashFunctions; use crate::prelude::*; @@ -209,6 +210,18 @@ impl Manifest { pub fn iter(&self) -> impl Iterator { self.records.iter() } + + pub fn len(&self) -> usize { + self.records.len() + } + + pub fn match_picklist(self, pick: HashSet<(&str, &str)>) -> Result { + let rows = self.records.iter().filter(|row| { + pick.contains((row.name().as_str(), row.md5().as_str())) + }).collect(); + + Ok(Self { rows }) + } } impl Select for Manifest { From 315dfffe25720dda387116574cd58feb66c9afa6 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Fri, 23 Aug 2024 15:17:53 -0700 Subject: [PATCH 11/42] add picklist select --- src/core/src/collection.rs | 7 +++++++ src/core/src/manifest.rs | 10 +++++----- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/core/src/collection.rs b/src/core/src/collection.rs index bc03274c41..ef37fe3ced 100644 --- a/src/core/src/collection.rs +++ b/src/core/src/collection.rs @@ -2,6 +2,7 @@ use std::ops::{Deref, DerefMut}; use camino::Utf8Path as Path; use camino::Utf8PathBuf as PathBuf; +use std::collections::HashSet; use crate::encodings::Idx; use crate::manifest::{Manifest, Record}; @@ -215,6 +216,12 @@ impl Collection { assert_eq!(sig.signatures.len(), 1); Ok(sig) } + + pub fn select_picklist(&self, pick: HashSet<(&str, &str)>) -> Self { + // @CTB: why do we need this clone here? + let manifest = self.manifest.clone().select_picklist(pick); + Self { manifest, storage: self.storage.clone() } + } } impl Select for Collection { diff --git a/src/core/src/manifest.rs b/src/core/src/manifest.rs index e09c178e4b..eea5fd5bbf 100644 --- a/src/core/src/manifest.rs +++ b/src/core/src/manifest.rs @@ -215,12 +215,12 @@ impl Manifest { self.records.len() } - pub fn match_picklist(self, pick: HashSet<(&str, &str)>) -> Result { - let rows = self.records.iter().filter(|row| { - pick.contains((row.name().as_str(), row.md5().as_str())) - }).collect(); + pub fn select_picklist(self, pick: HashSet<(&str, &str)>) -> Self { + let records = self.records.iter().filter(|row| { + pick.contains(&(row.name().as_str(), row.md5().as_str())) + }).cloned().collect(); - Ok(Self { rows }) + Self { records } } } From 1fb658a04b4ddf143a63507443d1e8c014bc8ee0 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Fri, 23 Aug 2024 15:38:52 -0700 Subject: [PATCH 12/42] ok --- src/core/src/collection.rs | 2 +- src/core/src/manifest.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/core/src/collection.rs b/src/core/src/collection.rs index ef37fe3ced..b897624423 100644 --- a/src/core/src/collection.rs +++ b/src/core/src/collection.rs @@ -217,7 +217,7 @@ impl Collection { Ok(sig) } - pub fn select_picklist(&self, pick: HashSet<(&str, &str)>) -> Self { + pub fn select_picklist(&self, pick: HashSet<(String, String)>) -> Self { // @CTB: why do we need this clone here? let manifest = self.manifest.clone().select_picklist(pick); Self { manifest, storage: self.storage.clone() } diff --git a/src/core/src/manifest.rs b/src/core/src/manifest.rs index eea5fd5bbf..9a52396ce3 100644 --- a/src/core/src/manifest.rs +++ b/src/core/src/manifest.rs @@ -215,9 +215,9 @@ impl Manifest { self.records.len() } - pub fn select_picklist(self, pick: HashSet<(&str, &str)>) -> Self { + pub fn select_picklist(self, pick: HashSet<(String, String)>) -> Self { let records = self.records.iter().filter(|row| { - pick.contains(&(row.name().as_str(), row.md5().as_str())) + pick.contains(&(row.name().clone(), row.md5().clone())) }).cloned().collect(); Self { records } From a1229825c121fe69d75925cf569620e0e56425ab Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Fri, 23 Aug 2024 15:48:33 -0700 Subject: [PATCH 13/42] picklist by ref --- src/core/src/collection.rs | 2 +- src/core/src/manifest.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/src/collection.rs b/src/core/src/collection.rs index b897624423..06e0f7f37f 100644 --- a/src/core/src/collection.rs +++ b/src/core/src/collection.rs @@ -217,7 +217,7 @@ impl Collection { Ok(sig) } - pub fn select_picklist(&self, pick: HashSet<(String, String)>) -> Self { + pub fn select_picklist(&self, pick: &HashSet<(String, String)>) -> Self { // @CTB: why do we need this clone here? let manifest = self.manifest.clone().select_picklist(pick); Self { manifest, storage: self.storage.clone() } diff --git a/src/core/src/manifest.rs b/src/core/src/manifest.rs index 9a52396ce3..d32254fae2 100644 --- a/src/core/src/manifest.rs +++ b/src/core/src/manifest.rs @@ -215,7 +215,7 @@ impl Manifest { self.records.len() } - pub fn select_picklist(self, pick: HashSet<(String, String)>) -> Self { + pub fn select_picklist(self, pick: &HashSet<(String, String)>) -> Self { let records = self.records.iter().filter(|row| { pick.contains(&(row.name().clone(), row.md5().clone())) }).cloned().collect(); From 39816ab87ad3d7df7452312a3900e169488aab40 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Fri, 23 Aug 2024 16:21:11 -0700 Subject: [PATCH 14/42] add manifest.is_empty() --- src/core/src/manifest.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/core/src/manifest.rs b/src/core/src/manifest.rs index d32254fae2..4756f924be 100644 --- a/src/core/src/manifest.rs +++ b/src/core/src/manifest.rs @@ -215,6 +215,10 @@ impl Manifest { self.records.len() } + pub fn is_empty(&self) -> bool { + self.records.len() == 0 + } + pub fn select_picklist(self, pick: &HashSet<(String, String)>) -> Self { let records = self.records.iter().filter(|row| { pick.contains(&(row.name().clone(), row.md5().clone())) From 61416fb2d81bef3c00d2185913b9f7290a110904 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sat, 24 Aug 2024 06:16:47 -0700 Subject: [PATCH 15/42] update revindex indexing message --- src/core/src/index/revindex/disk_revindex.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/src/index/revindex/disk_revindex.rs b/src/core/src/index/revindex/disk_revindex.rs index 511bdbcc4c..ac36c5b626 100644 --- a/src/core/src/index/revindex/disk_revindex.rs +++ b/src/core/src/index/revindex/disk_revindex.rs @@ -114,7 +114,7 @@ impl RevIndex { info!("Compact SSTs"); index.compact(); - info!("Processed {} reference sigs", processed_sigs.into_inner()); + info!("Done! Processed {} reference sigs", processed_sigs.into_inner()); Ok(module::RevIndex::Plain(index)) } From 3a7abe9484bce884f0f536c75c28c97fb683e265 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sat, 24 Aug 2024 06:43:35 -0700 Subject: [PATCH 16/42] propagate error on bad directory when opening RocksDB --- src/core/src/index/revindex/mod.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/core/src/index/revindex/mod.rs b/src/core/src/index/revindex/mod.rs index aac0d47c14..013c17081b 100644 --- a/src/core/src/index/revindex/mod.rs +++ b/src/core/src/index/revindex/mod.rs @@ -186,7 +186,7 @@ impl RevIndex { pub fn open>(index: P, read_only: bool, spec: Option<&str>) -> Result { let opts = db_options(); - let cfs = DB::list_cf(&opts, index.as_ref()).unwrap(); + let cfs = DB::list_cf(&opts, index.as_ref())?; if cfs.into_iter().any(|c| c == COLORS) { // TODO: ColorRevIndex can't be read-only for now, @@ -1020,4 +1020,14 @@ mod test { Ok(()) } + + #[test] + fn rocksdb_storage_fail_bad_directory() -> Result<()> { + let testdir = TempDir::new()?; + + match RevIndex::open(testdir, true, None) { + Err(_) => Ok(()), + Ok(_) => panic!("test should not reach here"), + } + } } From fe75c6cc762bb2d1edf26e936ae14b3725d807d7 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sat, 24 Aug 2024 06:59:11 -0700 Subject: [PATCH 17/42] do we not need len? --- src/core/src/manifest.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/src/manifest.rs b/src/core/src/manifest.rs index 4756f924be..276a89f46a 100644 --- a/src/core/src/manifest.rs +++ b/src/core/src/manifest.rs @@ -211,14 +211,14 @@ impl Manifest { self.records.iter() } - pub fn len(&self) -> usize { +/* pub fn len(&self) -> usize { self.records.len() } pub fn is_empty(&self) -> bool { self.records.len() == 0 } - +*/ pub fn select_picklist(self, pick: &HashSet<(String, String)>) -> Self { let records = self.records.iter().filter(|row| { pick.contains(&(row.name().clone(), row.md5().clone())) From 5fb20fc84a8c0f25c2071e49d9c0728cf8f106b7 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sat, 24 Aug 2024 07:08:35 -0700 Subject: [PATCH 18/42] nope, don't need em --- src/core/src/manifest.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/core/src/manifest.rs b/src/core/src/manifest.rs index 276a89f46a..47e06be7a2 100644 --- a/src/core/src/manifest.rs +++ b/src/core/src/manifest.rs @@ -211,14 +211,6 @@ impl Manifest { self.records.iter() } -/* pub fn len(&self) -> usize { - self.records.len() - } - - pub fn is_empty(&self) -> bool { - self.records.len() == 0 - } -*/ pub fn select_picklist(self, pick: &HashSet<(String, String)>) -> Self { let records = self.records.iter().filter(|row| { pick.contains(&(row.name().clone(), row.md5().clone())) From 2c590102f97b12284f40ce4cfbdfe8ef9bd54342 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sat, 24 Aug 2024 07:13:58 -0700 Subject: [PATCH 19/42] revert for now --- src/core/src/storage/mod.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/core/src/storage/mod.rs b/src/core/src/storage/mod.rs index 856f8ff210..12f456fc22 100644 --- a/src/core/src/storage/mod.rs +++ b/src/core/src/storage/mod.rs @@ -657,11 +657,7 @@ impl Storage for MemStorage { } fn load_sig(&self, path: &str) -> Result { - let x = self.sigs.read().unwrap(); - match x.get(path) { - Some(path) => Ok(path.clone()), - None => panic!("cannot get path '{path}'") - } + Ok(self.sigs.read().unwrap().get(path).unwrap().clone()) } fn save_sig(&self, path: &str, sig: Signature) -> Result { From 6fee40340e8eccb94a89c6d39412e4d88e974253 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Tue, 27 Aug 2024 06:33:35 -0700 Subject: [PATCH 20/42] adjust select_picklist per luiz --- src/core/src/collection.rs | 3 +-- src/core/src/manifest.rs | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/core/src/collection.rs b/src/core/src/collection.rs index 06e0f7f37f..53755f7e08 100644 --- a/src/core/src/collection.rs +++ b/src/core/src/collection.rs @@ -218,8 +218,7 @@ impl Collection { } pub fn select_picklist(&self, pick: &HashSet<(String, String)>) -> Self { - // @CTB: why do we need this clone here? - let manifest = self.manifest.clone().select_picklist(pick); + let manifest = self.manifest.select_picklist(pick); Self { manifest, storage: self.storage.clone() } } } diff --git a/src/core/src/manifest.rs b/src/core/src/manifest.rs index 47e06be7a2..000717992d 100644 --- a/src/core/src/manifest.rs +++ b/src/core/src/manifest.rs @@ -211,7 +211,7 @@ impl Manifest { self.records.iter() } - pub fn select_picklist(self, pick: &HashSet<(String, String)>) -> Self { + pub fn select_picklist(&self, pick: &HashSet<(String, String)>) -> Self { let records = self.records.iter().filter(|row| { pick.contains(&(row.name().clone(), row.md5().clone())) }).cloned().collect(); From 06754027ef00170f6aab1655e9fc14df434a6806 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Tue, 27 Aug 2024 06:48:44 -0700 Subject: [PATCH 21/42] simplify and encapsulate --- src/core/src/collection.rs | 5 ++--- src/core/src/manifest.rs | 13 +++++++++++-- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/core/src/collection.rs b/src/core/src/collection.rs index 53755f7e08..843be43fe0 100644 --- a/src/core/src/collection.rs +++ b/src/core/src/collection.rs @@ -2,7 +2,6 @@ use std::ops::{Deref, DerefMut}; use camino::Utf8Path as Path; use camino::Utf8PathBuf as PathBuf; -use std::collections::HashSet; use crate::encodings::Idx; use crate::manifest::{Manifest, Record}; @@ -217,8 +216,8 @@ impl Collection { Ok(sig) } - pub fn select_picklist(&self, pick: &HashSet<(String, String)>) -> Self { - let manifest = self.manifest.select_picklist(pick); + pub fn intersect_manifest(&self, mf: &Manifest) -> Self { + let manifest = self.manifest.intersect_manifest(mf); Self { manifest, storage: self.storage.clone() } } } diff --git a/src/core/src/manifest.rs b/src/core/src/manifest.rs index 000717992d..1fb22f83e5 100644 --- a/src/core/src/manifest.rs +++ b/src/core/src/manifest.rs @@ -211,9 +211,18 @@ impl Manifest { self.records.iter() } - pub fn select_picklist(&self, pick: &HashSet<(String, String)>) -> Self { + pub fn intersect_manifest(&self, other: &Manifest) -> Self { + // @CTB: do we want to key on other things, like ksize, moltype, hash? + // As long as we avoid internal_location we should be fine... + + // extract tuples from other mf: + let pairs: HashSet<_> = other + .iter() + .map(|r| (r.name(), r.md5())) + .collect(); + let records = self.records.iter().filter(|row| { - pick.contains(&(row.name().clone(), row.md5().clone())) + pairs.contains(&(row.name(), row.md5())) }).cloned().collect(); Self { records } From 17f50efcf45f6b484d0c4e2f807a2bc2903538fd Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Tue, 27 Aug 2024 07:02:49 -0700 Subject: [PATCH 22/42] cargo fmt --- src/core/src/collection.rs | 5 ++++- src/core/src/manifest.rs | 13 +++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/core/src/collection.rs b/src/core/src/collection.rs index 843be43fe0..f7168273b5 100644 --- a/src/core/src/collection.rs +++ b/src/core/src/collection.rs @@ -218,7 +218,10 @@ impl Collection { pub fn intersect_manifest(&self, mf: &Manifest) -> Self { let manifest = self.manifest.intersect_manifest(mf); - Self { manifest, storage: self.storage.clone() } + Self { + manifest, + storage: self.storage.clone(), + } } } diff --git a/src/core/src/manifest.rs b/src/core/src/manifest.rs index 1fb22f83e5..147f8d63a0 100644 --- a/src/core/src/manifest.rs +++ b/src/core/src/manifest.rs @@ -216,15 +216,16 @@ impl Manifest { // As long as we avoid internal_location we should be fine... // extract tuples from other mf: - let pairs: HashSet<_> = other + let pairs: HashSet<_> = other.iter().map(|r| (r.name(), r.md5())).collect(); + + // @CTB use par_iter here, optionally? + let records = self + .records .iter() - .map(|r| (r.name(), r.md5())) + .filter(|row| pairs.contains(&(row.name(), row.md5()))) + .cloned() .collect(); - let records = self.records.iter().filter(|row| { - pairs.contains(&(row.name(), row.md5())) - }).cloned().collect(); - Self { records } } } From 39c140bf06ced4d0125eb283e6889fc88119ecbf Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 15 Sep 2024 10:03:32 -0700 Subject: [PATCH 23/42] add a test of intersect_manifest --- src/core/src/manifest.rs | 58 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/src/core/src/manifest.rs b/src/core/src/manifest.rs index 147f8d63a0..ef7d7eec6a 100644 --- a/src/core/src/manifest.rs +++ b/src/core/src/manifest.rs @@ -540,4 +540,62 @@ mod test { let scaled100 = manifest.select(&selection).unwrap(); assert_eq!(scaled100.len(), 6); } + + #[test] + fn manifest_intersect() { + let temp_dir = TempDir::new().unwrap(); + let utf8_output = PathBuf::from_path_buf(temp_dir.path().to_path_buf()) + .expect("Path should be valid UTF-8"); + let filename = utf8_output.join("sig-pathlist.txt"); + // build sig filenames + let base_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + let test_sigs = vec![ + "../../tests/test-data/47.fa.sig", + "../../tests/test-data/63.fa.sig", + ]; + + let full_paths: Vec<_> = test_sigs + .into_iter() + .map(|sig| base_path.join(sig)) + .collect(); + + // write a file in test directory with a filename on each line + let mut pathfile = File::create(&filename).unwrap(); + for sigfile in &full_paths { + writeln!(pathfile, "{}", sigfile).unwrap(); + } + + // load into manifest + let manifest = Manifest::from(&filename); + assert_eq!(manifest.len(), 2); + + // now do just one sketch - + let test_sigs2 = vec![ + "../../tests/test-data/63.fa.sig", + ]; + + let filename2 = utf8_output.join("sig-pathlist-single.txt"); + + let full_paths: Vec<_> = test_sigs2 + .into_iter() + .map(|sig| base_path.join(sig)) + .collect(); + + let mut pathfile2 = File::create(&filename2).unwrap(); + for sigfile in &full_paths { + writeln!(pathfile2, "{}", sigfile).unwrap(); + } + + // load into another manifest + let manifest2 = Manifest::from(&filename2); + assert_eq!(manifest2.len(), 1); + + // intersect with itself => same. + let new_mf = manifest2.intersect_manifest(&manifest); + assert_eq!(new_mf.len(), 1); + + // intersect with other => single. + let new_mf = manifest.intersect_manifest(&manifest2); + assert_eq!(new_mf.len(), 1); + } } From 16f72c5b3eaca7e280b8df67061b7c021aba1a57 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 15 Sep 2024 10:43:10 -0700 Subject: [PATCH 24/42] impl PartialEq/Eq for Record, ignoring internal_location --- src/core/src/manifest.rs | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/core/src/manifest.rs b/src/core/src/manifest.rs index ef7d7eec6a..bb13752bdb 100644 --- a/src/core/src/manifest.rs +++ b/src/core/src/manifest.rs @@ -18,7 +18,7 @@ use crate::Result; /// Individual manifest record, containing information about sketches. -#[derive(Debug, Serialize, Deserialize, Clone, CopyGetters, Getters, Setters, PartialEq, Eq)] +#[derive(Debug, Serialize, Deserialize, Clone, CopyGetters, Getters, Setters)] pub struct Record { #[getset(get = "pub", set = "pub")] internal_location: PathBuf, @@ -177,6 +177,21 @@ impl Record { } } +impl PartialEq for Record { + // match everything but internal_location + fn eq(&self, other: &Self) -> bool { + self.md5 == other.md5 && + self.ksize == other.ksize && + self.scaled == other.scaled && + self.n_hashes == other.n_hashes && + self.with_abundance == other.with_abundance && + self.name == other.name && + self.filename == other.filename + } +} + +impl Eq for Record {} + impl Manifest { pub fn from_reader(rdr: R) -> Result { let mut records = vec![]; From 2146f307f239705146a5f9e8860653e6f9d551a9 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 15 Sep 2024 17:18:04 -0700 Subject: [PATCH 25/42] switch to using full Record for intersect_manifest --- src/core/src/manifest.rs | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/core/src/manifest.rs b/src/core/src/manifest.rs index bb13752bdb..edc1edae6b 100644 --- a/src/core/src/manifest.rs +++ b/src/core/src/manifest.rs @@ -1,6 +1,8 @@ use std::fs::File; use std::io::{BufRead, BufReader, Read, Write}; use std::ops::Deref; +use std::collections::HashSet; +use std::hash::{Hash, Hasher}; use camino::Utf8PathBuf as PathBuf; use getset::{CopyGetters, Getters, Setters}; @@ -8,7 +10,6 @@ use getset::{CopyGetters, Getters, Setters}; use rayon::prelude::*; use serde::de; use serde::{Deserialize, Serialize}; -use std::collections::HashSet; use crate::encodings::HashFunctions; use crate::prelude::*; @@ -192,6 +193,16 @@ impl PartialEq for Record { impl Eq for Record {} +impl Hash for Record { // @CTB moltype, other things? test compare empty. + fn hash(&self, state: &mut H) { + self.md5.hash(state); + self.scaled.hash(state); + self.with_abundance.hash(state); + self.name.hash(state); + self.filename.hash(state); + } +} + impl Manifest { pub fn from_reader(rdr: R) -> Result { let mut records = vec![]; @@ -227,17 +238,14 @@ impl Manifest { } pub fn intersect_manifest(&self, other: &Manifest) -> Self { - // @CTB: do we want to key on other things, like ksize, moltype, hash? - // As long as we avoid internal_location we should be fine... - // extract tuples from other mf: - let pairs: HashSet<_> = other.iter().map(|r| (r.name(), r.md5())).collect(); + let pairs: HashSet<_> = other.iter().map(|r| r).collect(); // @CTB use par_iter here, optionally? let records = self .records .iter() - .filter(|row| pairs.contains(&(row.name(), row.md5()))) + .filter(|row| pairs.contains(row)) .cloned() .collect(); From 32839ae5a5ee35442abc6002728b25454d965c70 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 15 Sep 2024 18:05:59 -0700 Subject: [PATCH 26/42] round out comparison & hashing for Record --- src/core/src/manifest.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/core/src/manifest.rs b/src/core/src/manifest.rs index edc1edae6b..ee95415e2d 100644 --- a/src/core/src/manifest.rs +++ b/src/core/src/manifest.rs @@ -183,7 +183,9 @@ impl PartialEq for Record { fn eq(&self, other: &Self) -> bool { self.md5 == other.md5 && self.ksize == other.ksize && + self.moltype == other.moltype && self.scaled == other.scaled && + self.num == other.num && self.n_hashes == other.n_hashes && self.with_abundance == other.with_abundance && self.name == other.name && @@ -193,10 +195,14 @@ impl PartialEq for Record { impl Eq for Record {} -impl Hash for Record { // @CTB moltype, other things? test compare empty. +impl Hash for Record { fn hash(&self, state: &mut H) { self.md5.hash(state); + self.ksize.hash(state); + self.moltype.hash(state); self.scaled.hash(state); + self.num.hash(state); + self.n_hashes.hash(state); self.with_abundance.hash(state); self.name.hash(state); self.filename.hash(state); @@ -241,7 +247,6 @@ impl Manifest { // extract tuples from other mf: let pairs: HashSet<_> = other.iter().map(|r| r).collect(); - // @CTB use par_iter here, optionally? let records = self .records .iter() From 43ee757212d18934d9caf79ab1f0b823067b051e Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 15 Sep 2024 18:18:33 -0700 Subject: [PATCH 27/42] cargo fmt --- src/core/src/manifest.rs | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/src/core/src/manifest.rs b/src/core/src/manifest.rs index ee95415e2d..cb1f47487d 100644 --- a/src/core/src/manifest.rs +++ b/src/core/src/manifest.rs @@ -1,8 +1,8 @@ +use std::collections::HashSet; use std::fs::File; +use std::hash::{Hash, Hasher}; use std::io::{BufRead, BufReader, Read, Write}; use std::ops::Deref; -use std::collections::HashSet; -use std::hash::{Hash, Hasher}; use camino::Utf8PathBuf as PathBuf; use getset::{CopyGetters, Getters, Setters}; @@ -181,15 +181,15 @@ impl Record { impl PartialEq for Record { // match everything but internal_location fn eq(&self, other: &Self) -> bool { - self.md5 == other.md5 && - self.ksize == other.ksize && - self.moltype == other.moltype && - self.scaled == other.scaled && - self.num == other.num && - self.n_hashes == other.n_hashes && - self.with_abundance == other.with_abundance && - self.name == other.name && - self.filename == other.filename + self.md5 == other.md5 + && self.ksize == other.ksize + && self.moltype == other.moltype + && self.scaled == other.scaled + && self.num == other.num + && self.n_hashes == other.n_hashes + && self.with_abundance == other.with_abundance + && self.name == other.name + && self.filename == other.filename } } @@ -598,9 +598,7 @@ mod test { assert_eq!(manifest.len(), 2); // now do just one sketch - - let test_sigs2 = vec![ - "../../tests/test-data/63.fa.sig", - ]; + let test_sigs2 = vec!["../../tests/test-data/63.fa.sig"]; let filename2 = utf8_output.join("sig-pathlist-single.txt"); From 1594dc48929acbfa6be5a43ed04e57cce8cdb65f Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Mon, 16 Sep 2024 09:34:30 -0700 Subject: [PATCH 28/42] remove identity closure --- src/core/src/manifest.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/src/manifest.rs b/src/core/src/manifest.rs index cb1f47487d..2f1eca7e0f 100644 --- a/src/core/src/manifest.rs +++ b/src/core/src/manifest.rs @@ -245,7 +245,7 @@ impl Manifest { pub fn intersect_manifest(&self, other: &Manifest) -> Self { // extract tuples from other mf: - let pairs: HashSet<_> = other.iter().map(|r| r).collect(); + let pairs: HashSet<_> = other.iter().collect(); let records = self .records From 2465c02198f251c923c10d51fbfaae1967f44b0d Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Mon, 16 Sep 2024 14:22:51 -0700 Subject: [PATCH 29/42] add in a print for debugging --- src/core/src/storage/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core/src/storage/mod.rs b/src/core/src/storage/mod.rs index 12f456fc22..f25ffdc596 100644 --- a/src/core/src/storage/mod.rs +++ b/src/core/src/storage/mod.rs @@ -469,6 +469,7 @@ impl Select for SigStore { fn select(mut self, selection: &Selection) -> Result { // TODO: find better error let sig = self.data.take().ok_or(Error::MismatchKSizes)?; + eprintln!("YYY {:?}", sig); self.data = OnceCell::with_value(sig.select(selection)?); Ok(self) } From 5af5f4566cdba5f79f1cb26385554f392995a6da Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Mon, 16 Sep 2024 14:24:57 -0700 Subject: [PATCH 30/42] more print --- src/core/src/signature.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core/src/signature.rs b/src/core/src/signature.rs index 0ab8190f98..a8975c762a 100644 --- a/src/core/src/signature.rs +++ b/src/core/src/signature.rs @@ -795,6 +795,7 @@ impl ToWriter for Signature { impl Select for Signature { fn select(mut self, selection: &Selection) -> Result { + eprintln!("ZZZ: {}", self.signatures.len()); self.signatures.retain(|s| { let mut valid = true; valid = if let Some(ksize) = selection.ksize() { From b46565d67478be53854f15a68176981f1ba91100 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Mon, 16 Sep 2024 14:28:26 -0700 Subject: [PATCH 31/42] remove prints --- src/core/src/signature.rs | 1 - src/core/src/storage/mod.rs | 1 - 2 files changed, 2 deletions(-) diff --git a/src/core/src/signature.rs b/src/core/src/signature.rs index a8975c762a..0ab8190f98 100644 --- a/src/core/src/signature.rs +++ b/src/core/src/signature.rs @@ -795,7 +795,6 @@ impl ToWriter for Signature { impl Select for Signature { fn select(mut self, selection: &Selection) -> Result { - eprintln!("ZZZ: {}", self.signatures.len()); self.signatures.retain(|s| { let mut valid = true; valid = if let Some(ksize) = selection.ksize() { diff --git a/src/core/src/storage/mod.rs b/src/core/src/storage/mod.rs index f25ffdc596..12f456fc22 100644 --- a/src/core/src/storage/mod.rs +++ b/src/core/src/storage/mod.rs @@ -469,7 +469,6 @@ impl Select for SigStore { fn select(mut self, selection: &Selection) -> Result { // TODO: find better error let sig = self.data.take().ok_or(Error::MismatchKSizes)?; - eprintln!("YYY {:?}", sig); self.data = OnceCell::with_value(sig.select(selection)?); Ok(self) } From 9c267528977fab0bb04dc1da86c47290bff68bf8 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Tue, 17 Sep 2024 08:34:15 -0700 Subject: [PATCH 32/42] add a test for Collection::intersect_manifest --- src/core/src/collection.rs | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/core/src/collection.rs b/src/core/src/collection.rs index f7168273b5..fdd47a1f58 100644 --- a/src/core/src/collection.rs +++ b/src/core/src/collection.rs @@ -245,6 +245,7 @@ mod test { use crate::selection::Selection; use crate::signature::Signature; use crate::Result; + use crate::manifest::Manifest; #[test] fn sigstore_selection_with_downsample() { @@ -366,6 +367,33 @@ mod test { assert_eq!(cl.len(), 0); } + #[test] + fn collection_intersect_manifest() { + // load test sigs + let mut filename = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + // four num=500 sigs + filename.push("../../tests/test-data/genome-s11.fa.gz.sig"); + let file = File::open(filename).unwrap(); + let reader = BufReader::new(file); + let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + assert_eq!(sigs.len(), 4); + // load sigs into collection + select compatible signatures + let cl = Collection::from_sigs(sigs) + .unwrap(); + // all sigs should remain + assert_eq!(cl.len(), 4); + + // grab first record + let manifest = cl.manifest(); + let record = manifest.iter().next().unwrap().clone(); + let vr = vec![record]; + + // now intersect: + let manifest2 = Manifest::from(vr); + let cl2 = cl.intersect_manifest(&manifest2); + assert_eq!(cl2.len(), 1); + } + #[test] fn sigstore_sig_from_record() { // load test sigs From 63df7a828f5a1cd909db650bebaae937ca74f561 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Tue, 17 Sep 2024 12:22:58 -0700 Subject: [PATCH 33/42] panic in bad circumstances --- src/core/src/storage/mod.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/core/src/storage/mod.rs b/src/core/src/storage/mod.rs index 12f456fc22..38bfdd507d 100644 --- a/src/core/src/storage/mod.rs +++ b/src/core/src/storage/mod.rs @@ -298,9 +298,10 @@ impl Storage for FSStorage { fn load_sig(&self, path: &str) -> Result { let raw = self.load(path)?; - let sig = Signature::from_reader(&mut &raw[..])? - // TODO: select the right sig? - .swap_remove(0); + + let mut vs = Signature::from_reader(&mut &raw[..])?; + if vs.len() > 1 { panic!("more than one sig."); } + let sig = vs.swap_remove(0); Ok(sig.into()) } From 4c29e9ec221241d94602493e26c304cbc5ab917c Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Tue, 24 Sep 2024 07:15:13 -0700 Subject: [PATCH 34/42] add unimplemented when there are multiple Signatures --- src/core/src/collection.rs | 2 +- src/core/src/storage/mod.rs | 20 +++++++++++++------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/core/src/collection.rs b/src/core/src/collection.rs index 2493f313fc..66f3311dff 100644 --- a/src/core/src/collection.rs +++ b/src/core/src/collection.rs @@ -238,11 +238,11 @@ mod test { use crate::encodings::HashFunctions; use crate::manifest::Manifest; + use crate::manifest::Manifest; use crate::prelude::Select; use crate::selection::Selection; use crate::signature::Signature; use crate::Result; - use crate::manifest::Manifest; #[test] fn sigstore_selection_with_downsample() { diff --git a/src/core/src/storage/mod.rs b/src/core/src/storage/mod.rs index 38bfdd507d..bae75c3b6a 100644 --- a/src/core/src/storage/mod.rs +++ b/src/core/src/storage/mod.rs @@ -33,9 +33,11 @@ pub trait Storage { /// Load signature from internal path fn load_sig(&self, path: &str) -> Result { let raw = self.load(path)?; - let sig = Signature::from_reader(&mut &raw[..])? - // TODO: select the right sig? - .swap_remove(0); + let mut vs = Signature::from_reader(&mut &raw[..])?; + if vs.len() > 1 { + unimplemented!("only one Signature currently allowed"); + } + let sig = vs.swap_remove(0); Ok(sig.into()) } @@ -300,7 +302,9 @@ impl Storage for FSStorage { let raw = self.load(path)?; let mut vs = Signature::from_reader(&mut &raw[..])?; - if vs.len() > 1 { panic!("more than one sig."); } + if vs.len() > 1 { + unimplemented!("only one Signature currently allowed"); + } let sig = vs.swap_remove(0); Ok(sig.into()) @@ -369,9 +373,11 @@ impl Storage for ZipStorage { fn load_sig(&self, path: &str) -> Result { let raw = self.load(path)?; - let sig = Signature::from_reader(&mut &raw[..])? - // TODO: select the right sig? - .swap_remove(0); + let mut vs = Signature::from_reader(&mut &raw[..])?; + if vs.len() > 1 { + unimplemented!("only one Signature currently allowed"); + } + let sig = vs.swap_remove(0); Ok(sig.into()) } From 70b1c5dbaf8dacdd6e52a22a5b7b627e6fb54f45 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Tue, 24 Sep 2024 07:26:57 -0700 Subject: [PATCH 35/42] upd --- src/core/src/collection.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/core/src/collection.rs b/src/core/src/collection.rs index 66f3311dff..669e7d5588 100644 --- a/src/core/src/collection.rs +++ b/src/core/src/collection.rs @@ -237,8 +237,6 @@ mod test { use super::Collection; use crate::encodings::HashFunctions; - use crate::manifest::Manifest; - use crate::manifest::Manifest; use crate::prelude::Select; use crate::selection::Selection; use crate::signature::Signature; From b95ea9a9ca422833cd2f7420977015171a2399c0 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Tue, 24 Sep 2024 07:36:42 -0700 Subject: [PATCH 36/42] ?? --- src/core/src/collection.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core/src/collection.rs b/src/core/src/collection.rs index 669e7d5588..aa8e33e6e8 100644 --- a/src/core/src/collection.rs +++ b/src/core/src/collection.rs @@ -237,6 +237,7 @@ mod test { use super::Collection; use crate::encodings::HashFunctions; + use crate::manifest::Manifest; use crate::prelude::Select; use crate::selection::Selection; use crate::signature::Signature; From c5b2d4d368f90232a20d8fabb8d555b8e5b7b37a Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 27 Oct 2024 07:35:30 -0700 Subject: [PATCH 37/42] document InnerStorage a bit --- src/core/src/storage/mod.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/core/src/storage/mod.rs b/src/core/src/storage/mod.rs index 47ef9d14a7..7e5914fe71 100644 --- a/src/core/src/storage/mod.rs +++ b/src/core/src/storage/mod.rs @@ -72,6 +72,15 @@ pub enum StorageError { MissingFeature(String, String), } +/// InnerStorage: a catch-all type that allows using any Storage in +/// parallel contexts. +/// +/// Arc allows ref counting to share it between threads; +/// RwLock makes sure there is only one writer possible (and a lot of readers); +/// dyn Storage so we can init with anything that implements the Storage trait; +/// Send + Sync + 'static is kind of a cheat to avoid lifetimes issues: we +/// should get rid of that 'static if possible... -- Luiz. + #[derive(Clone)] pub struct InnerStorage(Arc>); From f370ed78aa419eb9bcfbbe48fee5b0e632be9ea7 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 27 Oct 2024 09:19:40 -0700 Subject: [PATCH 38/42] document InnerStorage a bit --- src/core/src/storage/mod.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/core/src/storage/mod.rs b/src/core/src/storage/mod.rs index 7e5914fe71..97b676d370 100644 --- a/src/core/src/storage/mod.rs +++ b/src/core/src/storage/mod.rs @@ -77,9 +77,10 @@ pub enum StorageError { /// /// Arc allows ref counting to share it between threads; /// RwLock makes sure there is only one writer possible (and a lot of readers); -/// dyn Storage so we can init with anything that implements the Storage trait; -/// Send + Sync + 'static is kind of a cheat to avoid lifetimes issues: we -/// should get rid of that 'static if possible... -- Luiz. +/// dyn Storage so we can init with anything that implements the Storage trait. + +// Send + Sync + 'static is kind of a cheat to avoid lifetimes issues: we +// should get rid of that 'static if possible... -- Luiz. #[derive(Clone)] pub struct InnerStorage(Arc>); From 874a9a8542bed339819eacf6854337a22457b459 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 27 Oct 2024 09:47:16 -0700 Subject: [PATCH 39/42] finally, a test that works (well, fails appropriately) --- src/core/src/collection.rs | 46 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/src/core/src/collection.rs b/src/core/src/collection.rs index 9ad2d891bc..ade03d9623 100644 --- a/src/core/src/collection.rs +++ b/src/core/src/collection.rs @@ -416,6 +416,52 @@ mod test { } } + // this test could probably be simpler + #[test] + fn sigstore_sig_from_record_2() { + use crate::manifest::Record; + use crate::storage::{ FSStorage, InnerStorage }; + use crate::selection::Selection; + + // load test sigs + let mut filename = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + filename.push("../../tests/test-data/short.sig"); + + // CTB: this could probably be improved somehow... + let filename: String = filename.into(); + let filename_ref = filename.as_str(); + + // load signatures + let sigs = Signature::from_path(filename.clone()).expect("error loading"); + + // convert to records + let records: Vec = sigs + .into_iter() + .flat_map(|v| Record::from_sig(&v, filename_ref)) + .collect(); + + eprintln!("{:?}", records); + + // build a new collection using this manifest + let manifest: Manifest = records.into(); + let collection = Collection::new(manifest, + InnerStorage::new( + FSStorage::builder() + .fullpath("../../tests/test-data/".into()) + .subdir("".into()) + .build()) + ); + + let selection = Selection::builder().build(); + + for (_idx, rec) in collection.iter() { + // need to pass select again here so we actually downsample + let this_sig = collection.sig_from_record(rec).unwrap().select(&selection).unwrap(); + let this_mh = this_sig.minhash().unwrap(); + assert_eq!(this_mh.scaled(), 2000); + } + } + #[test] fn sigstore_selection_moltype_zip() { // load test sigs From 0745990ab6f022cebba84b1a5b0ac43b768020c4 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 27 Oct 2024 11:00:23 -0700 Subject: [PATCH 40/42] cleanup --- src/core/src/collection.rs | 35 +++++++++++++++++------------------ src/core/src/storage/mod.rs | 2 +- tests/test-data/short.sig.gz | Bin 0 -> 10995 bytes 3 files changed, 18 insertions(+), 19 deletions(-) create mode 100644 tests/test-data/short.sig.gz diff --git a/src/core/src/collection.rs b/src/core/src/collection.rs index ade03d9623..25803d6de2 100644 --- a/src/core/src/collection.rs +++ b/src/core/src/collection.rs @@ -420,21 +420,19 @@ mod test { #[test] fn sigstore_sig_from_record_2() { use crate::manifest::Record; - use crate::storage::{ FSStorage, InnerStorage }; use crate::selection::Selection; + use crate::storage::{FSStorage, InnerStorage}; // load test sigs let mut filename = PathBuf::from(env!("CARGO_MANIFEST_DIR")); - filename.push("../../tests/test-data/short.sig"); - - // CTB: this could probably be improved somehow... + filename.push("../../tests/test-data/short.sig.gz"); let filename: String = filename.into(); let filename_ref = filename.as_str(); // load signatures let sigs = Signature::from_path(filename.clone()).expect("error loading"); - // convert to records + // convert to records, loaded from 'short.sig.gz'. let records: Vec = sigs .into_iter() .flat_map(|v| Record::from_sig(&v, filename_ref)) @@ -444,22 +442,23 @@ mod test { // build a new collection using this manifest let manifest: Manifest = records.into(); - let collection = Collection::new(manifest, - InnerStorage::new( - FSStorage::builder() - .fullpath("../../tests/test-data/".into()) - .subdir("".into()) - .build()) + let collection = Collection::new( + manifest, + InnerStorage::new( + FSStorage::builder() + .fullpath("../../tests/test-data/".into()) + .subdir("".into()) + .build(), + ), ); - let selection = Selection::builder().build(); + // pull off first record + let v: Vec<_> = collection.iter().collect(); + let (_idx, rec) = v.first().expect("no records in collection?!"); - for (_idx, rec) in collection.iter() { - // need to pass select again here so we actually downsample - let this_sig = collection.sig_from_record(rec).unwrap().select(&selection).unwrap(); - let this_mh = this_sig.minhash().unwrap(); - assert_eq!(this_mh.scaled(), 2000); - } + // this will panic with "unimplemented" because there are two + // sketches and that is not supported. + let _first_sig = collection.sig_from_record(rec).expect("no sig!?"); } #[test] diff --git a/src/core/src/storage/mod.rs b/src/core/src/storage/mod.rs index 97b676d370..e098f58eba 100644 --- a/src/core/src/storage/mod.rs +++ b/src/core/src/storage/mod.rs @@ -314,7 +314,7 @@ impl Storage for FSStorage { let mut vs = Signature::from_reader(&mut &raw[..])?; if vs.len() > 1 { - unimplemented!("only one Signature currently allowed"); + unimplemented!("only one Signature currently allowed when using 'load_sig'"); } let sig = vs.swap_remove(0); diff --git a/tests/test-data/short.sig.gz b/tests/test-data/short.sig.gz new file mode 100644 index 0000000000000000000000000000000000000000..84047034dc965758f24d9889e8f54b813fe40ef5 GIT binary patch literal 10995 zcmVku-!6kiIgZ_N}yCx_nebM3_Es2=0E<+htIzH_}zCOe*59O zKYah~*B`(8{Xc*A<$wO>;~&5O_S1*w!>6x5e)$!D$^ZSmfA_`r-+cDRFaPijqVk)s zdEonR*WfR|`0}ey-+cV~lkfTcAHMzLZ@&15^s6sF`}ECs3jg6B3V;5eZ{~ad_%9#6 z`Tpwg4ag z{KmJOTF>eI=CAu`wtlZ>#;xr=3=Mt-Y_ZXXR(*VMKgk-d@@m zZJ&i`o;r8Ce1!>)(Z|CJnaVhOY}Tc)0Eq5b_TKze8MQY@vF|gNV_p0`GiM{2&?<{} ztTOlE_p6TcEM{9-t$UejzkSzdjXMfU+}GNS&UfumSz|_k0`%)6&z@zCS=a$)!Rq$@ zY^F8mEi5_FX{`LG@*3yse^4jqxHIjW2B%8x>$yS6OFA+Fbprf2}FQ{8r_iEF1IRm?5&sJD3-r zVI|K{#!xU=jB0iD4#TbtyrA2S&Drem(grIwI%l8N!Kq1%rE1JwgF{=UR)_2|=7FuU z{fF;wO_~+!^*W5sh}u2K81Al%by~br&!7xr?o6L88pYK@hB*!Pj}^q0SwH;&KdN)I z2DbL$f`thguaVMY~^T_%k0V3eb%SX&uOkK{I7WELAIR{RL- zhYv*k$6_AL3QP0P{EeQ?-?1~3%`3wlyi5ieR^{;cE;GhY_&dJE8qbY`!lX5@%Fvnl zS*R}S=ITe*x_$*EVa}|_!*UjUn1I8+G?1g7>s`x>s%HzfZY#x=HXhOlC@aJQkP=*J? z;ca8kLc5qRdy79S==8nXabt;0=%UKuIB*HyT(%3e93|+!|7|!_fB=q5`T%gh)6?QbW2*1kK;Y|%_ z{9l~ZLcy__itV#>Otm^X!XLBpS=|v@%kPa>MaQSi6+#{NWj=;0R&;E<8tS)dyz5oA z+;aUTf9L8eV@a8ddw>^M*mR;SF5cpUZ{DaX28{m0Ha&Kf)r*F7vD(Y5-d)W(4J#=p)F-ePYslckuyMnHL&BQ3rH`%|4={!_L_;U!<< z`}ov{_YYwfmWBxuTd_fA3wmXpYN8fH1=H&A*^mpcVn(P{WXmDya99T2v9jy_pngO- zpSj2wXB9(SL4-3^rs}Vgp)AyVJ`9fa!LR^&MX*BL(u43dgD>U;NCT^2G593V0{_AF zfZG{!OQr#CV8dAgum>_WnniNci6!76pJjDbZlG|Y47R`s;-&~~IN5H zkp)M#{1I*h??+t1$sxlyfIJP?%?7ioAbS>!aKUWQf5XM(cf;i|#lgQ}UaUScr%|#O z6XQZ{l~u!{5W*CqqvM+PLHP>iYWT+PkckR}VmLA!9WF}Q5I30NLHOTBed}M+~Q8 zD9DTjBnkxT!4S=HkiEPtJ8ZOMN00$(Sr~!?*^97~Z?Gs6ugT-gk9|=Owy7z}L>Gm3 zt^Y9}EGCb9gC!6_Ds!v5C|*p?FCJlNH5U1oglqH&*;dVoJE+Sv{4wTOni$yF?KYSk zw%{cVlkNA7kG^r3;IaZbT{ccx6NlkU@M0yn801Ez<88!hy~{Qw6}b@Dc~(3vOh=~w zM&k?G#W?XOWT4HA2*j4Gf>>o%dR^yaQ^13648@$w3zYfRKJ@pf^ANzDWpYg@yZ%m4 z6>OyE5MU$2n8jCuL?NxeGt)G16Y%k;#EklgtFTE&V^qQ`Dbw99?2IUib>ZE~hxzc* zI?)9d9dGiuX0Kp|TA{4CN@YXf$CprVqT@1`MmyO6LNJPMioPOS4uoObeH{ zV0(4+js@+(L{NEL85YR)AUgs9GH-SgodgsqK(X2~ zUopjJtyl;k`M5$Z;U4-7?qMsBE2wpe;gMn6*f5{yBCjpWU`A5)Hs11XO(?suO8anUaKRYJh-<~i;-c796I6WT zHX1W#%ngiyexS=N7wfM~WMJZ`5t{HKW5R4V3S{~Q#8sv7Fua z{QG@$V4!()*>q}{h&|#yyxb9Pu~`OuWI|#DDg$4nUISEgSvj$v`enSo4TVg4y+WR&H7U69Cs zHGE9D`dt)fCo|U!w(;ee;c$*gt?4Z0ewE4X-~)&8M(_BTo3J;x1`-LZ(1RhhhwD1= zYAPHu{1FIe8v2g>0pnB2(RaX(p^HQb8OqT#K}LixM~ka6woJ~DFlHEcf02M#NWY89F7oQ*#iwr*`=mA`jCt&8vfQGnYRSUIZ z`D$ePFq4HF;)ri(vJ#{W5zoo*nFlXYaq!|{0Iutdpqap-Ww^J!^E><-d**m~9(8A} zbnQ|nl7Ba@M%mQg&KM@&)81YHR(fR{V~u5)WEU0a3?wy--I#7QjNdS4rqrw-Fi`p^ zDYPUrzK(Yr9PAPcDf&!!I!t^Lu_Kcc@xe7clCvubA?DmIR<^ zAy9(|Np5Ap5n@+~b+%Y;iKPW(7|zX3WaW8Q(!<1#y=00gMMq{f$upYgkeFD?)I3v- zCZ5Q5@hLbaDJYDQO&wdAv~86MJz?{FR$~F%v1jTkqeg~}gG1RWHFwK{nAoFsX0}(R zu*jJ=;X1KGZDkWtl850-QHbk@>1lVZ%+e%1SU~(7n_iL0y1JQ!4!Jn6#nLr*qhb>S z3rvvVr-6qBSM#)HMXLw?{f;)Ft5;eG_* zPf(GH0{9I1?mUPH#1+Cj+b!9JPOg_T#)0yuB_*Ux@P@-GD6xQtc(j-X(=;RLA|1B> zk;{>)=WW9PkA6I6(S*#9d{W)|ao@hJ6Oew1Oq%}`2^JMJsr^|d;(*;5{(`fY>IJWhP#O0<_R{xaEFv{XtU5GMN4%F5Y&_Z+#YB)!_< zv+`$>1}z4I-Re~`57TX_1|!3WjHmsI@If&60GdL5%2oRw@fBlWP~Av1{0DVJrnNLv z+bv(uRy|v8V;b58#sT)on)R$AP#+1B>ycGtjzEcObQ$t0_)V)$*-NxM!U*yI z#RbTOyJpQX>M9>A#b7#SBs2Q(I5NCVZK*~cD|py%GE+;3#s`@(@Tt`cA+fnE`PeeA zWts4-=1`#WCE8;B%&`fu-8q9Xiv>3mG@_2Iz|0OVi*VB5Q<7kswim`HFYhv2HYs{J z<{{79EC)zb;m`3ATMDAfX#-FSo`9H)4)RsdfTWfoH1A{4rs(LPar#i7H9AWI;>;L4 zkWU$twALs(ISG8Hv+V$%}4Wk~BNQg_7F<_C{5 zytfGlYid23VDU(Qb|Z=9VYbCw+zb@mVJTesqPtW^b|VpAt)hvt9up_Lo!Je3rsx00?)sG>BmLofcA~TVfZ>snyWhNMeF}{T-#$F%u*;_1T zW2Weh{>qJ!x#M5vB!UrS(wSG*8wI>NRAookWaV6nuQ)!&<+>WCuhvL@smT*^>wI1XLqpfJ^b1B}%$PQAha{YJP!Sv7wPGiNC}y1c z621WvU4iJuo(FeD*-C5|Xl?a?@!`J+giS`=a)-5(Bj^%rFceh3iS?M0uGMta?9|Fk zWD@Py>Z9s{A2&A^j}JeKMWF4-hP8=*GZFAOGOJc>F*Z?+Q06_yLaWT;lxtW%{-mrb z@)3NNFlxxU#RjZD^+vwNShPw@B~5T+L5unNqiddffJ|~-eB-6$@@A5-I4sursLC=a zLHElTfDFfK)5a2;FhItFB(ij1cu2*46WkBaUfWl0{hju^j;f#UeOs zsY6!3do8*%KtCDQ?MbzC={2mV<4(b5z=Yx&i@>|hDeAM?>wcB}M6~={nVbwxQk=gn zem-e(QOo#|AR1LsX1s6Gq}HNi``3biI7)bIO`<+nRTuki3#~S0fKR1d>aD4uR7(!W z-%XWMy9iH%V|fHnItB^!VvdP_zOUk(NxKZVY>_gVUTJ}FDXf+<6K*}3>c7OwV7jEk#G8OgA#uH1|=q+ zaE_K0keP91;ZXwdA2Ms6DoYDL#HX3ZNPof^Ny&7;tTX8jlSsu1&7I=TD>DlgzgE^I z&%j*1z?uUIezyw6Z=cjhmQg~cCrvWStW&TlH7S?5Deh2!WQxv?n$9pj@PU7RWnjf1 zu8hC8s@n{XvXzg0L<0V5x~Us0j2WM!3Z;dqOC(-pZ6serYCjK{THr!)A7WiKL#Ks&qmmx7g#u8=`HA}v057$<9B()5wA~s(5#EPpO z2n-$fgpE8BuRR?$ra_<;zw5`GrT#cIffm^|p{*|RbgLhSS)}9$R3ZYo8zOhtd2icy zB$wQCJP1*3+YX^~;h)DszATDJsF-$Ee~04Akjj|5ytV|LI8F=ef-3{slRd*1Xr&mt z6pkH0$Jm7Ltk$KjdT*cC&OEj1F87+^|Bwl_b4y;DvA9g~w!;KxX`9NVt(rAgzHUcS zT8g$=8(WLNv_hlo=wn&CNz4}jJOX=S;BtAy`1XV;bWYYUy9@!Vx)t`p=HS0%24AM` zfQJMdkeYPSBF9oFHIU4eeZaWyjc`kqbp4OG+b)wL6?;W`LM*$@UcxC=rJGb zzF{@kB+F2%@Y;4@T5lvv#a-@5poyhfpRrNN)+ttg1P5B0M}zDQDTX&omo^4AS5pPTx4ym!37Fg8Q6gkPWg(4~dPsXgtSkj#<0%+Ne?2~A9OKsT*QOW! zC3GHogCdt+9a1U?7T`naGh64b3*TBb9;BJkC|uH9=04k&$NB0STfwR_SfGRL^4KJBjh~%acmX zd~x1t=VOyY!2Z^iO_?XBnQZ3h>%~iYg6D-nXAhf|jUuA1&cSgxdB_C}K zb?x#?mIwRS>B!n%J4)>Z4bRtXd8{VN3i0wp{MWiVJicBN{pHB6=s@tLkOgUJ0Af!C zgC)Xkm!Iy*Hc|Fjp{$20i~SYL{72!^=YuL50hcg2Nb zF_T&=5r!Ab(2_94#C>$?h3A`2@$y{1tc zaiIQ6T$=`5yY%cE*x|^Nai<+fAqYjZuw`#-wTh4tTg$b#5{LU565rG9UO83U4D%rg zn_}&-*rRZ+OD9UOXd?xOd!6>%Rg-vD*kCyAFS`+%5ZKmPjqy?;5lUttv1Yx@929PQ zU6%FPV{ym+HOa-gMIg-tPPC=a!dX!Z#kM9z2(3hmh=?}QM%@-Rsiu)y@w8_GWpg*t z_oKQF8;ejsElzlkue3HI%Vzqe5Lc1TF-=?A-XV$mw}|;z5VVIPLdlWfi7H4N`MN%y zA4syVVd-v2vwuh?03lB*Dt&7RL0i+HCdMWY9%zi z?xHr6d4=q`aH+-!Z9#f;GeZPwZA?skvvD>8_7pMj4eUqO(?Er5Ov=&ye1vXR&q7-7 zIoS?^bhSy@V$ZZlkPpeYYumz1muo~Q0CK1B>GDj@$i`B&NkW~iOItZYn|<}lB@z6z zG_|H~HHL@cmtNuDz(iUDLicnO@`OOK8Wa?HsmO5~Ex?^MCm#V3WGoqw~j_ zf<37=RoU%#Ba~3GY-j=@siqKnw0VtXN{?Ii=(r_b6hR7*X!E1~K~E7p0ac@}@< zKTnO&CWeFpLW4`YB3z*q#e0Mxd6Sx)xLTK!xG_H82=#NOo+29EzryW7ge6mJ`~G^p z^Q2xb_c@Y)=5cbwuqAlslJ2lHm}3#L3+KKLO_X$)Gf13|*nkIuRMC@TiCgcAZ$Nik zdB!cG>0-|z$DaoEARo_$G)s3;Z+Csz(q#`I^R3x98$G%e-EJ>8iss||X4`gW0!+5> z8vJnKCE(=l?tGns;TaRuldMxJv_&{A=BeD)D6EYKj=4F>0#B z{M=fM$jXjhJ;x3Z;ctvb>6+F5XoU7Odv*dOb}cB4R~wrES|MIi))WicD-3glm}#m* zO~UG>^LY1W?+RE7?bkghl%~*!^x(Fx_f9e>W3#rkfz~tkboiJ+pJ3y0U&3XLt$Xz1 znyK!bt6(>!8`C~%Ti_E0O%=4a%Y#5>`aY6Q==d3%FyiZM;7`eGg%z-7RXuH#vjNs+ zO1eBBTX8MX^}v(W(?uakYJT8(9(yNjzf_n;LAB8mxBaU*!vTshbF&ZhymnI*VRqEB z^a&pkt^~nhkmp!flpzoR$Q_a~XPU zw`>u2e)8 zt728lnh0KyhkSfwi38teDjJ?(v1lWUx_aRA$x#>MQlgG|Y+R1#TI_w5@RH2(AeQ5l z?~|*yv#~^#UqVBSd4iPo&T*HVNAqxk$GPl9aY(kg$cM@`+18)I&k^3){7*u`N_(|t zh)5rDYR+g$nshQ3$n7|n>}VBnl0C1T&o@Tw!4dCJOH(7ewlJ4rY-^3`sNPIBz0(bMy)LAX(=_#$i%?Je-@^SXjT4Gl?^;|1$Me$Dgr6p)7r)#f7 z3m?+$SxFre6E9J!L9hkuPr$VeaIh^#tc^!w#QKahM*z_c8b`{#B;=gi3q1^O*@!!*eWlP92YAE(@3n4MFw=iXvMM#+XtS8wr}6%&X5@p`mMgxaa5} z&do26UHE>*vmHC3XV?Q{{Wpj2>}pi#SyVkqitql?cUsCelGw5(*S?+R4<|dU72-i{ ztau5p&M6pFp5I@l4bJlbw)J@;Y3DSXVN=S+_EBT7!huV*%|aH_Qh9z!Z1)tzP>po$ z5!!#^afv3I=X^qBG6ig;c#TG9)ia@Ym}5BNMa z42AvU%QB&_^zki_)dF+WNWv#VE3_95x9>qb+MMwyx@TR$#|`hmGsC8un^{`Es*%8423%02y>{@ z1AU$mh@Y3cEn0HP(PWflyo7SpcHy8ruh3(w>(qaGI!1EURhsc49$7UNvu!5twD@`L z#T>RKpUf!d!kMddLNnxLoO0%AlieDQZTFrXo}TWqbxQ_(%9x2~@Ri*jBjHUF+xf#8 z^t0qu?JzG2J(k4N!?Zy?_v`6xx9xjr#4i*|W2;bd5QX*>xsnoL9`VU9?J>HxEDovBC2N$R zzAf{Olx*!C@vMg}XjcT9j4m5zD3Eut4(2rjazTEkc_u{OCfz(}-CTB}6z?Ob_F*?8 z*R*<^lSKALnDe>C9_Th)#|Vz~RE2sDaDm>#l2sN2XJu;Ia>m5trLMew^LR$E>iuhC zoX2_WpfMp%y(mKx)2ZLb=0AHQ9Q@8v*pf3GW^GvzyB|CgUQPKmo(RQ#!gQgO4aDaq zv>bA+_1Z#U?3g=^GsCWz}ZK;>*z3qgl(p~mi^&FRt z(5~?My7xh(>5S62a|DFx@87zMG1quDPV>n1 z6YgCWAzxO>j!sS0a+-#0+?+YkXmWf~)GV>MI}%r=+5|jBbYbg+S4-sNgPn9I!Z2>v z6`58db-(i19sxBI@xlXJWKujXVKgP~Li9X|YMz3V0kEtX3iqS__lnc823tG<&ztr#R)U5~P=S`lnyq=db!F|X*WsesI*LgSvMj)5A!s&%QDjJeFV>iN2RWn|+#=6gJMpGq)0=QLBnG|0<$+6Tfo zcP>LH!D~4rZ!{EF8Si$<{TKMrt&p_NDrSy6=ODkrYrDEsyrAAI2M}5u{l^0)-xFv(%O=oNb2()MARH6lD7cpN$5Z4Fut3FTXJ98h#1UGU^) z1)JY?LLv&H1GJ~Z5}i2JW-M$cLMxsr4V{)a5uw3!*iUW&({`8KaAL3CNLw~vGMg~h zxu!P~4PF$I%O*VYV!hHFp>dA|}0>eI1A6I{kY#sr#(KmNkNW_+;Z3d*1%JNPS^Gs3>yWp%G zCj;`Nhx5)^-7NA-Va4_pyX;q-fVcDrR|H!2px@Uo-8G@!%%w9c8^<1BLS6=Am=|3OPP|sc15X~_GsCepSD|Uh$^m=X zVco^fnK4VbH|}vnlkMO2@M?HKgIa_)XCa3LZ<9AMgT2BUdAVr?F^VbFa+74Ga+1ym z`$kXhRuUT}=Z(-)aW;&UAFh;mJhPByP}#1dhZQ_~w@w-m0O;OqFteNZ>%gnmyi`Vo zcK)sd z%VV-(BRuhSRa*UqHyU`?kcSTgHBX31@4hfVi*&C-iR>Ip^@!|>6&r`le^$7wWVsP~ ze$$4ObWe{Pp}li6vHR7Wo)6ipRxFa)3TY>1gcy2BPp#yscRb{d7Rl|I+p63bAJ0Kd zv(WNN@zi;El#hOaLYwl|JT>Pewq6rM#`i)=jz}+0*x2^ zp&#OeDDd!sN#73FrXz3v^nCdG^ZA_@wtYx@&lw;4$JRgI+CM*^GlDPr=d*wI>3rcu zZ(sl6t52VO|CKkt{m1|GA3ppmkN?+C-+q^i3+DJ?iX|uorwCT7i4-wt*rOubnnaQ{vI!<^9Z(y*;u&* ztGq9)`x6(|{S24Wy)UBsnJ%L9`UH=m*=pqJ&Y$3Nx*vNHo$blOSSe@=2k*=2{>0^U ze?u41{Sq&oOERhQ7C$9H@{7EP?(gT)Ihj@o!C|6Y^1dH=5#3+y(z%rCtyyDu#j^DK zBD$aCBD(jbbMH&%-j~k(1umU?UoiK+Oz!c@76U)a-~r!ua?#uSg1PqvbMFi0-WSZh zFPM8@F!vX_VD8s;ncVy0xIgdWxSTV`>*Nw(dBX7vxJ+)P9oxP~TOvx{_hT=Z`z2l` zheJ@CwM+NQOX7Z(3+Dc=FOv%}lSe*wJ9_!s-_!+jzrM@l{`ZUH-j~GvGB1hql=IES zLw15$JAPjr_wQXC_r4_V|KXCjztV+p@5|r*oy*^T)J1RaOWod=y8T}-bxQ}V$0BYT zf3wK%o8I0xz5TQ|y?Jrc@K}>&ui3Lozv}DXY|}MioZcqz(&V4$PPm`o`nO`pk{J>) ziYh`|B?R|gSpT58CeO250s Date: Sun, 27 Oct 2024 11:04:11 -0700 Subject: [PATCH 41/42] a much cleaner test --- src/core/src/collection.rs | 34 +++------------------------------- 1 file changed, 3 insertions(+), 31 deletions(-) diff --git a/src/core/src/collection.rs b/src/core/src/collection.rs index 25803d6de2..440770f60f 100644 --- a/src/core/src/collection.rs +++ b/src/core/src/collection.rs @@ -416,41 +416,13 @@ mod test { } } - // this test could probably be simpler #[test] + #[should_panic] // for now... fn sigstore_sig_from_record_2() { - use crate::manifest::Record; - use crate::selection::Selection; - use crate::storage::{FSStorage, InnerStorage}; - - // load test sigs let mut filename = PathBuf::from(env!("CARGO_MANIFEST_DIR")); filename.push("../../tests/test-data/short.sig.gz"); - let filename: String = filename.into(); - let filename_ref = filename.as_str(); - - // load signatures - let sigs = Signature::from_path(filename.clone()).expect("error loading"); - - // convert to records, loaded from 'short.sig.gz'. - let records: Vec = sigs - .into_iter() - .flat_map(|v| Record::from_sig(&v, filename_ref)) - .collect(); - - eprintln!("{:?}", records); - - // build a new collection using this manifest - let manifest: Manifest = records.into(); - let collection = Collection::new( - manifest, - InnerStorage::new( - FSStorage::builder() - .fullpath("../../tests/test-data/".into()) - .subdir("".into()) - .build(), - ), - ); + let v = [filename]; + let collection = Collection::from_paths(&v).expect("no sigs!?"); // pull off first record let v: Vec<_> = collection.iter().collect(); From abc01c168de7d13d1f36d1c3efe055586e47f875 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 27 Oct 2024 11:06:13 -0700 Subject: [PATCH 42/42] cargo fmt --- src/core/src/collection.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/src/collection.rs b/src/core/src/collection.rs index 440770f60f..28659df05f 100644 --- a/src/core/src/collection.rs +++ b/src/core/src/collection.rs @@ -417,7 +417,7 @@ mod test { } #[test] - #[should_panic] // for now... + #[should_panic] // for now... fn sigstore_sig_from_record_2() { let mut filename = PathBuf::from(env!("CARGO_MANIFEST_DIR")); filename.push("../../tests/test-data/short.sig.gz");