Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Loading revindex from zipstorage #1943

Closed
wants to merge 19 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ include/sourmash.h: src/core/src/lib.rs \
src/core/src/ffi/hyperloglog.rs \
src/core/src/ffi/minhash.rs \
src/core/src/ffi/signature.rs \
src/core/src/ffi/manifest.rs \
src/core/src/ffi/nodegraph.rs \
src/core/src/ffi/index/mod.rs \
src/core/src/ffi/index/revindex.rs \
Expand Down
62 changes: 62 additions & 0 deletions include/sourmash.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ enum SourmashErrorCode {
SOURMASH_ERROR_CODE_PARSE_INT = 100003,
SOURMASH_ERROR_CODE_SERDE_ERROR = 100004,
SOURMASH_ERROR_CODE_NIFFLER_ERROR = 100005,
SOURMASH_ERROR_CODE_CSV_ERROR = 100006,
};
typedef uint32_t SourmashErrorCode;

Expand All @@ -50,14 +51,24 @@ typedef struct SourmashHyperLogLog SourmashHyperLogLog;

typedef struct SourmashKmerMinHash SourmashKmerMinHash;

typedef struct SourmashLinearIndex SourmashLinearIndex;

typedef struct SourmashManifest SourmashManifest;

typedef struct SourmashManifestRowIter SourmashManifestRowIter;

typedef struct SourmashNodegraph SourmashNodegraph;

typedef struct SourmashRevIndex SourmashRevIndex;

typedef struct SourmashSearchResult SourmashSearchResult;

typedef struct SourmashSelection SourmashSelection;

typedef struct SourmashSignature SourmashSignature;

typedef struct SourmashSignatureIter SourmashSignatureIter;

typedef struct SourmashZipStorage SourmashZipStorage;

/**
Expand All @@ -78,6 +89,15 @@ typedef struct {
bool owned;
} SourmashStr;

/**
 * One manifest row as exposed to C; `manifest_rows_iter_next` returns a
 * pointer to this type.
 */
typedef struct {
uint32_t ksize;
/* NOTE(review): presumably a 0/1 flag stored as a byte - confirm against the Rust definition. */
uint8_t with_abundance;
SourmashStr md5;
SourmashStr internal_location;
SourmashStr name;
SourmashStr moltype;
} SourmashManifestRow;

bool computeparams_dayhoff(const SourmashComputeParameters *ptr);

bool computeparams_dna(const SourmashComputeParameters *ptr);
Expand Down Expand Up @@ -264,6 +284,32 @@ void kmerminhash_slice_free(uint64_t *ptr, uintptr_t insize);

bool kmerminhash_track_abundance(const SourmashKmerMinHash *ptr);

void linearindex_free(SourmashLinearIndex *ptr);

uint64_t linearindex_len(const SourmashLinearIndex *ptr);

SourmashStr linearindex_location(const SourmashLinearIndex *ptr);

const SourmashManifest *linearindex_manifest(const SourmashLinearIndex *ptr);

SourmashLinearIndex *linearindex_new(SourmashZipStorage *storage_ptr,
SourmashManifest *manifest_ptr,
SourmashSelection *selection_ptr,
bool use_manifest);

SourmashLinearIndex *linearindex_select(SourmashLinearIndex *ptr,
const SourmashSelection *selection_ptr);

void linearindex_set_manifest(SourmashLinearIndex *ptr, SourmashManifest *manifest_ptr);

SourmashSignatureIter *linearindex_signatures(const SourmashLinearIndex *ptr);

const SourmashZipStorage *linearindex_storage(const SourmashLinearIndex *ptr);

SourmashManifestRowIter *manifest_rows(const SourmashManifest *ptr);

const SourmashManifestRow *manifest_rows_iter_next(SourmashManifestRowIter *ptr);

void nodegraph_buffer_free(uint8_t *ptr, uintptr_t insize);

bool nodegraph_count(SourmashNodegraph *ptr, uint64_t h);
Expand Down Expand Up @@ -353,6 +399,20 @@ double searchresult_score(const SourmashSearchResult *ptr);

SourmashSignature *searchresult_signature(const SourmashSearchResult *ptr);

bool selection_abund(const SourmashSelection *ptr);

uint32_t selection_ksize(const SourmashSelection *ptr);

HashFunctions selection_moltype(const SourmashSelection *ptr);

SourmashSelection *selection_new(void);

void selection_set_abund(SourmashSelection *ptr, bool new_abund);

void selection_set_ksize(SourmashSelection *ptr, uint32_t new_ksize);

void selection_set_moltype(SourmashSelection *ptr, HashFunctions new_moltype);

void signature_add_protein(SourmashSignature *ptr, const char *sequence);

void signature_add_sequence(SourmashSignature *ptr, const char *sequence, bool force);
Expand Down Expand Up @@ -387,6 +447,8 @@ void signature_set_mh(SourmashSignature *ptr, const SourmashKmerMinHash *other);

void signature_set_name(SourmashSignature *ptr, const char *name);

const SourmashSignature *signatures_iter_next(SourmashSignatureIter *ptr);

SourmashSignature **signatures_load_buffer(const char *ptr,
uintptr_t insize,
bool _ignore_md5sum,
Expand Down
1 change: 1 addition & 0 deletions src/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ vec-collections = "0.3.4"
piz = "0.4.0"
memmap2 = "0.5.0"
ouroboros = "0.15.0"
csv = "1.1.6"

[dev-dependencies]
assert_matches = "1.3.0"
Expand Down
5 changes: 5 additions & 0 deletions src/core/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ pub enum SourmashError {
#[error(transparent)]
IOError(#[from] std::io::Error),

#[error(transparent)]
CsvError(#[from] csv::Error),

#[cfg(not(all(target_arch = "wasm32", target_vendor = "unknown")))]
#[error(transparent)]
Panic(#[from] crate::ffi::utils::Panic),
Expand Down Expand Up @@ -104,6 +107,7 @@ pub enum SourmashErrorCode {
ParseInt = 100_003,
SerdeError = 100_004,
NifflerError = 100_005,
CsvError = 100_006,
}

#[cfg(not(all(target_arch = "wasm32", target_vendor = "unknown")))]
Expand All @@ -130,6 +134,7 @@ impl SourmashErrorCode {
SourmashError::IOError { .. } => SourmashErrorCode::Io,
SourmashError::NifflerError { .. } => SourmashErrorCode::NifflerError,
SourmashError::Utf8Error { .. } => SourmashErrorCode::Utf8Error,
SourmashError::CsvError { .. } => SourmashErrorCode::CsvError,
}
}
}
90 changes: 90 additions & 0 deletions src/core/src/ffi/index/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
pub mod revindex;

use crate::encodings::HashFunctions;
use crate::index::{Selection, SigStore};
use crate::signature::Signature;

use crate::ffi::signature::SourmashSignature;
Expand Down Expand Up @@ -35,3 +37,91 @@ pub unsafe extern "C" fn searchresult_signature(
let result = SourmashSearchResult::as_rust(ptr);
SourmashSignature::from_rust((result.1).clone())
}

//================================================================

/// Handle type under which a [`Selection`] is exposed by the `selection_*` FFI functions.
pub struct SourmashSelection;

// Declares `Selection` as the Rust-side object behind a `SourmashSelection` pointer.
impl ForeignObject for SourmashSelection {
type RustObject = Selection;
}

/// Builds a `Selection::default()` and returns it as a `SourmashSelection` pointer.
///
/// NOTE(review): the returned pointer is presumably owned by the caller; no
/// matching `selection_free` is visible in this section - confirm how it is released.
#[no_mangle]
pub unsafe extern "C" fn selection_new() -> *mut SourmashSelection {
    let empty = Selection::default();
    SourmashSelection::from_rust(empty)
}

/// Returns the k-mer size stored in the selection behind `ptr`.
///
/// # Panics
///
/// Hits `todo!` when the selection has no ksize set. NOTE(review): this panic
/// originates inside an `extern "C"` function, where a C caller cannot handle
/// it - an error-reporting path is still needed.
///
/// # Safety
///
/// `ptr` is passed straight to `SourmashSelection::as_rust`; it is presumably
/// required to be a valid, non-null selection pointer - confirm against
/// `ForeignObject`.
#[no_mangle]
pub unsafe extern "C" fn selection_ksize(ptr: *const SourmashSelection) -> u32 {
    let selection = SourmashSelection::as_rust(ptr);
    match selection.ksize() {
        Some(value) => value,
        None => todo!("empty ksize case not supported yet"),
    }
}

/// Stores `new_ksize` as the k-mer size of the selection behind `ptr`.
///
/// # Safety
///
/// `ptr` is passed straight to `SourmashSelection::as_rust_mut`; it is
/// presumably required to be a valid, non-null, uniquely accessed selection
/// pointer - confirm against `ForeignObject`.
#[no_mangle]
pub unsafe extern "C" fn selection_set_ksize(ptr: *mut SourmashSelection, new_ksize: u32) {
    SourmashSelection::as_rust_mut(ptr).set_ksize(new_ksize);
}

/// Returns the abundance setting stored in the selection behind `ptr`.
///
/// # Panics
///
/// Hits `todo!` when the selection has no abundance setting. NOTE(review):
/// this panic originates inside an `extern "C"` function, where a C caller
/// cannot handle it - an error-reporting path is still needed.
///
/// # Safety
///
/// `ptr` is passed straight to `SourmashSelection::as_rust`; it is presumably
/// required to be a valid, non-null selection pointer - confirm against
/// `ForeignObject`.
#[no_mangle]
pub unsafe extern "C" fn selection_abund(ptr: *const SourmashSelection) -> bool {
    let selection = SourmashSelection::as_rust(ptr);
    match selection.abund() {
        Some(flag) => flag,
        None => todo!("empty abund case not supported yet"),
    }
}

/// Stores `new_abund` as the abundance setting of the selection behind `ptr`.
///
/// # Safety
///
/// `ptr` is passed straight to `SourmashSelection::as_rust_mut`; it is
/// presumably required to be a valid, non-null, uniquely accessed selection
/// pointer - confirm against `ForeignObject`.
#[no_mangle]
pub unsafe extern "C" fn selection_set_abund(ptr: *mut SourmashSelection, new_abund: bool) {
    SourmashSelection::as_rust_mut(ptr).set_abund(new_abund);
}

/// Returns the molecule type (`HashFunctions`) stored in the selection behind `ptr`.
///
/// # Panics
///
/// Hits `todo!` when the selection has no molecule type set. NOTE(review):
/// this panic originates inside an `extern "C"` function, where a C caller
/// cannot handle it - an error-reporting path is still needed.
///
/// # Safety
///
/// `ptr` is passed straight to `SourmashSelection::as_rust`; it is presumably
/// required to be a valid, non-null selection pointer - confirm against
/// `ForeignObject`.
#[no_mangle]
pub unsafe extern "C" fn selection_moltype(ptr: *const SourmashSelection) -> HashFunctions {
    let selection = SourmashSelection::as_rust(ptr);
    match selection.moltype() {
        Some(moltype) => moltype,
        None => todo!("empty hash_function case not supported yet"),
    }
}

/// Stores `new_moltype` as the molecule type of the selection behind `ptr`.
///
/// # Safety
///
/// `ptr` is passed straight to `SourmashSelection::as_rust_mut`; it is
/// presumably required to be a valid, non-null, uniquely accessed selection
/// pointer - confirm against `ForeignObject`.
#[no_mangle]
pub unsafe extern "C" fn selection_set_moltype(
    ptr: *mut SourmashSelection,
    new_moltype: HashFunctions,
) {
    SourmashSelection::as_rust_mut(ptr).set_moltype(new_moltype);
}

//================================================================
//
/// Wraps a boxed iterator over `SigStore` items so it can be stepped from the
/// FFI via `signatures_iter_next`.
pub struct SignatureIterator {
// Type-erased source of `SigStore` items; advanced one item per call.
iter: Box<dyn Iterator<Item = SigStore>>,
}

/// Handle type under which a [`SignatureIterator`] is exposed to the FFI.
pub struct SourmashSignatureIter;

// Declares `SignatureIterator` as the Rust-side object behind a `SourmashSignatureIter` pointer.
impl ForeignObject for SourmashSignatureIter {
type RustObject = SignatureIterator;
}

/// Advances the signature iterator behind `ptr` by one item.
///
/// Returns a pointer to a newly created `SourmashSignature` built from the
/// next `SigStore`, or a null pointer once the iterator is exhausted.
///
/// NOTE(review): each non-null result comes from `SourmashSignature::from_rust`,
/// so the caller is presumably responsible for freeing it even though the
/// return type is `*const` - confirm.
///
/// # Safety
///
/// `ptr` is passed straight to `SourmashSignatureIter::as_rust_mut`; it is
/// presumably required to be a valid, non-null, uniquely accessed iterator
/// pointer - confirm against `ForeignObject`.
#[no_mangle]
pub unsafe extern "C" fn signatures_iter_next(
    ptr: *mut SourmashSignatureIter,
) -> *const SourmashSignature {
    let state = SourmashSignatureIter::as_rust_mut(ptr);

    if let Some(next_sig) = state.iter.next() {
        return SourmashSignature::from_rust(next_sig.into());
    }

    // Exhausted: signal the end of iteration to the caller with null.
    std::ptr::null()
}
Loading