Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Oxidized LinearIndex #1526

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ include/sourmash.h: src/core/src/lib.rs \
src/core/src/ffi/minhash.rs \
src/core/src/ffi/signature.rs \
src/core/src/ffi/nodegraph.rs \
src/core/src/ffi/index/mod.rs \
src/core/src/ffi/index/linear.rs \
src/core/src/index/mod.rs \
src/core/src/index/linear.rs \
src/core/src/errors.rs
cd src/core && \
RUSTUP_TOOLCHAIN=nightly cbindgen -c cbindgen.toml . -o ../../$@
Expand Down
45 changes: 45 additions & 0 deletions include/sourmash.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@ enum HashFunctions {
};
typedef uint32_t HashFunctions;

/**
 * Search mode selector; passed as the `search_type` argument of `searchfn_new`.
 * Exposed to C as a `uint32_t`.
 */
enum SearchType {
SEARCH_TYPE_JACCARD = 1,
SEARCH_TYPE_CONTAINMENT = 2,
SEARCH_TYPE_MAX_CONTAINMENT = 3,
};
typedef uint32_t SearchType;

enum SourmashErrorCode {
SOURMASH_ERROR_CODE_NO_ERROR = 0,
SOURMASH_ERROR_CODE_PANIC = 1,
Expand Down Expand Up @@ -50,8 +57,14 @@ typedef struct SourmashHyperLogLog SourmashHyperLogLog;

typedef struct SourmashKmerMinHash SourmashKmerMinHash;

typedef struct SourmashLinearIndex SourmashLinearIndex;

typedef struct SourmashNodegraph SourmashNodegraph;

typedef struct SourmashSearchFn SourmashSearchFn;

typedef struct SourmashSearchResult SourmashSearchResult;

typedef struct SourmashSignature SourmashSignature;

/**
Expand Down Expand Up @@ -248,6 +261,26 @@ void kmerminhash_slice_free(uint64_t *ptr, uintptr_t insize);

bool kmerminhash_track_abundance(const SourmashKmerMinHash *ptr);

const SourmashSearchResult *const *linearindex_find(const SourmashLinearIndex *ptr,
const SourmashSearchFn *search_fn_ptr,
const SourmashSignature *sig_ptr,
uintptr_t *size);

void linearindex_free(SourmashLinearIndex *ptr);

void linearindex_insert_many(SourmashLinearIndex *ptr,
const SourmashSignature *const *search_sigs_ptr,
uintptr_t insigs);

uintptr_t linearindex_len(const SourmashLinearIndex *ptr);

SourmashLinearIndex *linearindex_new(void);

SourmashLinearIndex *linearindex_new_with_sigs(const SourmashSignature *const *search_sigs_ptr,
uintptr_t insigs);

SourmashSignature **linearindex_signatures(const SourmashLinearIndex *ptr, uintptr_t *size);

void nodegraph_buffer_free(uint8_t *ptr, uintptr_t insize);

bool nodegraph_count(SourmashNodegraph *ptr, uint64_t h);
Expand Down Expand Up @@ -292,6 +325,18 @@ SourmashNodegraph *nodegraph_with_tables(uintptr_t ksize,
uintptr_t starting_size,
uintptr_t n_tables);

void searchfn_free(SourmashSearchFn *ptr);

SourmashSearchFn *searchfn_new(SearchType search_type, double threshold, bool best_only);

SourmashStr searchresult_filename(const SourmashSearchResult *ptr);

void searchresult_free(SourmashSearchResult *ptr);

double searchresult_score(const SourmashSearchResult *ptr);

SourmashSignature *searchresult_signature(const SourmashSearchResult *ptr);

void signature_add_protein(SourmashSignature *ptr, const char *sequence);

void signature_add_sequence(SourmashSignature *ptr, const char *sequence, bool force);
Expand Down
2 changes: 1 addition & 1 deletion nix/rust.nix
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
let
pkgs =
import sources.nixpkgs { overlays = [ (import sources.rust-overlay) ]; };
rustVersion = pkgs.rust-bin.stable.latest.rust.override {
rustVersion = pkgs.rust-bin.nightly.latest.rust.override {
#extensions = [ "rust-src" ];
#targets = [ "x86_64-unknown-linux-musl" ];
targets = [ "wasm32-wasi" "wasm32-unknown-unknown" ];
Expand Down
2 changes: 2 additions & 0 deletions shell.nix
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,14 @@ in
(python38.withPackages(ps: with ps; [ virtualenv tox setuptools ]))
(python39.withPackages(ps: with ps; [ virtualenv setuptools ]))
(python37.withPackages(ps: with ps; [ virtualenv setuptools ]))
rust-cbindgen
py-spy
heaptrack
cargo-watch
cargo-limit
wasmtime
wasm-pack
gdb
];

shellHook = ''
Expand Down
1 change: 1 addition & 0 deletions src/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ serde_json = "1.0.53"
primal-check = "0.3.1"
thiserror = "1.0"
typed-builder = "0.9.0"
atomic_float = "0.1.0"

[target.'cfg(all(target_arch = "wasm32", target_vendor="unknown"))'.dependencies.wasm-bindgen]
version = "0.2.62"
Expand Down
116 changes: 116 additions & 0 deletions src/core/src/ffi/index/linear.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
use std::slice;

use crate::index::linear::LinearIndex;
use crate::index::{Index, SigStore};
use crate::signature::Signature;

use crate::ffi::index::SourmashSearchResult;
use crate::ffi::search::SourmashSearchFn;
use crate::ffi::signature::SourmashSignature;
use crate::ffi::utils::ForeignObject;

/// Opaque FFI handle type standing in for a `LinearIndex<Signature>`.
///
/// It has no fields; the functions in this file only ever work with pointers
/// to it, obtained from `from_rust` and read back with `as_rust` /
/// `as_rust_mut`.
pub struct SourmashLinearIndex;

// Ties the opaque handle type to the Rust type it represents.
impl ForeignObject for SourmashLinearIndex {
type RustObject = LinearIndex<Signature>;
}

/// Creates a linear index from the builder's defaults (no datasets are
/// supplied) and returns a raw handle to it.
///
/// The handle is meant to be released with `linearindex_free`.
#[no_mangle]
pub unsafe extern "C" fn linearindex_new() -> *mut SourmashLinearIndex {
    let index = LinearIndex::builder().build();
    SourmashLinearIndex::from_rust(index)
}

// Builds a linear index pre-populated with clones of the signatures behind
// `search_sigs_ptr[0..insigs]` and returns a raw handle to it.
//
// Panics (via `assert!`) when `search_sigs_ptr` is null. The caller keeps
// ownership of the input signatures, since every element is cloned.
// NOTE(review): `ffi_fn!` is defined elsewhere; how it reports the panic or an
// `Err` to the C caller is not visible here.
ffi_fn! {
unsafe fn linearindex_new_with_sigs(
    search_sigs_ptr: *const *const SourmashSignature,
    insigs: usize,
) -> Result<*mut SourmashLinearIndex> {
    // Guard before building the slice: a null base pointer is never valid.
    assert!(!search_sigs_ptr.is_null());
    let sig_ptrs = slice::from_raw_parts(search_sigs_ptr, insigs);

    // Clone each signature and convert it into the index's storage wrapper.
    let mut datasets: Vec<SigStore<Signature>> = Vec::with_capacity(sig_ptrs.len());
    for &sig_ptr in sig_ptrs {
        let sig = SourmashSignature::as_rust(sig_ptr).clone();
        datasets.push(sig.into());
    }

    let index = LinearIndex::builder().datasets(datasets).build();
    Ok(SourmashLinearIndex::from_rust(index))
}
}

// Inserts clones of the signatures behind `search_sigs_ptr[0..insigs]` into
// the index behind `ptr`, stopping at the first insertion that fails.
//
// Panics (via `assert!`) when `search_sigs_ptr` is null, matching
// `linearindex_new_with_sigs`. The caller keeps ownership of the input
// signatures, since every element is cloned before insertion.
// NOTE(review): `ffi_fn!` is defined elsewhere; how it reports the panic or an
// `Err` to the C caller is not visible here.
ffi_fn! {
unsafe fn linearindex_insert_many(
    ptr: *mut SourmashLinearIndex,
    search_sigs_ptr: *const *const SourmashSignature,
    insigs: usize,
) -> Result<()> {
    // `slice::from_raw_parts` requires a non-null pointer even for a
    // zero-length slice, so reject null before building the slice.
    assert!(!search_sigs_ptr.is_null());

    let index = SourmashLinearIndex::as_rust_mut(ptr);

    slice::from_raw_parts(search_sigs_ptr, insigs)
        .iter()
        .try_for_each(|sig| {
            let s = SourmashSignature::as_rust(*sig).clone();
            index.insert(s)
        })
}
}

/// Releases a linear index handle by passing it to `SourmashLinearIndex::drop`.
///
/// NOTE(review): `drop` comes from `ForeignObject`, defined elsewhere;
/// presumably the handle must not be used again after this call — confirm.
#[no_mangle]
pub unsafe extern "C" fn linearindex_free(ptr: *mut SourmashLinearIndex) {
SourmashLinearIndex::drop(ptr);
}

/// Returns the value of `len()` for the index behind `ptr`.
#[no_mangle]
pub unsafe extern "C" fn linearindex_len(ptr: *const SourmashLinearIndex) -> usize {
    SourmashLinearIndex::as_rust(ptr).len()
}

// Returns the signatures of the index behind `ptr` as a heap-allocated array
// of individually boxed signature handles, writing the element count to
// `*size`.
//
// Both the array and every element are leaked with `Box::into_raw`, so
// releasing them is left to the caller.
ffi_fn! {
unsafe fn linearindex_signatures(ptr: *const SourmashLinearIndex,
                                 size: *mut usize) -> Result<*mut *mut SourmashSignature> {
    let index = SourmashLinearIndex::as_rust(ptr);

    // FIXME: use the ForeignObject trait, maybe define new method there...
    // Box every signature on its own, then gather the raw handles directly
    // into a boxed slice.
    let handles: Box<[*mut SourmashSignature]> = index
        .signatures()
        .into_iter()
        .map(|sig| Box::into_raw(Box::new(sig)) as *mut SourmashSignature)
        .collect();

    *size = handles.len();

    // Hand the array itself over as a thin pointer to its first element.
    Ok(Box::into_raw(handles) as *mut *mut SourmashSignature)
}
}

// Runs `find_new` on the index behind `ptr` with the given search function
// and query signature, and returns the matches as a heap-allocated array of
// individually boxed `(score, signature, filename)` result handles. The
// element count is written to `*size`.
//
// An error from `find_new` is propagated with `?`. Both the array and every
// element are leaked with `Box::into_raw`, so releasing them is left to the
// caller (single results can go through `searchresult_free`).
ffi_fn! {
unsafe fn linearindex_find(
    ptr: *const SourmashLinearIndex,
    search_fn_ptr: *const SourmashSearchFn,
    sig_ptr: *const SourmashSignature,
    size: *mut usize,
) -> Result<*const *const SourmashSearchResult> {
    let index = SourmashLinearIndex::as_rust(ptr);
    let search_fn = SourmashSearchFn::as_rust(search_fn_ptr);
    let query = SourmashSignature::as_rust(sig_ptr);

    // Materialize all matches first so a failed search allocates no handles.
    let hits: Vec<(f64, Signature, String)> =
        index.find_new(search_fn, query)?.into_iter().collect();

    // FIXME: use the ForeignObject trait, maybe define new method there...
    let handles: Box<[*const SourmashSearchResult]> = hits
        .into_iter()
        .map(|hit| Box::into_raw(Box::new(hit)) as *const SourmashSearchResult)
        .collect();

    *size = handles.len();

    // Hand the array itself over as a thin pointer to its first element.
    Ok(Box::into_raw(handles) as *const *const SourmashSearchResult)
}
}
37 changes: 37 additions & 0 deletions src/core/src/ffi/index/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
pub mod linear;

use crate::signature::Signature;

use crate::ffi::signature::SourmashSignature;
use crate::ffi::utils::{ForeignObject, SourmashStr};

/// Opaque FFI handle type standing in for one search result.
///
/// The underlying Rust value is a `(f64, Signature, String)` tuple; the
/// accessors in this file expose field 0 as the score, field 1 as the
/// signature and field 2 as the filename.
pub struct SourmashSearchResult;

// Ties the opaque handle type to the tuple it represents.
impl ForeignObject for SourmashSearchResult {
type RustObject = (f64, Signature, String);
}

/// Releases a search result handle by passing it to `SourmashSearchResult::drop`.
///
/// NOTE(review): `drop` comes from `ForeignObject`, defined elsewhere;
/// presumably the handle must not be used again after this call — confirm.
#[no_mangle]
pub unsafe extern "C" fn searchresult_free(ptr: *mut SourmashSearchResult) {
SourmashSearchResult::drop(ptr);
}

/// Returns the score (tuple field 0) of the search result behind `ptr`.
#[no_mangle]
pub unsafe extern "C" fn searchresult_score(ptr: *const SourmashSearchResult) -> f64 {
    SourmashSearchResult::as_rust(ptr).0
}

/// Returns a copy of the filename (tuple field 2) of the search result behind
/// `ptr`, converted into a `SourmashStr`.
#[no_mangle]
pub unsafe extern "C" fn searchresult_filename(ptr: *const SourmashSearchResult) -> SourmashStr {
    // Clone so the result keeps its own string.
    let filename = SourmashSearchResult::as_rust(ptr).2.clone();
    filename.into()
}

/// Returns a new signature handle wrapping a clone of the signature (tuple
/// field 1) stored in the search result behind `ptr`.
#[no_mangle]
pub unsafe extern "C" fn searchresult_signature(
    ptr: *const SourmashSearchResult,
) -> *mut SourmashSignature {
    // Clone so the result keeps its own signature.
    let signature = SourmashSearchResult::as_rust(ptr).1.clone();
    SourmashSignature::from_rust(signature)
}
2 changes: 2 additions & 0 deletions src/core/src/ffi/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@ pub mod utils;

pub mod cmd;
pub mod hyperloglog;
pub mod index;
pub mod minhash;
pub mod nodegraph;
pub mod search;
pub mod signature;

use std::ffi::CStr;
Expand Down
25 changes: 25 additions & 0 deletions src/core/src/ffi/search.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
use crate::index::{JaccardSearch, SearchType};

use crate::ffi::utils::ForeignObject;

/// Opaque FFI handle type standing in for a `JaccardSearch`.
///
/// Handles are created by `searchfn_new` and released by `searchfn_free`.
pub struct SourmashSearchFn;

// Ties the opaque handle type to the Rust type it represents.
impl ForeignObject for SourmashSearchFn {
type RustObject = JaccardSearch;
}

/// Releases a search function handle by passing it to `SourmashSearchFn::drop`.
///
/// NOTE(review): `drop` comes from `ForeignObject`, defined elsewhere;
/// presumably the handle must not be used again after this call — confirm.
#[no_mangle]
pub unsafe extern "C" fn searchfn_free(ptr: *mut SourmashSearchFn) {
SourmashSearchFn::drop(ptr);
}

/// Creates a `JaccardSearch` for `search_type` with the given `threshold`,
/// applies the `best_only` flag, and returns a raw handle to it.
///
/// The handle is meant to be released with `searchfn_free`.
///
/// # Safety
///
/// `search_type` is received as a Rust enum, so the caller must pass one of
/// the declared `SearchType` values.
#[no_mangle]
pub unsafe extern "C" fn searchfn_new(
    search_type: SearchType,
    threshold: f64,
    best_only: bool,
) -> *mut SourmashSearchFn {
    let mut search = JaccardSearch::with_threshold(search_type, threshold);
    search.set_best_only(best_only);

    SourmashSearchFn::from_rust(search)
}
4 changes: 4 additions & 0 deletions src/core/src/ffi/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -314,3 +314,7 @@ pub unsafe extern "C" fn sourmash_str_free(s: *mut SourmashStr) {
(*s).free()
}
}

// `SourmashStr` acts as its own Rust object: the FFI-facing type and the
// wrapped type are the same.
impl ForeignObject for SourmashStr {
type RustObject = SourmashStr;
}
Loading