Skip to content

Commit

Permalink
Allow subprojects to build their own precomputed indices
Browse files Browse the repository at this point in the history
  • Loading branch information
milot-mirdita committed Dec 29, 2021
1 parent 75af0c8 commit a506d67
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 18 deletions.
2 changes: 2 additions & 0 deletions src/MMseqsBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
#include "CommandDeclarations.h"
#include "DownloadDatabase.h"

// Index version string shipped with this code base. src/mmseqs.cpp copies this
// pointer into index_version_compatible, which PrefilteringIndexReader.cpp then
// uses to initialize PrefilteringIndexReader::CURRENT_VERSION.
const char* MMSEQS_CURRENT_INDEX_VERSION = "16";

Parameters& par = Parameters::getInstance();
std::vector<Command> baseCommands = {
{"easy-search", easysearch, &par.easysearchworkflow, COMMAND_EASY,
Expand Down
53 changes: 36 additions & 17 deletions src/commons/IndexReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,18 @@

class IndexReader {
public:
const static int PRELOAD_NO = 0;
const static int PRELOAD_DATA = 1;
const static int PRELOAD_INDEX = 2;
// Bit flags for the constructor's preloadMode argument. The constructor tests
// (preloadMode & PRELOAD_INDEX) and (preloadMode & PRELOAD_DATA) to decide
// whether to touch the index and/or the data; PRELOAD_NO has no bit set.
const static unsigned int PRELOAD_NO = 0;
const static unsigned int PRELOAD_DATA = 1;
const static unsigned int PRELOAD_INDEX = 2;

IndexReader(const std::string &dataName, int threads, int databaseType = SEQUENCES | HEADERS, int preloadMode = false, int dataMode=(DBReader<unsigned int>::USE_INDEX | DBReader<unsigned int>::USE_DATA))
: sequenceReader(NULL), index(NULL) {
IndexReader(
const std::string &dataName,
int threads,
unsigned int databaseType = SEQUENCES | HEADERS,
unsigned int preloadMode = false,
int dataMode = DBReader<unsigned int>::USE_INDEX | DBReader<unsigned int>::USE_DATA,
std::string failSuffix = ""
) : sequenceReader(NULL), index(NULL) {
int targetDbtype = FileUtil::parseDbType(dataName.c_str());
if (Parameters::isEqualDbtype(targetDbtype, Parameters::DBTYPE_INDEX_DB)) {
index = new DBReader<unsigned int>(dataName.c_str(), (dataName + ".index").c_str(), 1, DBReader<unsigned int>::USE_DATA|DBReader<unsigned int>::USE_INDEX);
Expand All @@ -24,13 +30,19 @@ class IndexReader {
seqType = data.seqType;
bool touchIndex = preloadMode & PRELOAD_INDEX;
bool touchData = preloadMode & PRELOAD_DATA;
if (databaseType & SRC_SEQUENCES) {
if (databaseType & USER_SELECT) {
sequenceReader = PrefilteringIndexReader::openNewReader(
index,
(databaseType & ~USER_SELECT) + 1,
databaseType & ~USER_SELECT,
dataMode & DBReader<unsigned int>::USE_DATA, threads, touchIndex, touchData
);
} else if (databaseType & SRC_SEQUENCES) {
sequenceReader = PrefilteringIndexReader::openNewReader(index,
PrefilteringIndexReader::DBR2DATA, PrefilteringIndexReader::DBR2INDEX, dataMode & DBReader<unsigned int>::USE_DATA, threads, touchIndex, touchData);

PrefilteringIndexReader::DBR2DATA, PrefilteringIndexReader::DBR2INDEX, dataMode & DBReader<unsigned int>::USE_DATA, threads, touchIndex, touchData);
} else if (databaseType & SEQUENCES) {
sequenceReader = PrefilteringIndexReader::openNewReader(index,
PrefilteringIndexReader::DBR1DATA, PrefilteringIndexReader::DBR1INDEX, dataMode & DBReader<unsigned int>::USE_DATA, threads, touchIndex, touchData);
PrefilteringIndexReader::DBR1DATA, PrefilteringIndexReader::DBR1INDEX, dataMode & DBReader<unsigned int>::USE_DATA, threads, touchIndex, touchData);
} else if (databaseType & SRC_HEADERS) {

sequenceReader = PrefilteringIndexReader::openNewHeaderReader(index,
Expand Down Expand Up @@ -63,10 +75,12 @@ class IndexReader {

if (sequenceReader == NULL) {
if (databaseType & (HEADERS | SRC_HEADERS)) {
sequenceReader = new DBReader<unsigned int>((dataName + "_h").c_str(), (dataName + "_h.index").c_str(), threads, dataMode);
} else {
sequenceReader = new DBReader<unsigned int>(dataName.c_str(), (dataName + ".index").c_str(), threads, dataMode);
failSuffix = "_h";
}
sequenceReader = new DBReader<unsigned int>(
(dataName + failSuffix).c_str(), (dataName + failSuffix + ".index").c_str(),
threads, dataMode
);
sequenceReader->open(DBReader<unsigned int>::NOSORT);
bool touchData = preloadMode & PRELOAD_DATA;
if (touchData) {
Expand All @@ -76,11 +90,16 @@ class IndexReader {
}
}

static const int SEQUENCES = 1;
static const int HEADERS = 2;
static const int SRC_HEADERS = 4;
static const int SRC_SEQUENCES = 8;
static const int ALIGNMENTS = 16;
// Bit flags for the constructor's databaseType argument. They are combined
// with | (the default is SEQUENCES | HEADERS) and tested with &.
static const unsigned int SEQUENCES = 1;
static const unsigned int HEADERS = 2;
static const unsigned int SRC_HEADERS = 4;
static const unsigned int SRC_SEQUENCES = 8;
static const unsigned int ALIGNMENTS = 16;
// Highest bit: marks a databaseType built by makeUserDatabaseType. The literal
// is unsigned so the shift is performed in unsigned arithmetic; shifting the
// signed literal 1 by 31 would land in the sign bit of int.
static const unsigned int USER_SELECT = 1u << 31;

// Builds a databaseType value that carries a caller-chosen key: returns
// baseKey with the USER_SELECT bit set. The constructor detects that bit,
// masks it off again, and passes the remaining value (and that value + 1)
// to PrefilteringIndexReader::openNewReader.
static unsigned int makeUserDatabaseType(unsigned int baseKey) {
    const unsigned int flaggedKey = USER_SELECT | baseKey;
    return flaggedKey;
}

int getDbtype() const {
return seqType;
Expand Down
2 changes: 2 additions & 0 deletions src/mmseqs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ const char* tool_introduction = "MMseqs2 (Many against Many sequence searching)
const char* main_author = "Martin Steinegger (martin.steinegger@snu.ac.kr)";
const char* show_extended_help = "1";
const char* show_bash_info = "1";
// Defined in src/MMseqsBase.cpp.
extern const char* MMSEQS_CURRENT_INDEX_VERSION;
// Index version string used by this binary; PrefilteringIndexReader.cpp reads
// it to initialize PrefilteringIndexReader::CURRENT_VERSION.
// NOTE(review): the initializer reads a non-const global, so this variable is
// dynamically initialized at startup. PrefilteringIndexReader::CURRENT_VERSION
// lives in another translation unit and is also dynamically initialized from
// this variable; C++ does not order dynamic initialization across translation
// units, so that reader may observe a null pointer — confirm link order or
// switch to a constant initializer / accessor function.
const char* index_version_compatible = MMSEQS_CURRENT_INDEX_VERSION;
bool hide_base_commands = false;
void (*validatorUpdate)(void) = 0;
std::vector<Command> commands = {};
Expand Down
3 changes: 2 additions & 1 deletion src/prefiltering/PrefilteringIndexReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
#include "IndexBuilder.h"
#include "Parameters.h"

const char* PrefilteringIndexReader::CURRENT_VERSION = "16";
// Supplied by the executable's main source file (src/mmseqs.cpp in this tree).
extern const char* index_version_compatible;
// The version is taken from the executable-provided variable above rather
// than from a string literal in this file.
// NOTE(review): index_version_compatible is itself initialized at startup from
// another global in a different translation unit. Dynamic initialization order
// across translation units is unspecified, so CURRENT_VERSION can end up null
// if this initializer runs first — confirm, or read index_version_compatible
// at the point of use instead of caching it here.
const char* PrefilteringIndexReader::CURRENT_VERSION = index_version_compatible;
unsigned int PrefilteringIndexReader::VERSION = 0;
unsigned int PrefilteringIndexReader::META = 1;
unsigned int PrefilteringIndexReader::SCOREMATRIXNAME = 2;
Expand Down

0 comments on commit a506d67

Please sign in to comment.