Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(clp-s): Add command line options for stubbed out kv-pair-IR ingestion. #618

Merged
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions components/core/src/clp_s/CommandLineArguments.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,9 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
po::options_description compression_options("Compression options");
std::string metadata_db_config_file_path;
std::string input_path_list_file_path;
constexpr std::string_view cJsonFileType{"json"};
constexpr std::string_view cKeyValueIrFileType{"kv-ir"};
std::string file_type{cJsonFileType};
// clang-format off
compression_options.add_options()(
"compression-level",
Expand Down Expand Up @@ -202,6 +205,10 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
"disable-log-order",
po::bool_switch(&m_disable_log_order),
"Do not record log order at ingestion time."
)(
"file-type",
po::value<std::string>(&file_type)->value_name("FILE_TYPE")->default_value(file_type),
"The type of file being compressed (json or kv-ir)"
);
// clang-format on

Expand Down Expand Up @@ -255,6 +262,24 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
throw std::invalid_argument("No input paths specified.");
}

// Map the --file-type string onto the FileType enum and reject option combinations that the
// selected type does not allow.
// NOTE(review): "file-type" is registered with a default_value, so this count is presumably always
// non-zero and the guard may be redundant — confirm.
if (parsed_command_line_options.count("file-type") > 0) {
if (cJsonFileType == file_type) {
m_file_type = FileType::Json;
} else if (cKeyValueIrFileType == file_type) {
m_file_type = FileType::KeyValueIr;
// --structurize-arrays cannot be combined with kv-ir input. Note that m_file_type has already
// been updated when this failure is reported.
if (m_structurize_arrays) {
SPDLOG_ERROR(
"Invalid combination of arguments; --file-type {} and "
"--structurize-arrays can't be used together",
cKeyValueIrFileType
);
return ParsingResult::Failure;
}
} else {
// Any value other than "json" or "kv-ir" is rejected. Unlike the invalid-combination case
// above (log + ParsingResult::Failure), this path reports the error by throwing.
throw std::invalid_argument("Unknown FILE_TYPE: " + file_type);
}
}
gibber9809 marked this conversation as resolved.
Show resolved Hide resolved

// Parse and validate global metadata DB config
if (false == metadata_db_config_file_path.empty()) {
clp::GlobalMetadataDBConfig metadata_db_config;
Expand Down
8 changes: 8 additions & 0 deletions components/core/src/clp_s/CommandLineArguments.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ class CommandLineArguments {
Stdout,
};

/**
 * Kinds of input file that can be selected for compression. Stored in a single byte.
 */
enum class FileType : uint8_t {
    // JSON input ("json" on the command line).
    Json = 0,
    // Key-value pair IR input ("kv-ir" on the command line).
    KeyValueIr = 1
};

// Constructors
/**
 * Constructs the argument holder and stores the program name in m_program_name.
 * @param program_name Name of the program; copied into m_program_name.
 */
explicit CommandLineArguments(std::string const& program_name) : m_program_name(program_name) {}

Expand Down Expand Up @@ -116,6 +121,8 @@ class CommandLineArguments {

/**
 * @return true when log order should be recorded, i.e. whenever m_disable_log_order is false.
 */
[[nodiscard]] auto get_record_log_order() const -> bool { return false == m_disable_log_order; }

/**
 * @return The stored input file type (m_file_type), which is initialized to FileType::Json.
 */
[[nodiscard]] auto get_file_type() const -> FileType { return m_file_type; }

private:
// Methods
/**
Expand Down Expand Up @@ -184,6 +191,7 @@ class CommandLineArguments {
size_t m_target_ordered_chunk_size{};
size_t m_minimum_table_size{1ULL * 1024 * 1024}; // 1 MB
bool m_disable_log_order{false};
FileType m_file_type{FileType::Json};

// Metadata db variables
std::optional<clp::GlobalMetadataDBConfig> m_metadata_db_config;
Expand Down
2 changes: 2 additions & 0 deletions components/core/src/clp_s/JsonParser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

#include "../clp/GlobalMySQLMetadataDB.hpp"
#include "ArchiveWriter.hpp"
#include "CommandLineArguments.hpp"
#include "DictionaryWriter.hpp"
#include "FileReader.hpp"
#include "FileWriter.hpp"
Expand All @@ -29,6 +30,7 @@ using namespace simdjson;
namespace clp_s {
struct JsonParserOption {
std::vector<std::string> file_paths;
CommandLineArguments::FileType input_file_type{CommandLineArguments::FileType::Json};
std::string timestamp_key;
std::string archives_dir;
size_t target_encoded_size{};
Expand Down
13 changes: 11 additions & 2 deletions components/core/src/clp_s/clp-s.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ bool compress(CommandLineArguments const& command_line_arguments) {

clp_s::JsonParserOption option{};
option.file_paths = command_line_arguments.get_file_paths();
option.input_file_type = command_line_arguments.get_file_type();
option.archives_dir = archives_dir.string();
option.target_encoded_size = command_line_arguments.get_target_encoded_size();
option.max_document_size = command_line_arguments.get_max_document_size();
Expand All @@ -113,9 +114,17 @@ bool compress(CommandLineArguments const& command_line_arguments) {
}

clp_s::JsonParser parser(option);
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved
if (false == parser.parse()) {
SPDLOG_ERROR("Encountered error while parsing input");
if (CommandLineArguments::FileType::KeyValueIr == option.input_file_type) {
// TODO: Functionality coming in a later PR:
//   1. Call the new parsing function in JsonParser to parse IRv2 into an archive.
//   2. Check for an error from that parsing function.
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved
// Key-value IR ingestion is not implemented in this branch yet; report it and bail out.
SPDLOG_ERROR("Compressing Key Value IR Files is not yet supported");
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved
return false;
} else {
if (false == parser.parse()) {
SPDLOG_ERROR("Encountered error while parsing input");
return false;
}
}
parser.store();
return true;
Expand Down
Loading