Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(clp-s): Add command line options for stubbed out kv-pair-IR ingestion. #618

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
150 changes: 149 additions & 1 deletion components/core/src/clp_s/CommandLineArguments.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,11 +106,13 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
std::cerr << " c - compress" << std::endl;
std::cerr << " x - decompress" << std::endl;
std::cerr << " s - search" << std::endl;
std::cerr << " i - compress IR format" << std::endl;
std::cerr << std::endl;
std::cerr << "Try "
<< " c --help OR"
<< " x --help OR"
<< " s --help for command-specific details." << std::endl;
<< " s --help OR"
<< " i --help for command-specific details." << std::endl;

po::options_description visible_options;
visible_options.add(general_options);
Expand All @@ -125,6 +127,7 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
case (char)Command::Compress:
case (char)Command::Extract:
case (char)Command::Search:
case (char)Command::IrCompress:
m_command = (Command)command_input;
break;
default:
Expand Down Expand Up @@ -696,6 +699,147 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
"The --count-by-time and --count options are mutually exclusive."
);
}
} else if (Command::IrCompress == m_command) {
po::options_description compression_positional_options;
// clang-format off
compression_positional_options.add_options()(
"archives-dir",
po::value<std::string>(&m_archives_dir)->value_name("DIR"),
"output directory"
)(
"input-paths",
po::value<std::vector<std::string>>(&m_file_paths)->value_name("PATHS"),
"input paths"
);
// clang-format on

po::options_description compression_options("Compression options");
std::string metadata_db_config_file_path;
std::string input_path_list_file_path;
// clang-format off
compression_options.add_options()(
"compression-level",
po::value<int>(&m_compression_level)->value_name("LEVEL")->
default_value(m_compression_level),
"1 (fast/low compression) to 9 (slow/high compression)."
)(
"target-encoded-size",
po::value<size_t>(&m_target_encoded_size)->value_name("TARGET_ENCODED_SIZE")->
default_value(m_target_encoded_size),
"Target size (B) for the dictionaries and encoded messages before a new "
"archive is created."
)(
"min-table-size",
po::value<size_t>(&m_minimum_table_size)->value_name("MIN_TABLE_SIZE")->
default_value(m_minimum_table_size),
"Minimum size (B) for a packed table before it gets compressed."
)(
"max-document-size",
po::value<size_t>(&m_max_document_size)->value_name("DOC_SIZE")->
default_value(m_max_document_size),
"Maximum allowed size (B) for a single document before compression fails."
)(
"timestamp-key",
po::value<std::string>(&m_timestamp_key)->value_name("TIMESTAMP_COLUMN_KEY")->
default_value(m_timestamp_key),
"Path (e.g. x.y) for the field containing the log event's timestamp."
)(
"db-config-file",
po::value<std::string>(&metadata_db_config_file_path)->value_name("FILE")->
default_value(metadata_db_config_file_path),
"Global metadata DB YAML config"
)(
"files-from,f",
po::value<std::string>(&input_path_list_file_path)
->value_name("FILE")
->default_value(input_path_list_file_path),
"Compress files specified in FILE"
)(
"print-archive-stats",
po::bool_switch(&m_print_archive_stats),
"Print statistics (json) about the archive after it's compressed."
)(
"single-file-archive",
po::bool_switch(&m_single_file_archive),
"Create a single archive file instead of multiple files."
)(
"disable-log-order",
po::bool_switch(&m_disable_log_order),
"Do not record log order at ingestion time."
);
// clang-format on

po::positional_options_description positional_options;
positional_options.add("archives-dir", 1);
positional_options.add("input-paths", -1);

po::options_description all_compression_options;
all_compression_options.add(compression_options);
all_compression_options.add(compression_positional_options);

std::vector<std::string> unrecognized_options
= po::collect_unrecognized(parsed.options, po::include_positional);
unrecognized_options.erase(unrecognized_options.begin());
po::store(
po::command_line_parser(unrecognized_options)
.options(all_compression_options)
.positional(positional_options)
.run(),
parsed_command_line_options
);
po::notify(parsed_command_line_options);

if (parsed_command_line_options.count("help")) {
print_ir_compression_usage();

std::cerr << "Examples:\n";
std::cerr << " # Compress file1.ir and dir1 into archives-dir\n";
std::cerr << " " << m_program_name << " i archives-dir file1.ir dir1\n";

po::options_description visible_options;
visible_options.add(general_options);
visible_options.add(compression_options);
std::cerr << visible_options << '\n';
return ParsingResult::InfoCommand;
}

if (m_archives_dir.empty()) {
throw std::invalid_argument("No archives directory specified.");
}

if (false == input_path_list_file_path.empty()) {
if (false == read_paths_from_file(input_path_list_file_path, m_file_paths)) {
SPDLOG_ERROR("Failed to read paths from {}", input_path_list_file_path);
return ParsingResult::Failure;
}
}

if (m_file_paths.empty()) {
throw std::invalid_argument("No input paths specified.");
}

// Parse and validate global metadata DB config
if (false == metadata_db_config_file_path.empty()) {
clp::GlobalMetadataDBConfig metadata_db_config;
try {
metadata_db_config.parse_config_file(metadata_db_config_file_path);
} catch (std::exception& e) {
SPDLOG_ERROR("Failed to validate metadata database config - {}.", e.what());
return ParsingResult::Failure;
}

if (clp::GlobalMetadataDBConfig::MetadataDBType::MySQL
!= metadata_db_config.get_metadata_db_type())
{
SPDLOG_ERROR(
"Invalid metadata database type for {}; only supported type is MySQL.",
m_program_name
);
return ParsingResult::Failure;
}

m_metadata_db_config = std::move(metadata_db_config);
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved
}
}
} catch (std::exception& e) {
SPDLOG_ERROR("{}", e.what());
Expand Down Expand Up @@ -809,4 +953,8 @@ void CommandLineArguments::print_search_usage() const {
" [OUTPUT_HANDLER [OUTPUT_HANDLER_OPTIONS]]"
<< std::endl;
}

AVMatthews marked this conversation as resolved.
Show resolved Hide resolved
/**
 * Prints the one-line usage synopsis for the IR compression ("i") command to stderr.
 */
void CommandLineArguments::print_ir_compression_usage() const {
    std::cerr << "Usage: " << m_program_name;
    std::cerr << " i [OPTIONS] ARCHIVES_DIR [FILE/DIR ...]" << '\n';
}
} // namespace clp_s
5 changes: 4 additions & 1 deletion components/core/src/clp_s/CommandLineArguments.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ class CommandLineArguments {
// Top-level commands. Each enumerator's value is the single character that selects the command
// on the command line: the parsed command character is cast directly to this enum.
enum class Command : char {
Compress = 'c',
Extract = 'x',
Search = 's'
Search = 's',
IrCompress = 'i'  // listed as "compress IR format" in the general help text
};

enum class OutputHandlerType : uint8_t {
Expand Down Expand Up @@ -163,6 +164,8 @@ class CommandLineArguments {

void print_decompression_usage() const;

// Prints the usage line for the IR compression ("i") command to stderr.
void print_ir_compression_usage() const;

void print_search_usage() const;

// Variables
Expand Down
66 changes: 66 additions & 0 deletions components/core/src/clp_s/clp-s.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,13 @@ namespace {
*/
bool compress(CommandLineArguments const& command_line_arguments);

/**
 * Compresses the input IR files specified by the command line arguments into an archive.
 *
 * NOTE: The implementation is currently a stub. It only sets up the compression options (which
 * also creates the archives directory); parsing the IR input and storing the archive are not yet
 * implemented.
 * @param command_line_arguments
 * @return Whether compression was successful
 */
auto ir_compress(CommandLineArguments const& command_line_arguments) -> bool;

/**
* Decompresses the archive specified by the given JsonConstructorOption.
* @param json_constructor_option
Expand Down Expand Up @@ -121,6 +128,61 @@ bool compress(CommandLineArguments const& command_line_arguments) {
return true;
}

/**
 * Populates a JsonParserOption from the parsed command line arguments and makes sure the archives
 * directory exists.
 * @param command_line_arguments Source of the compression settings.
 * @param option Receives the settings; left untouched if the archives directory can't be created.
 * @return false if creating the archives directory throws, true otherwise.
 */
auto setup_compression_options(
        CommandLineArguments const& command_line_arguments,
        clp_s::JsonParserOption& option
) -> bool {
    auto const output_dir = std::filesystem::path(command_line_arguments.get_archives_dir());
    auto const output_dir_str = output_dir.string();

    // The output directory may not exist yet, so create it before anything else.
    try {
        std::filesystem::create_directory(output_dir_str);
    } catch (std::exception const& e) {
        SPDLOG_ERROR("Failed to create archives directory {} - {}", output_dir_str, e.what());
        return false;
    }

    // Input/output locations
    option.archives_dir = output_dir_str;
    option.file_paths = command_line_arguments.get_file_paths();

    // Size limits and compression tuning
    option.compression_level = command_line_arguments.get_compression_level();
    option.target_encoded_size = command_line_arguments.get_target_encoded_size();
    option.min_table_size = command_line_arguments.get_minimum_table_size();
    option.max_document_size = command_line_arguments.get_max_document_size();

    // Parsing and archive-layout behaviour
    option.timestamp_key = command_line_arguments.get_timestamp_key();
    option.record_log_order = command_line_arguments.get_record_log_order();
    option.single_file_archive = command_line_arguments.get_single_file_archive();
    option.print_archive_stats = command_line_arguments.print_archive_stats();

    // A metadata DB handle is only created when a DB config was supplied.
    if (auto const& maybe_db_config = command_line_arguments.get_metadata_db_config();
        maybe_db_config.has_value())
    {
        option.metadata_db = std::make_shared<clp::GlobalMySQLMetadataDB>(
                maybe_db_config->get_metadata_db_host(),
                maybe_db_config->get_metadata_db_port(),
                maybe_db_config->get_metadata_db_username(),
                maybe_db_config->get_metadata_db_password(),
                maybe_db_config->get_metadata_db_name(),
                maybe_db_config->get_metadata_table_prefix()
        );
    }
    return true;
}

/**
 * Entry point for the IR compression command. For now it only prepares the compression options;
 * the actual IR ingestion is not implemented yet.
 * @param command_line_arguments
 * @return false if the compression options could not be set up, true otherwise.
 */
auto ir_compress(CommandLineArguments const& command_line_arguments) -> bool {
    clp_s::JsonParserOption parser_option{};
    bool const options_ready = setup_compression_options(command_line_arguments, parser_option);

    // Functionality coming in a later PR (only once the options are ready):
    //   1. Instantiate the JSON parser
    //   2. Call the new parsing function in the JSON parser to parse IRv2 into an archive
    //   3. Store the archive
    return options_ready;
}
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved

void decompress_archive(clp_s::JsonConstructorOption const& json_constructor_option) {
clp_s::JsonConstructor constructor(json_constructor_option);
constructor.store();
Expand Down Expand Up @@ -290,6 +352,10 @@ int main(int argc, char const* argv[]) {
if (false == compress(command_line_arguments)) {
return 1;
}
} else if (CommandLineArguments::Command::IrCompress == command_line_arguments.get_command()) {
if (false == ir_compress(command_line_arguments)) {
return 1;
}
} else if (CommandLineArguments::Command::Extract == command_line_arguments.get_command()) {
auto const& archives_dir = command_line_arguments.get_archives_dir();
if (false == std::filesystem::is_directory(archives_dir)) {
Expand Down
Loading