Skip to content

Commit

Permalink
combine new i command line option into the c command line option
Browse files Browse the repository at this point in the history
  • Loading branch information
AVMatthews committed Dec 4, 2024
1 parent 60af455 commit 2734aa0
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 214 deletions.
154 changes: 6 additions & 148 deletions components/core/src/clp_s/CommandLineArguments.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,13 +106,11 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
std::cerr << " c - compress" << std::endl;
std::cerr << " x - decompress" << std::endl;
std::cerr << " s - search" << std::endl;
std::cerr << " i - compress IR format" << std::endl;
std::cerr << std::endl;
std::cerr << "Try "
<< " c --help OR"
<< " x --help OR"
<< " s --help OR"
<< " i --help for command-specific details." << std::endl;
<< " s --help for command-specific details." << std::endl;

po::options_description visible_options;
visible_options.add(general_options);
Expand All @@ -127,7 +125,6 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
case (char)Command::Compress:
case (char)Command::Extract:
case (char)Command::Search:
case (char)Command::IrCompress:
m_command = (Command)command_input;
break;
default:
Expand Down Expand Up @@ -205,6 +202,11 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
"disable-log-order",
po::bool_switch(&m_disable_log_order),
"Do not record log order at ingestion time."
)(
"file-type",
po::value<std::string>(&m_file_type)->value_name("FILE_TYPE")->
default_value(m_file_type),
"The type of file that is to be compressed to archive (e.g Json or IR)"
);
// clang-format on

Expand Down Expand Up @@ -699,147 +701,6 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
"The --count-by-time and --count options are mutually exclusive."
);
}
} else if (Command::IrCompress == m_command) {
po::options_description compression_positional_options;
// clang-format off
compression_positional_options.add_options()(
"archives-dir",
po::value<std::string>(&m_archives_dir)->value_name("DIR"),
"output directory"
)(
"input-paths",
po::value<std::vector<std::string>>(&m_file_paths)->value_name("PATHS"),
"input paths"
);
// clang-format on

po::options_description compression_options("Compression options");
std::string metadata_db_config_file_path;
std::string input_path_list_file_path;
// clang-format off
compression_options.add_options()(
"compression-level",
po::value<int>(&m_compression_level)->value_name("LEVEL")->
default_value(m_compression_level),
"1 (fast/low compression) to 9 (slow/high compression)."
)(
"target-encoded-size",
po::value<size_t>(&m_target_encoded_size)->value_name("TARGET_ENCODED_SIZE")->
default_value(m_target_encoded_size),
"Target size (B) for the dictionaries and encoded messages before a new "
"archive is created."
)(
"min-table-size",
po::value<size_t>(&m_minimum_table_size)->value_name("MIN_TABLE_SIZE")->
default_value(m_minimum_table_size),
"Minimum size (B) for a packed table before it gets compressed."
)(
"max-document-size",
po::value<size_t>(&m_max_document_size)->value_name("DOC_SIZE")->
default_value(m_max_document_size),
"Maximum allowed size (B) for a single document before compression fails."
)(
"timestamp-key",
po::value<std::string>(&m_timestamp_key)->value_name("TIMESTAMP_COLUMN_KEY")->
default_value(m_timestamp_key),
"Path (e.g. x.y) for the field containing the log event's timestamp."
)(
"db-config-file",
po::value<std::string>(&metadata_db_config_file_path)->value_name("FILE")->
default_value(metadata_db_config_file_path),
"Global metadata DB YAML config"
)(
"files-from,f",
po::value<std::string>(&input_path_list_file_path)
->value_name("FILE")
->default_value(input_path_list_file_path),
"Compress files specified in FILE"
)(
"print-archive-stats",
po::bool_switch(&m_print_archive_stats),
"Print statistics (json) about the archive after it's compressed."
)(
"single-file-archive",
po::bool_switch(&m_single_file_archive),
"Create a single archive file instead of multiple files."
)(
"disable-log-order",
po::bool_switch(&m_disable_log_order),
"Do not record log order at ingestion time."
);
// clang-format on

po::positional_options_description positional_options;
positional_options.add("archives-dir", 1);
positional_options.add("input-paths", -1);

po::options_description all_compression_options;
all_compression_options.add(compression_options);
all_compression_options.add(compression_positional_options);

std::vector<std::string> unrecognized_options
= po::collect_unrecognized(parsed.options, po::include_positional);
unrecognized_options.erase(unrecognized_options.begin());
po::store(
po::command_line_parser(unrecognized_options)
.options(all_compression_options)
.positional(positional_options)
.run(),
parsed_command_line_options
);
po::notify(parsed_command_line_options);

if (parsed_command_line_options.count("help")) {
print_ir_compression_usage();

std::cerr << "Examples:\n";
std::cerr << " # Compress file1.ir and dir1 into archives-dir\n";
std::cerr << " " << m_program_name << " i archives-dir file1.ir dir1\n";

po::options_description visible_options;
visible_options.add(general_options);
visible_options.add(compression_options);
std::cerr << visible_options << '\n';
return ParsingResult::InfoCommand;
}

if (m_archives_dir.empty()) {
throw std::invalid_argument("No archives directory specified.");
}

if (false == input_path_list_file_path.empty()) {
if (false == read_paths_from_file(input_path_list_file_path, m_file_paths)) {
SPDLOG_ERROR("Failed to read paths from {}", input_path_list_file_path);
return ParsingResult::Failure;
}
}

if (m_file_paths.empty()) {
throw std::invalid_argument("No input paths specified.");
}

// Parse and validate global metadata DB config
if (false == metadata_db_config_file_path.empty()) {
clp::GlobalMetadataDBConfig metadata_db_config;
try {
metadata_db_config.parse_config_file(metadata_db_config_file_path);
} catch (std::exception& e) {
SPDLOG_ERROR("Failed to validate metadata database config - {}.", e.what());
return ParsingResult::Failure;
}

if (clp::GlobalMetadataDBConfig::MetadataDBType::MySQL
!= metadata_db_config.get_metadata_db_type())
{
SPDLOG_ERROR(
"Invalid metadata database type for {}; only supported type is MySQL.",
m_program_name
);
return ParsingResult::Failure;
}

m_metadata_db_config = std::move(metadata_db_config);
}
}
} catch (std::exception& e) {
SPDLOG_ERROR("{}", e.what());
Expand Down Expand Up @@ -954,7 +815,4 @@ void CommandLineArguments::print_search_usage() const {
<< std::endl;
}

/**
 * Prints the one-line usage synopsis for the IR compression (`i`) command to stderr.
 */
void CommandLineArguments::print_ir_compression_usage() const {
    std::cerr << "Usage: ";
    std::cerr << m_program_name;
    std::cerr << " i [OPTIONS] ARCHIVES_DIR [FILE/DIR ...]\n";
}
} // namespace clp_s
8 changes: 4 additions & 4 deletions components/core/src/clp_s/CommandLineArguments.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,7 @@ class CommandLineArguments {
enum class Command : char {
Compress = 'c',
Extract = 'x',
Search = 's',
IrCompress = 'i'
Search = 's'
};

enum class OutputHandlerType : uint8_t {
Expand Down Expand Up @@ -117,6 +116,8 @@ class CommandLineArguments {

/**
 * @return true unless the `disable-log-order` switch was set, i.e. whether log order should be
 * recorded.
 */
bool get_record_log_order() const {
    return false == m_disable_log_order;
}

/**
 * @return A copy of the value bound to the `file-type` option (the member's initial value is
 * used when the option isn't given).
 */
[[nodiscard]] auto get_file_type() const -> std::string {
    return m_file_type;
}

private:
// Methods
/**
Expand Down Expand Up @@ -164,8 +165,6 @@ class CommandLineArguments {

void print_decompression_usage() const;

void print_ir_compression_usage() const;

void print_search_usage() const;

// Variables
Expand All @@ -187,6 +186,7 @@ class CommandLineArguments {
size_t m_target_ordered_chunk_size{};
size_t m_minimum_table_size{1ULL * 1024 * 1024}; // 1 MB
bool m_disable_log_order{false};
std::string m_file_type{"Json"};

// Metadata db variables
std::optional<clp::GlobalMetadataDBConfig> m_metadata_db_config;
Expand Down
81 changes: 19 additions & 62 deletions components/core/src/clp_s/clp-s.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,16 @@ bool search_archive(
);

bool compress(CommandLineArguments const& command_line_arguments) {
auto file_type = command_line_arguments.get_file_type();
if ("IR" != file_type && "Json" != file_type) {
SPDLOG_ERROR("File Type specified is Invalid");
return false;
}
if ("IR" == file_type && command_line_arguments.get_structurize_arrays()) {
SPDLOG_ERROR("ERROR: structurized arrays are not supported for IR files");
return false;
}

auto archives_dir = std::filesystem::path(command_line_arguments.get_archives_dir());

// Create output directory in case it doesn't exist
Expand Down Expand Up @@ -120,69 +130,20 @@ bool compress(CommandLineArguments const& command_line_arguments) {
}

clp_s::JsonParser parser(option);
if (false == parser.parse()) {
SPDLOG_ERROR("Encountered error while parsing input");
return false;
if ("IR" == file_type) {
// Functionality Coming in later PR
// -->Call new parsing function in Json Parser to parse IRv2 to archive
// -->Check for error from parsing function
} else {
if (false == parser.parse()) {
SPDLOG_ERROR("Encountered error while parsing input");
return false;
}
}
parser.store();
return true;
}

/**
 * Populates the parser options from the parsed command line arguments, creating the archives
 * directory first.
 * @param command_line_arguments Source of the archive, input, and metadata DB settings.
 * @param option Receives the settings; it is not modified if creating the directory throws.
 * @return false if creating the archives directory throws, true otherwise.
 */
auto setup_compression_options(
        CommandLineArguments const& command_line_arguments,
        clp_s::JsonParserOption& option
) -> bool {
    auto const output_dir = std::filesystem::path(command_line_arguments.get_archives_dir());

    // Make sure the output directory exists before anything is written into `option`.
    try {
        std::filesystem::create_directory(output_dir.string());
    } catch (std::exception const& error) {
        SPDLOG_ERROR(
                "Failed to create archives directory {} - {}",
                output_dir.string(),
                error.what()
        );
        return false;
    }

    // Input/output locations
    option.file_paths = command_line_arguments.get_file_paths();
    option.archives_dir = output_dir.string();

    // Size limits and compression tuning
    option.target_encoded_size = command_line_arguments.get_target_encoded_size();
    option.max_document_size = command_line_arguments.get_max_document_size();
    option.min_table_size = command_line_arguments.get_minimum_table_size();
    option.compression_level = command_line_arguments.get_compression_level();

    // Behavioural switches
    option.timestamp_key = command_line_arguments.get_timestamp_key();
    option.print_archive_stats = command_line_arguments.print_archive_stats();
    option.single_file_archive = command_line_arguments.get_single_file_archive();
    option.record_log_order = command_line_arguments.get_record_log_order();

    // A metadata DB connection is only configured when a DB config was supplied.
    if (auto const& maybe_db_config = command_line_arguments.get_metadata_db_config();
        maybe_db_config.has_value())
    {
        auto const& db_settings = maybe_db_config.value();
        option.metadata_db = std::make_shared<clp::GlobalMySQLMetadataDB>(
                db_settings.get_metadata_db_host(),
                db_settings.get_metadata_db_port(),
                db_settings.get_metadata_db_username(),
                db_settings.get_metadata_db_password(),
                db_settings.get_metadata_db_name(),
                db_settings.get_metadata_table_prefix()
        );
    }

    return true;
}

/**
 * Entry point for the IR compression command. At present it only prepares the parser options;
 * the IR parsing itself is not implemented here yet.
 * @param command_line_arguments Parsed command line arguments.
 * @return false if the compression options could not be set up, true otherwise.
 */
auto ir_compress(CommandLineArguments const& command_line_arguments) -> bool {
    clp_s::JsonParserOption parser_option{};
    bool const options_ready = setup_compression_options(command_line_arguments, parser_option);
    if (false == options_ready) {
        return false;
    }

    // Functionality coming in a later PR:
    //   --> Instantiate the JSON parser
    //   --> Call the new parsing function in the JSON parser to parse IRv2 into an archive
    //   --> Store the archive
    return true;
}

void decompress_archive(clp_s::JsonConstructorOption const& json_constructor_option) {
clp_s::JsonConstructor constructor(json_constructor_option);
constructor.store();
Expand Down Expand Up @@ -352,10 +313,6 @@ int main(int argc, char const* argv[]) {
if (false == compress(command_line_arguments)) {
return 1;
}
} else if (CommandLineArguments::Command::IrCompress == command_line_arguments.get_command()) {
if (false == ir_compress(command_line_arguments)) {
return 1;
}
} else if (CommandLineArguments::Command::Extract == command_line_arguments.get_command()) {
auto const& archives_dir = command_line_arguments.get_archives_dir();
if (false == std::filesystem::is_directory(archives_dir)) {
Expand Down

0 comments on commit 2734aa0

Please sign in to comment.