diff --git a/components/core/src/clp_s/CommandLineArguments.cpp b/components/core/src/clp_s/CommandLineArguments.cpp index 99539b627..c7fb9487e 100644 --- a/components/core/src/clp_s/CommandLineArguments.cpp +++ b/components/core/src/clp_s/CommandLineArguments.cpp @@ -148,6 +148,9 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) { po::options_description compression_options("Compression options"); std::string metadata_db_config_file_path; std::string input_path_list_file_path; + constexpr std::string_view cJsonFileType{"json"}; + constexpr std::string_view cKeyValueIrFileType{"kv-ir"}; + std::string file_type{cJsonFileType}; // clang-format off compression_options.add_options()( "compression-level", @@ -202,6 +205,10 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) { "disable-log-order", po::bool_switch(&m_disable_log_order), "Do not record log order at ingestion time." + )( + "file-type", + po::value(&file_type)->value_name("FILE_TYPE")->default_value(file_type), + "The type of file being compressed (json or kv-ir)" ); // clang-format on @@ -255,6 +262,22 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) { throw std::invalid_argument("No input paths specified."); } + if (cJsonFileType == file_type) { + m_file_type = FileType::Json; + } else if (cKeyValueIrFileType == file_type) { + m_file_type = FileType::KeyValueIr; + if (m_structurize_arrays) { + SPDLOG_ERROR( + "Invalid combination of arguments; --file-type {} and " + "--structurize-arrays can't be used together", + cKeyValueIrFileType + ); + return ParsingResult::Failure; + } + } else { + throw std::invalid_argument("Unknown FILE_TYPE: " + file_type); + } + // Parse and validate global metadata DB config if (false == metadata_db_config_file_path.empty()) { clp::GlobalMetadataDBConfig metadata_db_config; diff --git a/components/core/src/clp_s/CommandLineArguments.hpp b/components/core/src/clp_s/CommandLineArguments.hpp index a87e9b6bd..47c244646 100644 --- a/components/core/src/clp_s/CommandLineArguments.hpp +++ b/components/core/src/clp_s/CommandLineArguments.hpp @@ -36,6 +36,11 @@ class CommandLineArguments { Stdout, }; + enum class FileType : uint8_t { + Json = 0, + KeyValueIr + }; + // Constructors explicit CommandLineArguments(std::string const& program_name) : m_program_name(program_name) {} @@ -116,6 +121,8 @@ class CommandLineArguments { bool get_record_log_order() const { return false == m_disable_log_order; } + [[nodiscard]] auto get_file_type() const -> FileType { return m_file_type; } + private: // Methods /** @@ -184,6 +191,7 @@ class CommandLineArguments { size_t m_target_ordered_chunk_size{}; size_t m_minimum_table_size{1ULL * 1024 * 1024}; // 1 MB bool m_disable_log_order{false}; + FileType m_file_type{FileType::Json}; // Metadata db variables std::optional m_metadata_db_config; diff --git a/components/core/src/clp_s/JsonParser.hpp b/components/core/src/clp_s/JsonParser.hpp index bfd423c22..c05ab9d60 100644 --- a/components/core/src/clp_s/JsonParser.hpp +++ b/components/core/src/clp_s/JsonParser.hpp @@ -12,6 +12,7 @@ #include "../clp/GlobalMySQLMetadataDB.hpp" #include "ArchiveWriter.hpp" +#include "CommandLineArguments.hpp" #include "DictionaryWriter.hpp" #include "FileReader.hpp" #include "FileWriter.hpp" @@ -29,6 +30,7 @@ using namespace simdjson; namespace clp_s { struct JsonParserOption { std::vector file_paths; + CommandLineArguments::FileType input_file_type{CommandLineArguments::FileType::Json}; std::string timestamp_key; std::string archives_dir; size_t target_encoded_size{}; diff --git a/components/core/src/clp_s/clp-s.cpp b/components/core/src/clp_s/clp-s.cpp index b76683caf..2c6639290 100644 --- a/components/core/src/clp_s/clp-s.cpp +++ b/components/core/src/clp_s/clp-s.cpp @@ -88,6 +88,7 @@ bool compress(CommandLineArguments const& command_line_arguments) { clp_s::JsonParserOption option{}; option.file_paths = command_line_arguments.get_file_paths(); + option.input_file_type = command_line_arguments.get_file_type(); option.archives_dir = archives_dir.string(); option.target_encoded_size = command_line_arguments.get_target_encoded_size(); option.max_document_size = command_line_arguments.get_max_document_size(); @@ -113,9 +114,17 @@ bool compress(CommandLineArguments const& command_line_arguments) { } clp_s::JsonParser parser(option); - if (false == parser.parse()) { - SPDLOG_ERROR("Encountered error while parsing input"); + if (CommandLineArguments::FileType::KeyValueIr == option.input_file_type) { + // Functionality Coming in later PR + // -->Call new parsing function in Json Parser to parse IRv2 to archive + // -->Check for error from parsing function + SPDLOG_ERROR("Compressing Key Value IR Files is not yet supported"); return false; + } else { + if (false == parser.parse()) { + SPDLOG_ERROR("Encountered error while parsing input"); + return false; + } } parser.store(); return true;