diff --git a/components/core/src/clp/LibarchiveFileReader.cpp b/components/core/src/clp/LibarchiveFileReader.cpp index 6ba0e980d..ff2059634 100644 --- a/components/core/src/clp/LibarchiveFileReader.cpp +++ b/components/core/src/clp/LibarchiveFileReader.cpp @@ -1,6 +1,7 @@ #include "LibarchiveFileReader.hpp" #include +#include #include "spdlog_with_specializations.hpp" @@ -38,7 +39,7 @@ LibarchiveFileReader::try_read(char* buf, size_t num_bytes_to_read, size_t& num_ while (true) { // Read a data block if necessary if (nullptr == m_data_block) { - auto error_code = read_next_data_block(); + auto error_code = read_next_nonempty_data_block(); if (ErrorCode_Success != error_code) { if (ErrorCode_EndOfFile == error_code && num_bytes_read > 0) { return ErrorCode_Success; @@ -111,7 +112,7 @@ ErrorCode LibarchiveFileReader::try_read_to_delimiter( while (true) { // Read a data block if necessary if (nullptr == m_data_block) { - auto error_code = read_next_data_block(); + auto error_code = read_next_nonempty_data_block(); if (ErrorCode_Success != error_code) { if (ErrorCode_EndOfFile == error_code && str.length() > original_str_length) { // NOTE: At this point, we haven't found delim, so return directly without @@ -206,7 +207,7 @@ void LibarchiveFileReader::close() { m_pos_in_file = 0; } -ErrorCode LibarchiveFileReader::try_load_data_block() { +auto LibarchiveFileReader::try_load_nonempty_data_block() -> ErrorCode { if (nullptr == m_archive) { throw OperationFailed(ErrorCode_NotInit, __FILENAME__, __LINE__); } @@ -217,10 +218,10 @@ ErrorCode LibarchiveFileReader::try_load_data_block() { if (m_data_block != nullptr) { return ErrorCode_Success; } - return read_next_data_block(); + return read_next_nonempty_data_block(); } -void LibarchiveFileReader::peek_buffered_data(char const*& buf, size_t& buf_size) const { +auto LibarchiveFileReader::peek_buffered_data() const -> std::span { if (nullptr == m_archive) { throw OperationFailed(ErrorCode_NotInit, __FILENAME__, __LINE__); } @@ 
-228,38 +229,40 @@ void LibarchiveFileReader::peek_buffered_data(char const*& buf, size_t& buf_size throw OperationFailed(ErrorCode_NotInit, __FILENAME__, __LINE__); } if (nullptr == m_data_block) { - buf_size = 0; - buf = nullptr; - } else if (m_pos_in_file < m_data_block_pos_in_file) { + return {}; + } + if (m_pos_in_file < m_data_block_pos_in_file) { // Position in the file is before the current data block, so we return nulls corresponding // to the sparse bytes before the data block // NOTE: We don't return ALL sparse bytes before the data block since that might require // allocating more bytes, violating the const-ness of this method. Since peek is a // best-effort method, this should be sufficient for most callers. - buf = m_nulls_for_peek.data(); - buf_size = std::min( - m_nulls_for_peek.size(), - static_cast(m_data_block_pos_in_file - m_pos_in_file) - ); - } else { - buf_size = m_data_block_length - m_pos_in_data_block; - buf = static_cast(m_data_block); + return {m_nulls_for_peek.data(), + std::min( + m_nulls_for_peek.size(), + static_cast(m_data_block_pos_in_file - m_pos_in_file) + )}; } + + return {static_cast(m_data_block), m_data_block_length - m_pos_in_data_block}; } -ErrorCode LibarchiveFileReader::read_next_data_block() { - auto return_value = archive_read_data_block( - m_archive, - &m_data_block, - &m_data_block_length, - &m_data_block_pos_in_file - ); - if (ARCHIVE_OK != return_value) { - m_data_block = nullptr; - if (ARCHIVE_EOF == return_value) { - m_reached_eof = true; - return ErrorCode_EndOfFile; - } else { +auto LibarchiveFileReader::read_next_nonempty_data_block() -> ErrorCode { + m_data_block_length = 0; + m_pos_in_data_block = 0; + while (0 == m_data_block_length) { + auto return_value = archive_read_data_block( + m_archive, + &m_data_block, + &m_data_block_length, + &m_data_block_pos_in_file + ); + if (ARCHIVE_OK != return_value) { + m_data_block = nullptr; + if (ARCHIVE_EOF == return_value) { + m_reached_eof = true; + return 
ErrorCode_EndOfFile; + } SPDLOG_DEBUG( "Failed to read data block from libarchive - {}", archive_error_string(m_archive) @@ -268,8 +271,6 @@ ErrorCode LibarchiveFileReader::read_next_data_block() { } } - m_pos_in_data_block = 0; - return ErrorCode_Success; } } // namespace clp diff --git a/components/core/src/clp/LibarchiveFileReader.hpp b/components/core/src/clp/LibarchiveFileReader.hpp index 2b7d17fad..f357ecc1b 100644 --- a/components/core/src/clp/LibarchiveFileReader.hpp +++ b/components/core/src/clp/LibarchiveFileReader.hpp @@ -2,6 +2,7 @@ #define CLP_LIBARCHIVEFILEREADER_HPP #include +#include #include #include @@ -30,13 +31,7 @@ class LibarchiveFileReader : public ReaderInterface { }; // Constructors - LibarchiveFileReader() - : m_archive(nullptr), - m_archive_entry(nullptr), - m_data_block(nullptr), - m_reached_eof(false), - m_data_block_pos_in_file(0), - m_pos_in_file(0) {} + LibarchiveFileReader() = default; // Methods implementing the ReaderInterface /** @@ -89,43 +84,42 @@ class LibarchiveFileReader : public ReaderInterface { void close(); /** - * Tries to the load a data block from the file if none is loaded + * Tries to load a nonempty data block from the file if none is loaded * @return ErrorCode_EndOfFile on EOF * @return ErrorCode_Failure on failure * @return ErrorCode_Success on success */ - [[nodiscard]] ErrorCode try_load_data_block(); + [[nodiscard]] auto try_load_nonempty_data_block() -> ErrorCode; /** * Peeks the remaining buffered content without advancing the read head. * * NOTE: Any subsequent read or seek operations may invalidate the returned buffer.
- * @param buf Returns a pointer to any buffered data - * @param buf_size Returns the number of bytes in the buffer + * @return The view of the buffered data */ - void peek_buffered_data(char const*& buf, size_t& buf_size) const; + [[nodiscard]] auto peek_buffered_data() const -> std::span; private: // Methods /** - * Reads next data block from the archive + * Reads next nonempty data block from the archive * @return ErrorCode_EndOfFile on EOF * @return ErrorCode_Failure on failure * @return ErrorCode_Success on success */ - ErrorCode read_next_data_block(); + [[nodiscard]] auto read_next_nonempty_data_block() -> ErrorCode; // Variables - struct archive* m_archive; + struct archive* m_archive{nullptr}; - struct archive_entry* m_archive_entry; - la_int64_t m_data_block_pos_in_file; - void const* m_data_block; - size_t m_data_block_length; - la_int64_t m_pos_in_data_block; - bool m_reached_eof; + struct archive_entry* m_archive_entry{nullptr}; + la_int64_t m_data_block_pos_in_file{0}; + void const* m_data_block{nullptr}; + size_t m_data_block_length{0}; + la_int64_t m_pos_in_data_block{0}; + bool m_reached_eof{false}; - size_t m_pos_in_file; + size_t m_pos_in_file{0}; // Nulls for peek std::array m_nulls_for_peek{0}; diff --git a/components/core/src/clp/clp/FileCompressor.cpp b/components/core/src/clp/clp/FileCompressor.cpp index 4100816f5..949a677be 100644 --- a/components/core/src/clp/clp/FileCompressor.cpp +++ b/components/core/src/clp/clp/FileCompressor.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -343,7 +344,7 @@ bool FileCompressor::try_compressing_as_archive( m_libarchive_reader.open_file_reader(m_libarchive_file_reader); // Check that file is UTF-8 encoded - if (auto error_code = m_libarchive_file_reader.try_load_data_block(); + if (auto error_code = m_libarchive_file_reader.try_load_nonempty_data_block(); ErrorCode_Success != error_code && ErrorCode_EndOfFile != error_code) { SPDLOG_ERROR( @@ -355,12 +356,10 @@ bool 
FileCompressor::try_compressing_as_archive( succeeded = false; continue; } - char const* utf8_validation_buf{nullptr}; - size_t peek_size{0}; - m_libarchive_file_reader.peek_buffered_data(utf8_validation_buf, peek_size); + auto const utf8_validation_buf{m_libarchive_file_reader.peek_buffered_data()}; string file_path{m_libarchive_reader.get_path()}; - auto utf8_validation_buf_len = std::min(peek_size, cUtfMaxValidationLen); - if (is_utf8_sequence(utf8_validation_buf_len, utf8_validation_buf)) { + auto utf8_validation_buf_len = std::min(utf8_validation_buf.size(), cUtfMaxValidationLen); + if (is_utf8_sequence(utf8_validation_buf_len, utf8_validation_buf.data())) { auto boost_path_for_compression = parent_boost_path / file_path; if (use_heuristic) { parse_and_encode_with_heuristic( @@ -383,7 +382,10 @@ bool FileCompressor::try_compressing_as_archive( m_libarchive_file_reader ); } - } else if (has_ir_stream_magic_number({utf8_validation_buf, peek_size})) { + } else if (has_ir_stream_magic_number( + {utf8_validation_buf.data(), utf8_validation_buf.size()} + )) + { // Remove .clp suffix if found static constexpr char cIrStreamExtension[] = ".clp"; if (boost::iends_with(file_path, cIrStreamExtension)) {