From 881a67ec5c39c03a5d5b39862ed0054a583f31b6 Mon Sep 17 00:00:00 2001 From: Anton Filimonov Date: Wed, 5 May 2021 13:10:52 +0300 Subject: [PATCH] Use string_view for safety and clarity Using string_view instead of low level manipulations with pointers seems to be much more readable and safer. Should prevent off-by-one errors. Although this is used in core indexing loop, string_view::find(char) seems to be either a wrapper around std::memchr, so performance should stay the same. --- src/logdata/src/logdataworker.cpp | 90 +++++++++++++++---------------- 1 file changed, 44 insertions(+), 46 deletions(-) diff --git a/src/logdata/src/logdataworker.cpp b/src/logdata/src/logdataworker.cpp index 3b1fca971..ef43ea5f6 100644 --- a/src/logdata/src/logdataworker.cpp +++ b/src/logdata/src/logdataworker.cpp @@ -50,6 +50,7 @@ #include #include +#include #include constexpr int IndexingBlockSize = 1 * 1024 * 1024; @@ -160,7 +161,7 @@ void LogDataWorker::indexAll( QTextCodec* forcedEncoding ) operationFuture_.waitForFinished(); interruptRequest_.clear(); - operationFuture_ = QtConcurrent::run( [ this, forcedEncoding, fileName = fileName_ ] { + operationFuture_ = QtConcurrent::run( [this, forcedEncoding, fileName = fileName_] { auto operationRequested = std::make_unique( fileName, indexing_data_, interruptRequest_, forcedEncoding ); return connectSignalsAndRun( operationRequested.get() ); @@ -175,7 +176,7 @@ void LogDataWorker::indexAdditionalLines() operationFuture_.waitForFinished(); interruptRequest_.clear(); - operationFuture_ = QtConcurrent::run( [ this, fileName = fileName_ ] { + operationFuture_ = QtConcurrent::run( [this, fileName = fileName_] { auto operationRequested = std::make_unique( fileName, indexing_data_, interruptRequest_ ); return connectSignalsAndRun( operationRequested.get() ); @@ -190,7 +191,7 @@ void LogDataWorker::checkFileChanges() operationFuture_.waitForFinished(); interruptRequest_.clear(); - operationFuture_ = QtConcurrent::run( [ this, fileName = fileName_ ] { + operationFuture_ = QtConcurrent::run( [this, fileName = fileName_] { auto operationRequested = std::make_unique( fileName, indexing_data_, interruptRequest_ ); @@ -253,38 +254,35 @@ FastLinePositionArray IndexOperation::parseDataBlock( LineOffset::UnderlyingType int pos_within_block = 0; - const auto adjustToCharWidth = [ &state ]( auto pos ) { - return static_cast( pos ) - state.encodingParams.getBeforeCrOffset(); + const auto charOffsetWithinBlock = [&state, block_start = block.data()]( const char* pointer ) { + return static_cast( std::distance( block_start, pointer ) ) + - state.encodingParams.getBeforeCrOffset(); }; - const auto expandTabs = [ & ]( const char* start_in_line, auto line_size ) { - auto tab_search_start = start_in_line; - auto next_tab = reinterpret_cast( - std::memchr( tab_search_start, '\t', static_cast( line_size ) ) ); - while ( next_tab != nullptr ) { - pos_within_block = adjustToCharWidth( next_tab - block.data() ); - - LOG_DEBUG << "Tab at " << pos_within_block; - - state.additional_spaces - += TabStop - - ( static_cast( ( block_beginning - state.pos ) + pos_within_block - + state.additional_spaces ) - % TabStop ) - - 1; - - const auto tab_substring_size = static_cast( next_tab - tab_search_start ); - line_size -= tab_substring_size; - tab_search_start = next_tab + 1; - if ( line_size > 0 ) { - next_tab = reinterpret_cast( - std::memchr( tab_search_start, '\t', static_cast( line_size ) ) ); - } - else { - next_tab = nullptr; - } - } - }; + const auto expandTabs + = [&state, &charOffsetWithinBlock, pos_within_block]( absl::string_view blockToExpand ) { + while ( !blockToExpand.empty() ) { + const auto next_tab = blockToExpand.find( '\t' ); + if ( next_tab == absl::string_view::npos ) { + break; + } + + const auto tab_pos_within_block + = charOffsetWithinBlock( blockToExpand.begin() + next_tab ); + + LOG_DEBUG << "Tab at " << tab_pos_within_block; + + const auto current_expanded_size + = tab_pos_within_block - pos_within_block + state.additional_spaces; + + state.additional_spaces += TabStop - ( current_expanded_size % TabStop ) - 1; + if ( next_tab >= blockToExpand.size() ) { + break; + } + + blockToExpand.remove_prefix( next_tab + 1 ); + } + }; while ( pos_within_block != -1 ) { pos_within_block = qMax( static_cast( state.pos - block_beginning ), 0 ); @@ -292,19 +290,19 @@ FastLinePositionArray IndexOperation::parseDataBlock( LineOffset::UnderlyingType // Looking for the next \n, expanding tabs in the process const auto search_start = block.data() + pos_within_block; - const auto search_line_size = block.size() - pos_within_block; + const auto search_line_size = static_cast( block.size() - pos_within_block ); if ( search_line_size > 0 ) { - const auto next_line_feed = reinterpret_cast( - std::memchr( search_start, '\n', static_cast( search_line_size ) ) ); + const auto block_view = absl::string_view( search_start, search_line_size ); + const auto next_line_feed = block_view.find( '\n' ); - if ( next_line_feed != nullptr ) { - expandTabs( search_start, next_line_feed - search_start ); - pos_within_block = adjustToCharWidth( next_line_feed - block.data() ); + if ( next_line_feed != absl::string_view::npos ) { + expandTabs( block_view.substr( 0, next_line_feed ) ); + pos_within_block = charOffsetWithinBlock( search_start + next_line_feed ); LOG_DEBUG << "LF at " << pos_within_block; } else { - expandTabs( search_start, search_line_size ); + expandTabs( block_view ); pos_within_block = -1; } } @@ -404,11 +402,11 @@ void IndexOperation::doIndex( LineOffset initialPosition ) auto blockReaderAsync = tbb::flow::async_node( indexingGraph, tbb::flow::serial, - [ this, &localThreadPool, &file, &ioDuration ]( const auto&, auto& gateway ) { + [this, &localThreadPool, &file, &ioDuration]( const auto&, auto& gateway ) { gateway.reserve_wait(); QtConcurrent::run( - &localThreadPool, [ this, &file, &ioDuration, gw = std::ref( gateway ) ] { + &localThreadPool, [this, &file, &ioDuration, gw = std::ref( gateway )] { while ( !file.atEnd() ) { if ( interruptRequest_ ) { @@ -419,8 +417,8 @@ void IndexOperation::doIndex( LineOffset initialPosition ) QByteArray{ IndexingBlockSize, Qt::Uninitialized } }; clock::time_point ioT1 = clock::now(); - const auto readBytes - = static_cast( file.read( blockData.second.data(), blockData.second.size() ) ); + const auto readBytes = static_cast( + file.read( blockData.second.data(), blockData.second.size() ) ); if ( readBytes < 0 ) { LOG_ERROR << "Reading past the end of file"; @@ -456,7 +454,7 @@ void IndexOperation::doIndex( LineOffset initialPosition ) auto blockQueue = tbb::flow::queue_node( indexingGraph ); auto blockParser = tbb::flow::function_node( - indexingGraph, 1, [ this, &state ]( const BlockData& blockData ) { + indexingGraph, 1, [this, &state]( const BlockData& blockData ) { const auto& block_beginning = blockData.first; const auto& block = blockData.second; @@ -643,7 +641,7 @@ MonitoredFileStatus CheckFileChangesOperation::doCheckFileChanges() return MonitoredFileStatus::Truncated; } - const auto getDigest = [ &file, &buffer ]( const qint64 indexedSize ) { + const auto getDigest = [&file, &buffer]( const qint64 indexedSize ) { FileDigest fileDigest; auto readSize = 0ll; auto totalSize = 0ll;