Skip to content

Commit

Permalink
[ci release] Make fast path for regex matching
Browse files Browse the repository at this point in the history
After the introduction of boolean pattern combinations, all types of matchers
have to fill a map of sub-pattern results. If boolean combinations are
disabled, a simple boolean result can be returned instead, as there is always
exactly one sub-pattern. Performance of simple regular expression matching is
much better now.
  • Loading branch information
variar committed May 27, 2021
1 parent c16f41d commit 6aa7ee4
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 26 deletions.
13 changes: 10 additions & 3 deletions src/logdata/include/data/hsregularexpression.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@

#include "regularexpressionpattern.h"

using MatchedPatterns = robin_hood::unordered_flat_map<std::string, bool>;
using MatchingResult = std::variant<bool, MatchedPatterns>;

class DefaultRegularExpressionMatcher {
public:
explicit DefaultRegularExpressionMatcher(
Expand All @@ -60,14 +63,18 @@ class DefaultRegularExpressionMatcher {
return matchingPatterns;
}

robin_hood::unordered_flat_map<std::string, bool> match( const std::string_view& utf8Data ) const
MatchingResult match( const std::string_view& utf8Data ) const
{
robin_hood::unordered_flat_map<std::string, bool> matchingPatterns;
MatchedPatterns matchingPatterns;
for ( const auto& regexp : regexp_ ) {
const auto hasMatch = regexp.second
.match( QString::fromUtf8(
utf8Data.data(), static_cast<int>( utf8Data.size() ) ) )
.hasMatch();
if ( regexp_.size() == 1 ) {
return hasMatch;
}

matchingPatterns.emplace( regexp.first, hasMatch );
}
return matchingPatterns;
Expand All @@ -93,7 +100,7 @@ class HsMatcher {
HsMatcher( HsMatcher&& other ) = default;
HsMatcher& operator=( HsMatcher&& other ) = default;

robin_hood::unordered_map<std::string, bool> match( const std::string_view& utf8Data ) const;
MatchingResult match( const std::string_view& utf8Data ) const;

private:
HsDatabase database_;
Expand Down
9 changes: 6 additions & 3 deletions src/logdata/include/data/regularexpression.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,14 @@

#include <QString>



#include "hsregularexpression.h"

class PatternMatcher;
class BooleanExpressionEvaluator;

class RegularExpression {
public:
RegularExpression( const RegularExpressionPattern& pattern);
RegularExpression( const RegularExpressionPattern& pattern );

std::unique_ptr<PatternMatcher> createMatcher() const;

Expand All @@ -44,6 +42,7 @@ class RegularExpression {

private:
bool isInverse_ = false;
bool isBooleanCombination_ = false;

QString expression_;
std::vector<RegularExpressionPattern> subPatterns_;
Expand All @@ -68,6 +67,10 @@ class PatternMatcher {

private:
bool isInverse_ = false;
bool isBooleanCombination_ = false;

std::string mainPatternId_;

MatcherVariant matcher_;

std::unique_ptr<BooleanExpressionEvaluator> evaluator_;
Expand Down
40 changes: 25 additions & 15 deletions src/logdata/src/hsregularexpression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ namespace {
// Per-scan state handed to matchCallback through its opaque context pointer.
// The struct only holds references to the pattern id list and the result set,
// so the referenced objects must stay alive while the context is in use.
struct MatcherContext {
// Ids of the sub-patterns; matchCallback indexes this with the id it receives.
const std::vector<std::string>& patternIds;
// Collects the ids reported by matchCallback when there is more than one pattern.
robin_hood::unordered_flat_set<std::string>& matchingPatterns;
// Fast-path result: set to true by matchCallback when patternIds holds exactly one pattern.
bool hasMatch = false;
};

int matchCallback( unsigned int id, unsigned long long from, unsigned long long to,
Expand All @@ -42,10 +43,15 @@ int matchCallback( unsigned int id, unsigned long long from, unsigned long long
Q_UNUSED( to );
Q_UNUSED( flags );

MatcherContext* matchResult = static_cast<MatcherContext*>( context );
MatcherContext* matchContext = static_cast<MatcherContext*>( context );

const auto& patternId = matchResult->patternIds[ id ];
matchResult->matchingPatterns.insert( patternId );
if ( matchContext->patternIds.size() == 1 ) {
matchContext->hasMatch = true;
}
else {
const auto& patternId = matchContext->patternIds[ id ];
matchContext->matchingPatterns.insert( patternId );
}

return 0;
}
Expand All @@ -59,29 +65,33 @@ HsMatcher::HsMatcher( HsDatabase db, HsScratch scratch, const std::vector<std::s
{
}

robin_hood::unordered_flat_map<std::string, bool> HsMatcher::match( const std::string_view& utf8Data ) const
MatchingResult HsMatcher::match( const std::string_view& utf8Data ) const
{
robin_hood::unordered_flat_set<std::string> matchingPatterns;

if ( !scratch_ || !database_ ) {
return {};
}

robin_hood::unordered_flat_set<std::string> matchingPatterns;

MatcherContext context{ patternIds_, matchingPatterns };

hs_scan( database_.get(), utf8Data.data(), static_cast<unsigned int>( utf8Data.size() ), 0,
scratch_.get(), matchCallback, static_cast<void*>( &context ) );

robin_hood::unordered_map<std::string, bool> results;
for ( const auto& patternId : patternIds_ ) {
results[ patternId ] = false;
}
if ( patternIds_.size() > 1 ) {
robin_hood::unordered_map<std::string, bool> results;
for ( const auto& patternId : patternIds_ ) {
results[ patternId ] = false;
}

for ( const auto& match : matchingPatterns ) {
results[ match ] = true;
}
for ( const auto& match : matchingPatterns ) {
results[ match ] = true;
}

return results;
return results;
}
else {
return context.hasMatch;
}
}

HsRegularExpression::HsRegularExpression( const RegularExpressionPattern& pattern )
Expand Down
24 changes: 19 additions & 5 deletions src/logdata/src/regularexpression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

#include "configuration.h"
#include "log.h"
#include "overload_visitor.h"
#include "uuid.h"

#include "regularexpression.h"
Expand Down Expand Up @@ -144,6 +145,7 @@ class BooleanExpressionEvaluator {

RegularExpression::RegularExpression( const RegularExpressionPattern& pattern )
: isInverse_( pattern.isExclude )
, isBooleanCombination_( pattern.isBoolean )
, expression_( pattern.pattern )
{
try {
Expand Down Expand Up @@ -190,6 +192,8 @@ std::unique_ptr<PatternMatcher> RegularExpression::createMatcher() const

PatternMatcher::PatternMatcher( const RegularExpression& expression )
: isInverse_( expression.isInverse_ )
, isBooleanCombination_( expression.isBooleanCombination_ )
, mainPatternId_( expression.subPatterns_.front().id() )
, matcher_( expression.hsExpression_.createMatcher() )
, evaluator_( std::make_unique<BooleanExpressionEvaluator>(
expression.expression_.toStdString(), expression.subPatterns_ ) )
Expand All @@ -214,9 +218,19 @@ bool PatternMatcher::hasMatchInternal( std::string_view line ) const
const auto results
= std::visit( [ &line ]( const auto& m ) { return m.match( line ); }, matcher_ );

if ( results.size() == 1 ) {
return results.begin()->second;
}

return evaluator_->evaluate( results );
return std::visit( makeOverloadVisitor(
[ this ]( bool hasMatch ) {
if ( !isBooleanCombination_ ) {
return hasMatch;
}
else {
MatchedPatterns matchedPatterns;
matchedPatterns.emplace( mainPatternId_, hasMatch );
return evaluator_->evaluate( matchedPatterns );
}
},
[ this ]( const MatchedPatterns& matchedPatterns ) {
return evaluator_->evaluate( matchedPatterns );
} ),
results );
}

0 comments on commit 6aa7ee4

Please sign in to comment.