Skip to content

Commit

Permalink
[Dropper] + replace std::regex with re2 #182 #181
Browse files Browse the repository at this point in the history
Signed-off-by: Gheorghita Mutu <gheorghitamutu@gmail.com>
  • Loading branch information
gheorghitamutu committed Apr 11, 2024
1 parent 320732f commit 4713383
Show file tree
Hide file tree
Showing 8 changed files with 97 additions and 27 deletions.
3 changes: 3 additions & 0 deletions GViewCore/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ message(STATUS "SQLITE3_INCLUDE_DIR => ${SQLITE3_INCLUDE_DIR}")
find_package(unofficial-sqlite3 CONFIG REQUIRED)
target_link_libraries(${PROJECT_NAME} PRIVATE unofficial::sqlite3::sqlite3)

# Locate the RE2 regular-expression library through its CMake package config;
# REQUIRED aborts configuration when the package cannot be found.
find_package(re2 CONFIG REQUIRED)
# PRIVATE: RE2 is linked into this target only and is not propagated to its consumers.
target_link_libraries(${PROJECT_NAME} PRIVATE re2::re2)

if (MSVC)
add_compile_options(-W3)
elseif (APPLE)
Expand Down
15 changes: 15 additions & 0 deletions GViewCore/include/GView.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -746,6 +746,21 @@ namespace SQLite3
};
} // namespace SQLite3

namespace Regex
{
struct CORE_EXPORT Matcher {
private:
void* context{ nullptr };

public:
bool Init(std::string_view expression, bool isUnicode, bool isCaseSensitive);
Matcher() = default;
~Matcher();

bool Match(BufferView buffer, uint64& start, uint64& end);
};
} // namespace Regex

/*
* Object can be:
* - a file
Expand Down
1 change: 1 addition & 0 deletions GViewCore/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ add_subdirectory(ZLIB)
add_subdirectory(Dissasembly)
add_subdirectory(ZIP)
add_subdirectory(SQLite3)
add_subdirectory(Regex)

if(NOT DEFINED CMAKE_TESTING_ENABLED)
target_sources(GViewCore PRIVATE main.cpp)
Expand Down
3 changes: 3 additions & 0 deletions GViewCore/src/Regex/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Adds the regex wrapper implementation to the GViewCore target.
target_sources(GViewCore PRIVATE
regex_wrapper.cpp
)
55 changes: 55 additions & 0 deletions GViewCore/src/Regex/regex_wrapper.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#include "../include/GView.hpp"

#include <string>
#include <re2/re2.h>

namespace GView::Regex
{
// Implementation state stored behind Matcher::context.
// Kept as a plain aggregate: Matcher::Init fills it with designated
// initializers, so member names and declaration order matter.
struct Context {
    bool isUnicode       = false; // flag passed to Matcher::Init
    bool isCaseSensitive = false; // flag passed to Matcher::Init
    RE2 expression;               // compiled pattern
};

bool Matcher::Init(std::string_view expression, bool isUnicode, bool isCaseSensitive)
{
CHECK(this->context == nullptr, false, "");

RE2::Options options;
options.set_case_sensitive(isCaseSensitive);

absl::string_view asv{ expression.data(), expression.size() };

auto c = new Context{
.isUnicode = isUnicode,
.isCaseSensitive = isCaseSensitive,
.expression = RE2(asv, options),
};

this->context = c;
}

// Frees the Context allocated by Init(). Deleting a null pointer is a no-op,
// so a matcher that was never initialized needs no separate branch.
Matcher::~Matcher()
{
    delete static_cast<Context*>(this->context);
}

/**
 * Searches `buffer` for the leftmost match of the compiled expression.
 *
 * @param buffer bytes to search; they are handed to RE2 as-is.
 * @param start  on success, byte offset in `buffer` where the match begins.
 * @param end    on success, byte offset one past the end of the match.
 * @return true when a match was found (start/end are written); false when
 *         there is no match or the matcher holds no valid expression
 *         (start/end are left untouched).
 */
bool Matcher::Match(BufferView buffer, uint64& start, uint64& end)
{
    auto ctx = reinterpret_cast<Context*>(this->context);
    CHECK(ctx != nullptr, false, "");
    CHECK(ctx->expression.ok(), false, "");

    absl::string_view sv{ reinterpret_cast<const char*>(buffer.GetData()), buffer.GetLength() };

    // Ask for submatch[0] only, i.e. the span of the whole match. This does not
    // depend on the pattern containing a capturing group, and the span is
    // always set on success (a capture group may be unset and yield null data).
    re2::StringPiece result;
    if (!ctx->expression.Match(sv, 0, sv.size(), RE2::UNANCHORED, &result, 1)) {
        return false;
    }

    // `result` is a view into `sv`, so pointer distance gives the offset.
    start = static_cast<uint64>(result.data() - sv.data());
    end   = start + result.size();
    return true;
}
} // namespace GView::Regex
7 changes: 3 additions & 4 deletions GenericPlugins/Dropper/include/SpecialStrings.hpp
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@
#pragma once

#include "IDrop.hpp"

#include <string>
#include <regex>

namespace GView::GenericPlugins::Droppper::SpecialStrings
{
class IpAddress : public IDrop
{
private:
std::regex pattern_ascii;
std::wregex pattern_unicode;
bool unicode{ false };
bool caseSensitive{ false };
std::regex_constants::syntax_option_type regexConstants{ std::regex_constants::ECMAScript | std::regex_constants::optimize };
GView::Regex::Matcher matcherAscii{};
GView::Regex::Matcher matcherUnicode{};

public:
IpAddress(bool caseSensitive, bool unicode);
Expand Down
36 changes: 13 additions & 23 deletions GenericPlugins/Dropper/src/SpecialStrings/IpAddress.cpp
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
#include "SpecialStrings.hpp"

#include <regex>
#include <string>

namespace GView::GenericPlugins::Droppper::SpecialStrings
{
inline static const std::string_view IPS_REGEX_ASCII{ R"(([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}(\:[0-9]{1,5})*))" };
inline static const std::u16string_view IPS_REGEX_UNICODE{ uR"(([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}(\:[0-9]{1,5})*))" };
static const std::string_view IPS_REGEX_ASCII{ R"(^([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}(\:[0-9]{1,5})*))" };
static const std::string_view IPS_REGEX_UNICODE{
R"(^(([0-9]\x00){1,3}\.\x00([0-9]\x00){1,3}\.\x00([0-9]\x00){1,3}\.\x00([0-9]\x00){1,3}(\:\x00([0-9]\x00){1,5})*))"
};

IpAddress::IpAddress(bool caseSensitive, bool unicode)
{
this->unicode = unicode;
this->caseSensitive = caseSensitive;
if (this->caseSensitive) {
this->regexConstants |= std::regex_constants::icase;
}
this->pattern_ascii = std::regex(IPS_REGEX_ASCII.data(), this->regexConstants);
this->pattern_unicode = std::wregex(reinterpret_cast<wchar_t const* const>(IPS_REGEX_UNICODE.data()), this->regexConstants);
this->matcherAscii.Init(IPS_REGEX_ASCII, unicode, caseSensitive);
this->matcherUnicode.Init(IPS_REGEX_UNICODE, unicode, caseSensitive);
}

const char* IpAddress::GetName()
Expand Down Expand Up @@ -51,31 +51,21 @@ Result IpAddress::Check(uint64 offset, DataCache& file, BufferView precachedBuff
auto buffer = file.Get(offset, 39 * 2, false); // IPv6 length in Unicode
CHECK(buffer.GetLength() >= 14, Result::NotFound, ""); // not enough for IPv4 => length in ASCII

// https://stackoverflow.com/questions/26696250/difference-between-stdregex-match-stdregex-search

auto bStart = reinterpret_cast<char const*>(buffer.GetData());
const auto bEnd = reinterpret_cast<char const*>(bStart + buffer.GetLength());

std::cmatch acm{};
if (std::regex_search(bStart, bEnd, acm, this->pattern_ascii)) {
start = offset + acm.position();
end = start + acm.length();
if (this->matcherAscii.Match(buffer, start, end)) {
start += offset;
end += offset;
return Result::Ascii;
}

CHECK(unicode, Result::NotFound, "");
CHECK(precachedBuffer.GetData()[1] == 0, Result::NotFound, ""); // we already checked ascii printable

auto b2Start = reinterpret_cast<wchar_t const*>(buffer.GetData());
const auto b2End = reinterpret_cast<wchar_t const*>(buffer.GetData() + buffer.GetLength());
std::wcmatch wcm{};
if (std::regex_search(b2Start, b2End, wcm, this->pattern_unicode)) {
start = offset + wcm.position() * sizeof(wchar_t);
end = start + (wcm.length() + 1) * sizeof(wchar_t);
if (this->matcherUnicode.Match(buffer, start, end)) {
start += offset;
end += offset;
return Result::Unicode;
}

return Result::NotFound;
}

} // namespace GView::GenericPlugins::Droppper::SpecialStrings
4 changes: 4 additions & 0 deletions vcpkg.json
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@
{
"name": "catch2",
"platform": "windows | linux | osx"
},
{
"name": "re2",
"platform": "windows | linux | osx"
}
]
}

0 comments on commit 4713383

Please sign in to comment.