Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Load compressed CBOR files #229

Merged
merged 11 commits into from
Mar 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/workflows/ci-tests-clang.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ jobs:
# a pull request then we can checkout the head.
fetch-depth: 2
submodules: 'recursive'

- name: Install xz
run: |
sudo apt-get install -y xz-utils

- name: Run cmake
run: |
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/ci-tests-macos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ jobs:
# a pull request then we can checkout the head.
fetch-depth: 2
submodules: 'recursive'

- name: Install xz
run: |
brew install xz

- name: Run cmake
run: |
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/ci-tests-privacy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ jobs:
# a pull request then we can checkout the head.
fetch-depth: 2
submodules: 'recursive'

- name: Install xz
run: |
sudo apt-get install -y xz-utils

- name: Run cmake
run: |
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/ci-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ jobs:
cmake .
make -j 4

- name: Install xz
run: |
sudo apt-get install -y xz-utils

- name: Perform Unit Tests
run: |
./ithitest
Expand Down
16 changes: 16 additions & 0 deletions ithiunit/unittest1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -380,5 +380,21 @@ namespace ithiunit
Assert::AreEqual(ret, true);
}

TEST_METHOD(IPStatsLoad)
{
    /* Run the IPStatsLoadTest scenario; the unit test passes only if
     * DoTest() reports true. */
    IPStatsLoadTest load_test;
    bool passed = load_test.DoTest();

    Assert::AreEqual(passed, true);
}

TEST_METHOD(IPStatsXZ)
{
    /* Run the IPStatsXZTest scenario; the unit test passes only if
     * DoTest() reports true. */
    IPStatsXZTest xz_test;
    bool passed = xz_test.DoTest();

    Assert::AreEqual(passed, true);
}

};
}
3 changes: 1 addition & 2 deletions lib/DnsStats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@ DnsStats::~DnsStats()
static char const * DefaultRootAddresses[] = {
"2001:503:ba3e::2:30",
"198.41.0.4",
"2001:500:200::b",
"192.228.79.201", /* Since 2017-10-24, b.root-servers.net changed from 192.228.79.201 to 199.9.14.201 */
"199.9.14.201", /* See https://b.root-servers.org/news/2017/08/09/new-ipv4.html */
"2001:500:2::c",
Expand Down Expand Up @@ -240,7 +239,7 @@ char const * RegisteredTldName[] = {
"GLASS", "GLE", "GLOBAL", "GLOBO", "GM", "GMAIL", "GMBH", "GMO", "GMX", "GN", "GODADDY",
"GOLD", "GOLDPOINT", "GOLF", "GOO", "GOODYEAR", "GOOG", "GOOGLE", "GOP",
"GOT", "GOV", "GP", "GQ", "GR", "GRAINGER", "GRAPHICS", "GRATIS", "GREEN", "GRIPE",
"GROCERY", "GROUP", "GS", "GT", "GU", "GUARDIAN", "GUCCI", "GUGE", "GUIDE", "GUITARS",
"GROCERY", "GROUP", "GS", "GT", "GU", "GUCCI", "GUGE", "GUIDE", "GUITARS",
"GURU", "GW", "GY", "HAIR", "HAMBURG", "HANGOUT", "HAUS", "HBO", "HDFC", "HDFCBANK",
"HEALTH", "HEALTHCARE", "HELP", "HELSINKI", "HERE", "HERMES", "HIPHOP",
"HISAMITSU", "HITACHI", "HIV", "HK", "HKT", "HM", "HN", "HOCKEY", "HOLDINGS", "HOLIDAY",
Expand Down
113 changes: 56 additions & 57 deletions lib/ipstats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <algorithm>
#include <math.h>
#include <time.h>
#include "ithiutil.h"

#ifdef _WINDOWS
#define WIN32_LEAN_AND_MEAN
Expand Down Expand Up @@ -129,11 +130,6 @@ IPStatsRecord::IPStatsRecord() :
memset(name_parts, 0, sizeof(name_parts));
memset(rr_types, 0, sizeof(rr_types));
memset(locales, 0, sizeof(locales));
/* Debug variables */
tld_length = 0;
sld_length = 0;
memset(TLD, 0, 64);
memset(SLD, 0, 64);
}

IPStatsRecord::~IPStatsRecord()
Expand Down Expand Up @@ -475,10 +471,6 @@ const size_t nb_TLD_subset = sizeof(TLD_subset) / sizeof(const char*);

void IPStatsRecord::SetTLD(size_t tld_length, uint8_t* tld)
{
#if 1
this->tld_length = tld_length;
memcpy(this->TLD, tld, tld_length);
#endif
IPStatsRecord::SetXLD(tld_length, tld, TLD_subset, nb_TLD_subset, this->tld_counts, &this->tld_hyperlog);
}

Expand All @@ -491,44 +483,6 @@ const size_t nb_SLD_subset = sizeof(SLD_subset) / sizeof(const char*);
void IPStatsRecord::SetSLD(size_t sld_length, uint8_t* sld)
{
IPStatsRecord::SetXLD(sld_length, sld, SLD_subset, nb_SLD_subset, this->sld_counts, &this->sld_hyperlog);
#if 1
this->sld_length = sld_length;
memcpy(this->SLD, sld, sld_length);
#endif
}

void IPStatsRecord::DebugPrint(FILE* F)
{
#if 1
uint8_t test_ip[4] = { 10, 9, 1, 109 };
if (memcmp(this->ip_addr, test_ip, 4) == 0) {
uint64_t tld_hash = HyperLogLog::Fnv64(TLD, tld_length);
uint64_t sld_hash = HyperLogLog::Fnv64(SLD, sld_length);
fprintf(F, "IP:%d.%d.%d.%d,", this->ip_addr[0], this->ip_addr[1], this->ip_addr[2], this->ip_addr[3]);
fprintf(F, "TLD[%zu, 0x% " PRIx64 "] = .", tld_length, tld_hash);
for (size_t i = 0; i < tld_length; i++) {
int c = TLD[i];
if (c >= 32 && c < 127) {
fprintf(F, "%c.", c);
}
else {
fprintf(F, "\\%x.", c);
}
}

fprintf(F, ", SLD[%zu, 0x% " PRIx64 "] = .", sld_length, sld_hash);
for (size_t i = 0; i < sld_length; i++) {
int c = SLD[i];
if (c >= 32 && c < 127) {
fprintf(F, "%c.", c);
}
else {
fprintf(F, "\\%x.", c);
}
}
fprintf(F, "\n");
}
#endif
}

IPStats::IPStats()
Expand All @@ -539,13 +493,21 @@ IPStats::~IPStats()
{
}

bool IPStats::LoadCborFiles(size_t nb_files, char const** fileNames)
bool IPStats::LoadInputFiles(size_t nb_files, char const** fileNames)
{
bool ret = true;

for (size_t i = 0; ret && i < nb_files; i++)
{
ret = LoadCborFile(fileNames[i]);
/* If ends with ".cbor", load as cbor file */
if (ithi_endswith(fileNames[i], ".cbor")) {
ret = LoadCborFile(fileNames[i]);
}
/* If ends with ".cbor.xz", load as compressed cbor file */
else if (ithi_endswith(fileNames[i], ".cbor.xz")) {
ret = LoadCborCxFile(fileNames[i]);
}
/* If ends with ".csv", load as csv file */
}

return ret;
Expand All @@ -557,15 +519,37 @@ bool IPStats::LoadCborFile(char const* fileName)
int err;
bool ret = cdns_ctx.open(fileName);

while (ret) {
if (!cdns_ctx.open_block(&err)) {
ret = (err == CBOR_END_OF_ARRAY);
break;
}
for (size_t i = 0; i < cdns_ctx.block.queries.size(); i++) {
SubmitCborPacket(&cdns_ctx, i);
if (ret) {
ret = LoadCdnsRecords(&cdns_ctx, &err);
}

return ret;
}

/*
 * Load the records of one compressed CBOR file.
 *
 * The file is opened through ithi_xzcat_decompress_open(), read in full with
 * cdns::read_entire_file(), and its blocks are then processed by
 * LoadCdnsRecords(). The pipe is closed with ithi_pipe_close() before
 * returning, whether or not reading and loading succeeded.
 *
 * fileName: path of the compressed file.
 *
 * Returns true only if the pipe could be opened, the data could be read and
 * all records were loaded. Each failure is reported on stderr.
 */
bool IPStats::LoadCborCxFile(char const* fileName)
{
    cdns cdns_ctx;
    /* Initialized so that the diagnostics below never print an
     * indeterminate value when a callee does not set it. */
    int err = 0;
    /* Assume failure until every step has succeeded: a pipe that cannot
     * be opened must not be reported as a successfully loaded file. */
    bool ret = false;
    FILE* F = ithi_xzcat_decompress_open(fileName, &err);

    if (F == NULL) {
        fprintf(stderr, "Cannot open pipe for %s, err = 0x%x\n", fileName, err);
    }
    else {
        ret = cdns_ctx.read_entire_file(F);

        if (!ret) {
            fprintf(stderr, "Cannot read data from %s, err = 0x%x\n", fileName, err);
        }
        else {
            ret = LoadCdnsRecords(&cdns_ctx, &err);
            if (!ret) {
                fprintf(stderr, "Cannot load records from %s, err = 0x%x\n", fileName, err);
            }
        }
        /* Always release the pipe once it has been opened. */
        ithi_pipe_close(F);
    }

    return ret;
}

Expand Down Expand Up @@ -629,6 +613,22 @@ bool IPStats::IPAddressIsLower(IPStatsRecord * x, IPStatsRecord * y)
return ret;
}

/*
 * Walk through every block of an already opened cdns context and submit
 * each query of each block through SubmitCborPacket().
 *
 * cdns_ctx: context from which blocks are read with open_block().
 * err: receives the error code written by open_block().
 *
 * Returns true if the loop stopped because open_block() failed with
 * CBOR_END_OF_ARRAY, false if it failed with any other code.
 */
bool IPStats::LoadCdnsRecords(cdns * cdns_ctx, int * err)
{
    for (;;) {
        if (!cdns_ctx->open_block(err)) {
            /* No further block: only CBOR_END_OF_ARRAY counts as success. */
            return (*err == CBOR_END_OF_ARRAY);
        }

        size_t query_index = 0;
        while (query_index < cdns_ctx->block.queries.size()) {
            SubmitCborPacket(cdns_ctx, query_index);
            query_index++;
        }
    }
}

void IPStats::SubmitCborPacket(cdns* cdns_ctx, size_t packet_id)
{
/* TODO: add to database. */
Expand Down Expand Up @@ -692,7 +692,6 @@ void IPStats::SubmitCborPacket(cdns* cdns_ctx, size_t packet_id)
bool stored = false;
this->ip_records.InsertOrAdd(&ipsr, true, &stored);
}
ipsr.DebugPrint(stdout);
}

bool IPStats::IsRegisteredTLD(uint8_t* x, size_t l)
Expand Down
12 changes: 4 additions & 8 deletions lib/ipstats.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,16 +130,11 @@ class IPStatsRecord

bool WriteRecord(FILE* F);
static IPStatsRecord* ParseLine(char const* line);
void DebugPrint(FILE* F);
private:
bool WriteIP(FILE* F);
static void SetXLD(size_t xld_length, uint8_t * xld, const char ** XLD_subset, size_t nb_XLD_subset, uint64_t * xld_counts, HyperLogLog * xld_hyperlog);
void SetTLD(size_t tld_length, uint8_t* tld);
void SetSLD(size_t sld_length, uint8_t* sld);
size_t tld_length;
size_t sld_length;
uint8_t TLD[64];
uint8_t SLD[64];
};

class IPStats
Expand All @@ -149,8 +144,9 @@ class IPStats
~IPStats();

/* For the command line tools */
bool LoadCborFiles(size_t nb_files, char const** fileNames);
bool LoadCborFile(char const* fileNames);
bool LoadInputFiles(size_t nb_files, char const** fileNames);
bool LoadCborFile(char const* fileName);
bool LoadCborCxFile(char const* fileName);

bool SaveToCsv(char const* file_name);

Expand All @@ -165,8 +161,8 @@ class IPStats

private:
BinHash<IPStatsRecord> ip_records;
bool LoadCdnsRecords(cdns * cdns_ctx, int * err);
void SubmitCborPacket(cdns* cdns_ctx, size_t packet_id);
void LoadRegisteredTLD_from_memory();
static bool IPAddressIsLower(IPStatsRecord * x, IPStatsRecord * y);
BinHash<TldAsKey> registeredTld;
DnsStats dnsstats;
Expand Down
40 changes: 40 additions & 0 deletions lib/ithiutil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,35 @@ FILE* ithi_gzip_compress_open(char const* file_name, int* last_err)
return F;
}

/*
 * Open a read pipe on the decompressed content of an xz-compressed file.
 *
 * The command line is "xzcat -k <file_name>", or "7z.exe e -so <file_name>"
 * when _WINDOWS is defined, and is passed to ithi_pipe_open() with
 * pipe_write set to false.
 *
 * file_name: path of the compressed file; appended verbatim to the command.
 * last_err: set to -1 if file_name is NULL or if the command does not fit
 *           in the local buffer; otherwise handed to ithi_pipe_open().
 *
 * Returns the FILE* obtained from ithi_pipe_open(), or NULL if the command
 * could not be built.
 *
 * NOTE(review): file_name is neither quoted nor escaped. If ithi_pipe_open()
 * runs the command through a shell (to be confirmed), names containing
 * spaces or shell metacharacters will be misinterpreted; callers should only
 * pass trusted paths.
 */
FILE* ithi_xzcat_decompress_open(char const* file_name, int* last_err)
{
    FILE * F = NULL;
    char const* xzcat_command = NULL;
    char command[512];
    int n_char = 0;

    if (file_name == NULL) {
        /* Formatting a NULL string with "%s" is undefined; fail cleanly. */
        *last_err = -1;
        return NULL;
    }

#ifdef _WINDOWS
    /* Running on windows requires that 7z.exe is installed */
    xzcat_command = "7z.exe e -so";
    n_char = sprintf_s(command, sizeof(command), "%s %s", xzcat_command, file_name);
#else
    xzcat_command = "xzcat -k";
    /* snprintf never writes past the buffer; a return value greater than or
     * equal to the buffer size means the command was truncated. */
    n_char = snprintf(command, sizeof(command), "%s %s", xzcat_command, file_name);
#endif

    if (n_char <= 0 || static_cast<size_t>(n_char) >= sizeof(command)) {
        /* Formatting error or truncated command: do not run it. */
        *last_err = -1;
    }
    else {
        F = ithi_pipe_open(command, false, last_err);
    }

    return F;
}

void ithi_pipe_close(FILE* F)
{
#ifdef _WINDOWS
Expand Down Expand Up @@ -188,3 +217,14 @@ size_t ithi_copy_to_safe_text(char* text, size_t text_max, uint8_t * x_in, size_
return (text_length);
}

/*
 * Check whether the string "target" ends with the string "suffix".
 *
 * target: NUL-terminated string to examine.
 * suffix: NUL-terminated string expected at the end of target.
 *
 * Returns true if the last strlen(suffix) characters of target are equal to
 * suffix (an empty suffix always matches), false otherwise.
 */
bool ithi_endswith(char const* target, char const* suffix)
{
    size_t const suffix_length = strlen(suffix);
    size_t const target_length = strlen(target);

    /* A suffix longer than the target can never match. */
    if (suffix_length > target_length) {
        return false;
    }

    /* The tail of target has exactly the length of suffix, so a plain
     * string comparison decides the match. */
    char const* tail = target + (target_length - suffix_length);
    return strcmp(tail, suffix) == 0;
}
4 changes: 4 additions & 0 deletions lib/ithiutil.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,13 @@ FILE* ithi_pipe_open(char const* command, bool pipe_write, int* last_err);

FILE* ithi_gzip_compress_open(char const* file_name, int* last_err);

FILE* ithi_xzcat_decompress_open(char const* file_name, int* last_err);

void ithi_pipe_close(FILE* F);

size_t ithi_copy_to_safe_text(char* text, size_t text_max, uint8_t* x_in, size_t l_in);

bool ithi_endswith(char const* target, char const* suffix);

#endif

8 changes: 4 additions & 4 deletions src/ithitools.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -642,18 +642,18 @@ int main(int argc, char ** argv)

if (optind >= argc)
{
fprintf(stderr, "No capture file to analyze!\n");
fprintf(stderr, "No file to load!\n");
exit_code = usage();
}
else {
if (!ipstats.LoadCborFiles((size_t)argc - optind, (char const**)(argv + optind)))
if (!ipstats.LoadInputFiles((size_t)argc - optind, (char const**)(argv + optind)))
{
fprintf(stderr, "Cannot process the CBOR input files.\n");
fprintf(stderr, "Cannot process the input files.\n");
exit_code = -1;
}
else
{
printf("CBOR Capture processing succeeded, %d records.\n", ipstats.GetCount());
printf("File processing succeeded, %d records.\n", ipstats.GetCount());
if (!ipstats.SaveToCsv(ip_stats_csv)) {
fprintf(stderr, "Cannot save to csv file: %s.\n", ip_stats_csv);
exit_code = usage();
Expand Down
Loading
Loading