Skip to content

Commit

Permalink
Add options: --cache-size, CacheSize
Browse files Browse the repository at this point in the history
* Add new clamd and clamscan option --cache-size

This option allows you to set the number of entries the cache can store.

Additionally, introduce CacheSize as a clamd.conf
synonym for --cache-size.

Fixes #867
  • Loading branch information
candrews authored and micahsnyder committed May 17, 2023
1 parent 0bc766f commit e70493c
Show file tree
Hide file tree
Showing 12 changed files with 74 additions and 24 deletions.
2 changes: 2 additions & 0 deletions clamd/clamd.c
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,8 @@ int main(int argc, char **argv)
break;
}

if ((opt = optget(opts, "cache-size"))->enabled)
cl_engine_set_num(engine, CL_ENGINE_CACHE_SIZE, opt->numarg);
if (optget(opts, "disable-cache")->enabled)
cl_engine_set_num(engine, CL_ENGINE_DISABLE_CACHE, 1);

Expand Down
2 changes: 2 additions & 0 deletions clamscan/manager.c
Original file line number Diff line number Diff line change
Expand Up @@ -1116,6 +1116,8 @@ int scanmanager(const struct optstruct *opts)
#endif
}

if ((opt = optget(opts, "cache-size"))->enabled)
cl_engine_set_num(engine, CL_ENGINE_CACHE_SIZE, opt->numarg);
if (optget(opts, "disable-cache")->enabled)
cl_engine_set_num(engine, CL_ENGINE_DISABLE_CACHE, 1);

Expand Down
2 changes: 2 additions & 0 deletions common/optparser.c
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,8 @@ const struct clam_option __clam_options[] = {
/* config file/cmdline options */
{"AlertExceedsMax", "alert-exceeds-max", 0, CLOPT_TYPE_BOOL, MATCH_BOOL, 0, NULL, 0, OPT_CLAMD | OPT_CLAMSCAN, "", ""},

{"CacheSize", "cache-size", 0, CLOPT_TYPE_NUMBER, MATCH_NUMBER, CLI_DEFAULT_CACHE_SIZE, NULL, 0, OPT_CLAMD | OPT_CLAMSCAN, "Number of entries the cache can store.", "65536"},

{"PreludeEnable", "prelude-enable", 0, CLOPT_TYPE_BOOL, MATCH_BOOL, 0, NULL, 0, OPT_CLAMD, "Enable prelude", ""},

{"PreludeAnalyzerName", "prelude-analyzer-name", 0, CLOPT_TYPE_STRING, NULL, -1, NULL, 0, OPT_CLAMD, "Name of the analyzer as seen in prewikka", ""},
Expand Down
5 changes: 5 additions & 0 deletions docs/man/clamd.conf.5.in
Original file line number Diff line number Diff line change
Expand Up @@ -543,6 +543,11 @@ By default, the engine will store an MD5 in a cache of any files that are not fl
.br
Default: no
.TP
\fBCacheSize\fR
This option allows you to set the number of entries the cache can store. The value should be a perfect square; otherwise it is rounded up to the next perfect square.
.br
Default: 65536
.TP
\fBForceToDisk\fR
This option causes memory or nested map scans to dump the content to disk.
.br
Expand Down
5 changes: 5 additions & 0 deletions etc/clamd.conf.sample
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,11 @@ Example
# Default: no
#DisableCache yes

# This option allows you to set the number of entries the cache can store.
# The value should be a perfect square; otherwise it is rounded up to the
# next perfect square.
#CacheSize 65536

# In some cases (eg. complex malware, exploits in graphic files, and others),
# ClamAV uses special algorithms to detect abnormal patterns and behaviors that
# may be malicious. This option enables alerting on such heuristically
Expand Down
58 changes: 34 additions & 24 deletions libclamav/cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,30 +31,26 @@
#include "mpool.h"
#include "clamav.h"
#include "cache.h"
#include "math.h"
#include "fmap.h"

#include "clamav_rust.h"

/* The number of root trees and the chooser function
/* The chooser function
Each tree is protected by a mutex against concurrent access */
/* #define TREES 1 */
/* static inline unsigned int getkey(uint8_t *hash) { return 0; } */
#define TREES 256
static inline unsigned int getkey(uint8_t *hash)
static inline unsigned int getkey(uint8_t *hash, size_t trees)
{
if (hash) {
return *hash;
// Take the first two bytes (16 bits) of the hash, which give 65536 possible values,
// and reduce that modulo the number of trees desired.
// As long as trees < 65536, and the hash is uniformly distributed,
// the resulting key will be a good value to use as a bucket identifier
// for placing values evenly.
return (hash[0] | (((unsigned int)hash[1]) << 8)) % trees;
}

return 0;
}
/* #define TREES 4096 */
/* static inline unsigned int getkey(uint8_t *hash) { return hash[0] | ((unsigned int)(hash[1] & 0xf)<<8) ; } */
/* #define TREES 65536 */
/* static inline unsigned int getkey(uint8_t *hash) { return hash[0] | (((unsigned int)hash[1])<<8) ; } */

/* The number of nodes in each tree */
#define NODES 256

/* SPLAY --------------------------------------------------------------------- */
struct node { /* a node */
Expand All @@ -77,33 +73,35 @@ struct cache_set { /* a tree */

struct CACHE {
struct cache_set cacheset;
uint32_t trees;
uint32_t nodes_per_tree;
#ifdef CL_THREAD_SAFE
pthread_mutex_t mutex;
#endif
};

/* Allocates all the nodes and sets up the replacement chain */
static int cacheset_init(struct cache_set *cs, mpool_t *mempool)
static int cacheset_init(struct cache_set *cs, mpool_t *mempool, uint32_t nodes_per_tree)
{
unsigned int i;

#ifndef USE_MPOOL
UNUSEDPARAM(mempool);
#endif

cs->data = MPOOL_CALLOC(mempool, NODES, sizeof(*cs->data));
cs->data = MPOOL_CALLOC(mempool, nodes_per_tree, sizeof(*cs->data));
cs->root = NULL;

if (!cs->data)
return 1;

for (i = 1; i < NODES; i++) {
for (i = 1; i < nodes_per_tree; i++) {
cs->data[i - 1].next = &cs->data[i];
cs->data[i].prev = &cs->data[i - 1];
}

cs->first = cs->data;
cs->last = &cs->data[NODES - 1];
cs->last = &cs->data[nodes_per_tree - 1];

return 0;
}
Expand Down Expand Up @@ -540,7 +538,7 @@ static int cache_lookup_hash(unsigned char *md5, size_t len, struct CACHE *cache
return ret;
}

key = getkey(md5);
key = getkey(md5, cache->trees);

c = &cache[key];

Expand Down Expand Up @@ -575,12 +573,24 @@ int clean_cache_init(struct cl_engine *engine)
return 0;
}

if (!(cache = MPOOL_MALLOC(engine->mempool, sizeof(struct CACHE) * TREES))) {
// The user requested the cache size to be engine->cache_size.
// The nodes within each tree are locked together, so having only one tree would result in excessive lock contention.
// However, having too many trees is inefficient.
// A good balance is to make the number of trees equal to the number of nodes per tree, which is done by using the square root of the user-requested cache size for both.
const uint32_t trees = ceil(sqrt(engine->cache_size));
const uint32_t nodes_per_tree = ceil(sqrt(engine->cache_size));

cli_dbgmsg("clean_cache_init: Requested cache size: %d. Actual cache size: %d. Trees: %d. Nodes per tree: %d.\n", engine->cache_size, trees * nodes_per_tree, trees, nodes_per_tree);

if (!(cache = MPOOL_MALLOC(engine->mempool, sizeof(struct CACHE) * trees))) {
cli_errmsg("clean_cache_init: mpool malloc fail\n");
return 1;
}

for (i = 0; i < TREES; i++) {
cache->trees = trees;
cache->nodes_per_tree = nodes_per_tree;

for (i = 0; i < trees; i++) {
#ifdef CL_THREAD_SAFE
if (pthread_mutex_init(&cache[i].mutex, NULL)) {
cli_errmsg("clean_cache_init: mutex init fail\n");
Expand All @@ -590,7 +600,7 @@ int clean_cache_init(struct cl_engine *engine)
return 1;
}
#endif
if (cacheset_init(&cache[i].cacheset, engine->mempool)) {
if (cacheset_init(&cache[i].cacheset, engine->mempool, cache->nodes_per_tree)) {
for (j = 0; j < i; j++) cacheset_destroy(&cache[j].cacheset, engine->mempool);
#ifdef CL_THREAD_SAFE
for (j = 0; j <= i; j++) pthread_mutex_destroy(&cache[j].mutex);
Expand All @@ -615,7 +625,7 @@ void clean_cache_destroy(struct cl_engine *engine)
return;
}

for (i = 0; i < TREES; i++) {
for (i = 0; i < cache->trees; i++) {
cacheset_destroy(&cache[i].cacheset, engine->mempool);
#ifdef CL_THREAD_SAFE
pthread_mutex_destroy(&cache[i].mutex);
Expand Down Expand Up @@ -667,7 +677,7 @@ void clean_cache_add(unsigned char *md5, size_t size, cli_ctx *ctx)

level = (ctx->fmap && ctx->fmap->dont_cache_flag) ? ctx->recursion_level : 0;

key = getkey(md5);
key = getkey(md5, ctx->engine->cache->trees);
c = &ctx->engine->cache[key];

#ifdef CL_THREAD_SAFE
Expand Down Expand Up @@ -709,7 +719,7 @@ void clean_cache_remove(unsigned char *md5, size_t size, const struct cl_engine
return;
}

key = getkey(md5);
key = getkey(md5, engine->cache->trees);

c = &engine->cache[key];
#ifdef CL_THREAD_SAFE
Expand Down
1 change: 1 addition & 0 deletions libclamav/clamav.h
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,7 @@ enum cl_engine_field {
CL_ENGINE_MAX_SCRIPTNORMALIZE, /* uint64_t */
CL_ENGINE_MAX_ZIPTYPERCG, /* uint64_t */
CL_ENGINE_FORCETODISK, /* uint32_t */
CL_ENGINE_CACHE_SIZE, /* uint32_t */
CL_ENGINE_DISABLE_CACHE, /* uint32_t */
CL_ENGINE_DISABLE_PE_STATS, /* uint32_t */
CL_ENGINE_STATS_TIMEOUT, /* uint32_t */
Expand Down
2 changes: 2 additions & 0 deletions libclamav/default.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@

#define CLI_DEFAULT_MAXPARTITIONS 50

#define CLI_DEFAULT_CACHE_SIZE 65536

/* TODO - set better defaults */
#define CLI_DEFAULT_PCRE_MATCH_LIMIT 100000
#define CLI_DEFAULT_PCRE_RECMATCH_LIMIT 2000
Expand Down
10 changes: 10 additions & 0 deletions libclamav/others.c
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,7 @@ struct cl_engine *cl_engine_new(void)
new->maxhtmlnotags = CLI_DEFAULT_MAXHTMLNOTAGS;
new->maxscriptnormalize = CLI_DEFAULT_MAXSCRIPTNORMALIZE;
new->maxziptypercg = CLI_DEFAULT_MAXZIPTYPERCG;
new->cache_size = CLI_DEFAULT_CACHE_SIZE;

new->bytecode_security = CL_BYTECODE_TRUST_SIGNED;
/* 5 seconds timeout */
Expand Down Expand Up @@ -730,6 +731,11 @@ cl_error_t cl_engine_set_num(struct cl_engine *engine, enum cl_engine_field fiel
clean_cache_init(engine);
}
break;
case CL_ENGINE_CACHE_SIZE:
if (num) {
engine->cache_size = (uint32_t)num;
}
break;
case CL_ENGINE_DISABLE_PE_STATS:
if (num) {
engine->engine_options |= ENGINE_OPTIONS_DISABLE_PE_STATS;
Expand Down Expand Up @@ -846,6 +852,8 @@ long long cl_engine_get_num(const struct cl_engine *engine, enum cl_engine_field
return engine->bytecode_mode;
case CL_ENGINE_DISABLE_CACHE:
return engine->engine_options & ENGINE_OPTIONS_DISABLE_CACHE;
case CL_ENGINE_CACHE_SIZE:
return engine->cache_size;
case CL_ENGINE_STATS_TIMEOUT:
return ((cli_intel_t *)(engine->stats_data))->timeout;
case CL_ENGINE_MAX_PARTITIONS:
Expand Down Expand Up @@ -976,6 +984,7 @@ struct cl_settings *cl_engine_settings_copy(const struct cl_engine *engine)
settings->cb_meta = engine->cb_meta;
settings->cb_file_props = engine->cb_file_props;
settings->engine_options = engine->engine_options;
settings->cache_size = engine->cache_size;

settings->cb_stats_add_sample = engine->cb_stats_add_sample;
settings->cb_stats_remove_sample = engine->cb_stats_remove_sample;
Expand Down Expand Up @@ -1020,6 +1029,7 @@ cl_error_t cl_engine_settings_apply(struct cl_engine *engine, const struct cl_se
engine->bytecode_timeout = settings->bytecode_timeout;
engine->bytecode_mode = settings->bytecode_mode;
engine->engine_options = settings->engine_options;
engine->cache_size = settings->cache_size;

if (engine->tmpdir)
MPOOL_FREE(engine->mempool, engine->tmpdir);
Expand Down
2 changes: 2 additions & 0 deletions libclamav/others.h
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,7 @@ struct cl_engine {
char *tmpdir;
uint32_t keeptmp;
uint64_t engine_options;
uint32_t cache_size;

/* Limits */
uint32_t maxscantime; /* Time limit (in milliseconds) */
Expand Down Expand Up @@ -492,6 +493,7 @@ struct cl_settings {
enum bytecode_mode bytecode_mode;
char *pua_cats;
uint64_t engine_options;
uint32_t cache_size;

/* callbacks */
clcb_pre_cache cb_pre_cache;
Expand Down
4 changes: 4 additions & 0 deletions libfreshclam/libfreshclam.c
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,10 @@ fc_error_t fc_test_database(const char *dbFilename, int bBytecodeEnabled)
goto done;
}

// Disable the cache: testing the database doesn't need caching,
// and a cache would only waste time and memory.
engine->engine_options |= ENGINE_OPTIONS_DISABLE_CACHE;

cl_engine_set_clcb_stats_submit(engine, NULL);

if (CL_SUCCESS != (cl_ret = cl_load(
Expand Down
5 changes: 5 additions & 0 deletions win32/conf_examples/clamd.conf.sample
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,11 @@ TCPAddr localhost
# Default: no
#DisableCache yes

# This option allows you to set the number of entries the cache can store.
# The value should be a perfect square; otherwise it is rounded up to the
# next perfect square.
#CacheSize 65536

# In some cases (eg. complex malware, exploits in graphic files, and others),
# ClamAV uses special algorithms to detect abnormal patterns and behaviors that
# may be malicious. This option enables alerting on such heuristically
Expand Down

0 comments on commit e70493c

Please sign in to comment.