Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add RareTermsAggregation #146

Merged
merged 2 commits into from
Jun 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/documentation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ toc:
- NestedAggregation
- ParentAggregation
- RangeAggregation
- RareTermsAggregation
- ReverseNestedAggregation
- SamplerAggregation
- TermsAggregationBase
Expand Down
1 change: 1 addition & 0 deletions src/aggregations/bucket-aggregations/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ exports.MissingAggregation = require('./missing-aggregation');
// Each bucket aggregation lives in its own module file and is re-exported
// here under its own name.
exports.NestedAggregation = require('./nested-aggregation');
exports.ParentAggregation = require('./parent-aggregation');
exports.RangeAggregation = require('./range-aggregation');
exports.RareTermsAggregation = require('./rare-terms-aggregation');
exports.ReverseNestedAggregation = require('./reverse-nested-aggregation');
exports.SamplerAggregation = require('./sampler-aggregation');
exports.SignificantTermsAggregation = require('./significant-terms-aggregation');
Expand Down
128 changes: 128 additions & 0 deletions src/aggregations/bucket-aggregations/rare-terms-aggregation.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
'use strict';

const isNil = require('lodash.isnil');

const BucketAggregationBase = require('./bucket-aggregation-base');

const ES_REF_URL =
'https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-rare-terms-aggregation.html';

/**
 * A multi-bucket value source based aggregation which finds
 * "rare" terms — terms that are at the long-tail of the
 * distribution and are not frequent. Conceptually, this is like
 * a terms aggregation that is sorted by `_count` ascending.
 * As noted in the terms aggregation docs, actually ordering
 * a `terms` agg by count ascending has unbounded error.
 * Instead, you should use the `rare_terms` aggregation.
 *
 * [Elasticsearch reference](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-rare-terms-aggregation.html)
 *
 * NOTE: Only available in Elasticsearch 7.3.0+.
 *
 * @example
 * const agg = esb.rareTermsAggregation('genres', 'genre');
 *
 * @param {string} name The name which will be used to refer to this aggregation.
 * @param {string} field The field we wish to find rare terms in
 *
 * @extends BucketAggregationBase
 */
class RareTermsAggregation extends BucketAggregationBase {
    // eslint-disable-next-line require-jsdoc
    constructor(name, field) {
        super(name, 'rare_terms', field);
    }

    /**
     * Sets the maximum number of documents a term should appear in.
     *
     * @example
     * const agg = esb.rareTermsAggregation('genres', 'genre').maxDocCount(2);
     *
     * @param {number} maxDocCnt Integer value for maximum number of documents a term should appear in.
     * Max doc count can be between 1 and 100.
     * @returns {RareTermsAggregation} returns `this` so that calls can be chained
     * @throws {Error} If `maxDocCnt` is `null`, `undefined`, `NaN` or outside of the range 1 to 100
     */
    maxDocCount(maxDocCnt) {
        // The range is tested positively and then negated: every comparison
        // against NaN is false, so `x < 1 || x > 100` would let NaN through.
        if (isNil(maxDocCnt) || !(maxDocCnt >= 1 && maxDocCnt <= 100)) {
            throw new Error('`maxDocCount` can only be value from 1 to 100.');
        }

        this._aggsDef.max_doc_count = maxDocCnt;
        return this;
    }

    /**
     * Sets the precision of the internal CuckooFilters. Smaller precision
     * leads to better approximation, but higher memory usage.
     * Cannot be smaller than 0.00001 (0.00001 itself is accepted).
     *
     * @example
     * const agg = esb.rareTermsAggregation('genres', 'genre').precision(0.001);
     *
     * @param {number} precision Float value for precision of the internal CuckooFilters. Default is 0.01
     * @returns {RareTermsAggregation} returns `this` so that calls can be chained
     * @throws {Error} If `precision` is `null`, `undefined`, `NaN` or smaller than 0.00001
     */
    precision(precision) {
        // Negated positive check so that `undefined` and NaN are rejected
        // instead of being written into the request body; `precision < 0.00001`
        // is false for both of them.
        if (isNil(precision) || !(precision >= 0.00001)) {
            throw new Error('`precision` must be greater than 0.00001.');
        }

        this._aggsDef.precision = precision;
        return this;
    }

    /**
     * Sets terms that should be included in the aggregation.
     * The value is stored as given, without validation.
     *
     * @example
     * const agg = esb.rareTermsAggregation('genres', 'genre').include('swi*');
     *
     * @param {string} include Regular expression that will determine what values
     * are "allowed" to be aggregated
     * @returns {RareTermsAggregation} returns `this` so that calls can be chained
     */
    include(include) {
        this._aggsDef.include = include;
        return this;
    }

    /**
     * Sets terms that should be excluded from the aggregation.
     * The value is stored as given, without validation.
     *
     * @example
     * const agg = esb.rareTermsAggregation('genres', 'genre').exclude('electro*');
     *
     * @param {string} exclude Regular expression that will determine what values
     * should not be aggregated
     * @returns {RareTermsAggregation} returns `this` so that calls can be chained
     */
    exclude(exclude) {
        this._aggsDef.exclude = exclude;
        return this;
    }

    /**
     * Sets the missing parameter which defines how documents
     * that are missing a value should be treated.
     *
     * @example
     * const agg = esb.rareTermsAggregation('genres', 'genre').missing('N/A');
     *
     * @param {string} value Value to use for documents that have no value in the field
     * @returns {RareTermsAggregation} returns `this` so that calls can be chained
     */
    missing(value) {
        this._aggsDef.missing = value;
        return this;
    }

    /**
     * @override
     * @throws {Error} This method cannot be called on RareTermsAggregation
     */
    script() {
        // Point the caller at the reference docs before failing.
        console.log(`Please refer ${ES_REF_URL}`);
        throw new Error('script is not supported in RareTermsAggregation');
    }
}

module.exports = RareTermsAggregation;
90 changes: 90 additions & 0 deletions src/index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5923,6 +5923,96 @@ declare namespace esb {
field?: string
): RangeAggregation;

/**
 * A multi-bucket value source based aggregation which finds
 * "rare" terms — terms that are at the long-tail of the
 * distribution and are not frequent. Conceptually, this is like
 * a terms aggregation that is sorted by `_count` ascending.
 * As noted in the terms aggregation docs, actually ordering
 * a `terms` agg by count ascending has unbounded error.
 * Instead, you should use the `rare_terms` aggregation.
 *
 * NOTE: Only available in Elasticsearch 7.3.0+.
 *
 * @param {string} name The name which will be used to refer to this aggregation.
 * @param {string} field The field we wish to find rare terms in
 * @extends BucketAggregationBase
 */
export class RareTermsAggregation extends BucketAggregationBase {
    constructor(name: string, field: string);

    /**
     * Sets the maximum number of documents a term should appear in.
     *
     * @param {number} maxDocCnt Integer value for maximum number of documents a term should appear in.
     * Max doc count can be between 1 and 100.
     * @returns {RareTermsAggregation} returns `this` so that calls can be chained
     * @throws {Error} If `maxDocCnt` is nil or outside of the range 1 to 100
     */
    maxDocCount(maxDocCnt: number): this;

    /**
     * Sets the precision of the internal CuckooFilters. Smaller precision
     * leads to better approximation, but higher memory usage.
     * Cannot be smaller than 0.00001
     *
     * @param {number} precision Float value for precision of the internal CuckooFilters. Default is 0.01
     * @returns {RareTermsAggregation} returns `this` so that calls can be chained
     * @throws {Error} If `precision` is smaller than 0.00001
     */
    precision(precision: number): this;

    /**
     * Sets terms that should be included in the aggregation
     *
     * @param {string|string[]} include Regular expression that will determine what values
     * are "allowed" to be aggregated, or an array of exact terms to include
     * @returns {RareTermsAggregation} returns `this` so that calls can be chained
     */
    include(include: string | string[]): this;

    /**
     * Sets terms that should be excluded from the aggregation
     *
     * @param {string|string[]} exclude Regular expression that will determine what values
     * should not be aggregated, or an array of exact terms to exclude
     * @returns {RareTermsAggregation} returns `this` so that calls can be chained
     */
    exclude(exclude: string | string[]): this;

    /**
     * Sets the missing parameter which defines how documents
     * that are missing a value should be treated.
     *
     * @param {string} value Value to use for documents that have no value in the field
     * @returns {RareTermsAggregation} returns `this` so that calls can be chained
     */
    missing(value: string): this;

    /**
     * @override
     * @throws {Error} This method cannot be called on RareTermsAggregation
     */
    script(): never;
}

/**
* A multi-bucket value source based aggregation which finds
* "rare" terms — terms that are at the long-tail of the
* distribution and are not frequent. Conceptually, this is like
* a terms aggregation that is sorted by `_count` ascending.
* As noted in the terms aggregation docs, actually ordering
* a `terms` agg by count ascending has unbounded error.
* Instead, you should use the `rare_terms` aggregation.
*
* NOTE: Only available in Elasticsearch 7.3.0+.
*
* @param {string} name The name which will be used to refer to this aggregation.
* @param {string} field The field we wish to find rare terms in
* @returns {RareTermsAggregation} a `RareTermsAggregation` for the given name and field
*/
export function rareTermsAggregation(
name: string,
field: string
): RareTermsAggregation;

/**
* A special single bucket aggregation that enables aggregating
* on parent docs from nested documents. Effectively this
Expand Down
4 changes: 4 additions & 0 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ const {
NestedAggregation,
ParentAggregation,
RangeAggregation,
RareTermsAggregation,
ReverseNestedAggregation,
SamplerAggregation,
SignificantTermsAggregation,
Expand Down Expand Up @@ -429,6 +430,9 @@ exports.parentAggregation = constructorWrapper(ParentAggregation);
// Each aggregation is exported twice: the class itself under its PascalCase
// name, and the result of `constructorWrapper(Class)` under the camelCase name.
// NOTE(review): the wrapper presumably yields a factory callable without
// `new` — confirm against the definition of `constructorWrapper`.
exports.RangeAggregation = RangeAggregation;
exports.rangeAggregation = constructorWrapper(RangeAggregation);

exports.RareTermsAggregation = RareTermsAggregation;
exports.rareTermsAggregation = constructorWrapper(RareTermsAggregation);

exports.ReverseNestedAggregation = ReverseNestedAggregation;
exports.reverseNestedAggregation = constructorWrapper(ReverseNestedAggregation);

Expand Down
134 changes: 134 additions & 0 deletions test/aggregations-test/rare-terms-aggregation.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
import test from 'ava';
import { RareTermsAggregation } from '../../src';
import {
setsAggType,
makeSetsOptionMacro,
nameTypeExpectStrategy,
illegalCall
} from '../_macros';

/**
 * Builds a fresh aggregation named `my_agg` on the given field.
 *
 * @param {string} [field] Field passed straight to the constructor
 * @returns {RareTermsAggregation} a new instance for every call
 */
function getInstance(field) {
    return new RareTermsAggregation('my_agg', field);
}

// Macro asserting that calling a setter puts the matching option into the
// serialized `rare_terms` body of `my_agg`.
const expectStrategy = nameTypeExpectStrategy('my_agg', 'rare_terms');
const setsOption = makeSetsOptionMacro(getInstance, expectStrategy);

test(setsAggType, RareTermsAggregation, 'rare_terms');

// One macro-driven test per setter, registered in the listed order.
[
    ['maxDocCount', 42],
    ['precision', 0.001],
    ['include', 'swi*'],
    ['exclude', 'electro*'],
    ['missing', 'N/A']
].forEach(([method, param]) => {
    test(setsOption, method, { param });
});

test(illegalCall, RareTermsAggregation, 'script', 'my_agg');

// With a null name, the serialized aggregation is expected to be keyed by a
// name derived from the aggregation type and the field.
test('tries to construct agg name if not given', t => {
    const unnamedAgg = new RareTermsAggregation(null, 'myfield');

    t.deepEqual(unnamedAgg.toJSON(), {
        agg_rare_terms_myfield: {
            rare_terms: { field: 'myfield' }
        }
    });
});

// A valid max doc count ends up as `max_doc_count` in the request body.
test('maxDocCount is set', t => {
    const agg = getInstance('my_field').maxDocCount(42);

    t.deepEqual(agg.toJSON(), {
        my_agg: {
            rare_terms: {
                field: 'my_field',
                max_doc_count: 42
            }
        }
    });
});

// Nil values and both out-of-range sides must be rejected with the same message.
test('maxDocCount correctly validated', t => {
    const expectedMessage = '`maxDocCount` can only be value from 1 to 100.';

    for (const invalid of [null, undefined, 0, 101]) {
        const err = t.throws(() => getInstance().maxDocCount(invalid), Error);
        t.is(err.message, expectedMessage);
    }
});

// A valid precision ends up as `precision` in the request body.
test('precision is set', t => {
    const agg = getInstance('my_field').precision(0.001);

    t.deepEqual(agg.toJSON(), {
        my_agg: {
            rare_terms: {
                field: 'my_field',
                precision: 0.001
            }
        }
    });
});

// A value below the 0.00001 lower bound must be rejected.
test('precision correctly validated', t => {
    const setTooSmallPrecision = () => getInstance().precision(0.000001);

    const err = t.throws(setTooSmallPrecision, Error);
    t.is(err.message, '`precision` must be greater than 0.00001.');
});

// The include pattern is serialized unchanged under `include`.
test('include is set', t => {
    const agg = getInstance('my_field').include('swi*');

    t.deepEqual(agg.toJSON(), {
        my_agg: {
            rare_terms: {
                field: 'my_field',
                include: 'swi*'
            }
        }
    });
});

// The exclude pattern is serialized unchanged under `exclude`.
test('exclude is set', t => {
    const agg = getInstance('my_field').exclude('electro*');

    t.deepEqual(agg.toJSON(), {
        my_agg: {
            rare_terms: {
                field: 'my_field',
                exclude: 'electro*'
            }
        }
    });
});

// The missing placeholder is serialized unchanged under `missing`.
test('missing is set', t => {
    const agg = getInstance('my_field').missing('N/A');

    t.deepEqual(agg.toJSON(), {
        my_agg: {
            rare_terms: {
                field: 'my_field',
                missing: 'N/A'
            }
        }
    });
});
Loading