From ae136ea3e825115f146d9287298830c26df18119 Mon Sep 17 00:00:00 2001
From: Xavier Balloy <686305+xballoy@users.noreply.github.com>
Date: Fri, 21 May 2021 12:02:46 -0400
Subject: [PATCH 1/2] Add RareTermsAggregation

---
 docs/documentation.yml                        |   1 +
 src/aggregations/bucket-aggregations/index.js |   1 +
 .../rare-terms-aggregation.js                 | 128 ++++++++++++++++
 src/index.d.ts                                |  90 +++++++++++
 src/index.js                                  |   4 +
 .../rare-terms-aggregation.test.js            | 140 ++++++++++++++++++
 test/index.test.js                            |   3 +
 7 files changed, 367 insertions(+)
 create mode 100644 src/aggregations/bucket-aggregations/rare-terms-aggregation.js
 create mode 100644 test/aggregations-test/rare-terms-aggregation.test.js

diff --git a/docs/documentation.yml b/docs/documentation.yml
index 4276c64..010578c 100644
--- a/docs/documentation.yml
+++ b/docs/documentation.yml
@@ -124,6 +124,7 @@ toc:
   - NestedAggregation
   - ParentAggregation
   - RangeAggregation
+  - RareTermsAggregation
   - ReverseNestedAggregation
   - SamplerAggregation
   - TermsAggregationBase
diff --git a/src/aggregations/bucket-aggregations/index.js b/src/aggregations/bucket-aggregations/index.js
index e1a16b9..9ab565b 100644
--- a/src/aggregations/bucket-aggregations/index.js
+++ b/src/aggregations/bucket-aggregations/index.js
@@ -25,6 +25,7 @@ exports.MissingAggregation = require('./missing-aggregation');
 exports.NestedAggregation = require('./nested-aggregation');
 exports.ParentAggregation = require('./parent-aggregation');
 exports.RangeAggregation = require('./range-aggregation');
+exports.RareTermsAggregation = require('./rare-terms-aggregation');
 exports.ReverseNestedAggregation = require('./reverse-nested-aggregation');
 exports.SamplerAggregation = require('./sampler-aggregation');
 exports.SignificantTermsAggregation = require('./significant-terms-aggregation');
diff --git a/src/aggregations/bucket-aggregations/rare-terms-aggregation.js b/src/aggregations/bucket-aggregations/rare-terms-aggregation.js
new file mode 100644
index 0000000..3cbf02b
--- /dev/null
+++ b/src/aggregations/bucket-aggregations/rare-terms-aggregation.js
@@ -0,0 +1,128 @@
+'use strict';
+
+const isNil = require('lodash.isnil');
+
+const BucketAggregationBase = require('./bucket-aggregation-base');
+
+const ES_REF_URL =
+    'https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-rare-terms-aggregation.html';
+
+/**
+ * A multi-bucket value source based aggregation which finds
+ * "rare" terms — terms that are at the long-tail of the
+ * distribution and are not frequent. Conceptually, this is like
+ * a terms aggregation that is sorted by `_count` ascending.
+ * As noted in the terms aggregation docs, actually ordering
+ * a `terms` agg by count ascending has unbounded error.
+ * Instead, you should use the `rare_terms` aggregation
+ *
+ * [Elasticsearch reference](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-rare-terms-aggregation.html)
+ *
+ * NOTE: Only available in Elasticsearch 7.3.0+.
+ *
+ * @example
+ * const agg = esb.rareTermsAggregation('genres', 'genre');
+ *
+ * @param {string} name The name which will be used to refer to this aggregation.
+ * @param {string} field The field we wish to find rare terms in
+ *
+ * @extends BucketAggregationBase
+ */
+class RareTermsAggregation extends BucketAggregationBase {
+    // eslint-disable-next-line require-jsdoc
+    constructor(name, field) {
+        super(name, 'rare_terms', field);
+    }
+
+    /**
+     * Sets the maximum number of documents a term should appear in.
+     *
+     * @example
+     * const agg = esb.rareTermsAggregation('genres', 'genre').maxDocCount(2);
+     *
+     * @param {number} maxDocCnt Integer value for maximum number of documents a term should appear in.
+     * Max doc count can be between 1 and 100.
+     * @returns {RareTermsAggregation} returns `this` so that calls can be chained
+     */
+    maxDocCount(maxDocCnt) {
+        if (isNil(maxDocCnt) || maxDocCnt < 1 || maxDocCnt > 100) {
+            throw new Error('`maxDocCount` can only be value from 1 to 100.');
+        }
+
+        this._aggsDef.max_doc_count = maxDocCnt;
+        return this;
+    }
+
+    /**
+     * Sets the precision of the internal CuckooFilters. Smaller precision
+     * leads to better approximation, but higher memory usage.
+     * Cannot be smaller than 0.00001
+     *
+     * @example
+     * const agg = esb.rareTermsAggregation('genres', 'genre').precision(0.001);
+     *
+     * @param {number} precision Float value for precision of the internal CuckooFilters. Default is 0.01
+     * @returns {RareTermsAggregation} returns `this` so that calls can be chained
+     */
+    precision(precision) {
+        if (isNil(precision) || precision < 0.00001) {
+            throw new Error('`precision` cannot be smaller than 0.00001.');
+        }
+
+        this._aggsDef.precision = precision;
+        return this;
+    }
+
+    /**
+     * Sets terms that should be included in the aggregation
+     *
+     * @example
+     * const agg = esb.rareTermsAggregation('genres', 'genre').include('swi*');
+     *
+     * @param {string} include Regular expression that will determine what values
+     * are "allowed" to be aggregated
+     * @returns {RareTermsAggregation} returns `this` so that calls can be chained
+     */
+    include(include) {
+        this._aggsDef.include = include;
+        return this;
+    }
+
+    /**
+     * Sets terms that should be excluded from the aggregation
+     *
+     * @example
+     * const agg = esb.rareTermsAggregation('genres', 'genre').exclude('electro*');
+     *
+     * @param {string} exclude Regular expression that will determine what values
+     * should not be aggregated
+     * @returns {RareTermsAggregation} returns `this` so that calls can be chained
+     */
+    exclude(exclude) {
+        this._aggsDef.exclude = exclude;
+        return this;
+    }
+
+    /**
+     * Sets the missing parameter which defines how documents
+     * that are missing a value should be treated.
+     *
+     * @param {string} value
+     * @returns {RareTermsAggregation} returns `this` so that calls can be chained
+     */
+    missing(value) {
+        this._aggsDef.missing = value;
+        return this;
+    }
+
+    /**
+     * @override
+     * @throws {Error} This method cannot be called on RareTermsAggregation
+     */
+    script() {
+        console.log(`Please refer ${ES_REF_URL}`);
+        throw new Error('script is not supported in RareTermsAggregation');
+    }
+}
+
+module.exports = RareTermsAggregation;
diff --git a/src/index.d.ts b/src/index.d.ts
index 64f61e8..5e48497 100644
--- a/src/index.d.ts
+++ b/src/index.d.ts
@@ -5923,6 +5923,96 @@ declare namespace esb {
         field?: string
     ): RangeAggregation;
 
+    /**
+     * A multi-bucket value source based aggregation which finds
+     * "rare" terms — terms that are at the long-tail of the
+     * distribution and are not frequent. Conceptually, this is like
+     * a terms aggregation that is sorted by `_count` ascending.
+     * As noted in the terms aggregation docs, actually ordering
+     * a `terms` agg by count ascending has unbounded error.
+     * Instead, you should use the `rare_terms` aggregation
+     *
+     * NOTE: Only available in Elasticsearch 7.3.0+.
+     *
+     * @param {string} name The name which will be used to refer to this aggregation.
+     * @param {string} field The field we wish to find rare terms in
+     * @extends BucketAggregationBase
+     */
+    export class RareTermsAggregation extends BucketAggregationBase {
+        constructor(name: string, field: string);
+
+        /**
+         * Sets the maximum number of documents a term should appear in.
+         *
+         * @param {number} maxDocCnt Integer value for maximum number of documents a term should appear in.
+         * Max doc count can be between 1 and 100.
+         * @returns {RareTermsAggregation} returns `this` so that calls can be chained
+         */
+        maxDocCount(maxDocCnt: number): this;
+
+        /**
+         * Sets the precision of the internal CuckooFilters. Smaller precision
+         * leads to better approximation, but higher memory usage.
+         * Cannot be smaller than 0.00001
+         *
+         * @param {number} precision Float value for precision of the internal CuckooFilters. Default is 0.01
+         * @returns {RareTermsAggregation} returns `this` so that calls can be chained
+         */
+        precision(precision: number): this;
+
+        /**
+         * Sets terms that should be included in the aggregation
+         *
+         * @param {string} include Regular expression that will determine what values
+         * are "allowed" to be aggregated
+         * @returns {RareTermsAggregation} returns `this` so that calls can be chained
+         */
+        include(include: string): this;
+
+        /**
+         * Sets terms that should be excluded from the aggregation
+         *
+         * @param {string} exclude Regular expression that will determine what values
+         * should not be aggregated
+         * @returns {RareTermsAggregation} returns `this` so that calls can be chained
+         */
+        exclude(exclude: string): this;
+
+        /**
+         * Sets the missing parameter which defines how documents
+         * that are missing a value should be treated.
+         *
+         * @param {string} value
+         * @returns {RareTermsAggregation} returns `this` so that calls can be chained
+         */
+        missing(value: string): this;
+
+        /**
+         * @override
+         * @throws {Error} This method cannot be called on RareTermsAggregation
+         */
+        script(): never;
+    }
+
+    /**
+     * A multi-bucket value source based aggregation which finds
+     * "rare" terms — terms that are at the long-tail of the
+     * distribution and are not frequent. Conceptually, this is like
+     * a terms aggregation that is sorted by `_count` ascending.
+     * As noted in the terms aggregation docs, actually ordering
+     * a `terms` agg by count ascending has unbounded error.
+     * Instead, you should use the `rare_terms` aggregation
+     *
+     * NOTE: Only available in Elasticsearch 7.3.0+.
+     *
+     * @param {string} name The name which will be used to refer to this aggregation.
+     * @param {string} field The field we wish to find rare terms in
+     */
+    export function rareTermsAggregation(
+        name: string,
+        field: string
+    ): RareTermsAggregation;
+
     /**
      * A special single bucket aggregation that enables aggregating
      * on parent docs from nested documents. Effectively this
diff --git a/src/index.js b/src/index.js
index df91010..8e0fcab 100644
--- a/src/index.js
+++ b/src/index.js
@@ -118,6 +118,7 @@ const {
     NestedAggregation,
     ParentAggregation,
     RangeAggregation,
+    RareTermsAggregation,
     ReverseNestedAggregation,
     SamplerAggregation,
     SignificantTermsAggregation,
@@ -429,6 +430,9 @@ exports.parentAggregation = constructorWrapper(ParentAggregation);
 exports.RangeAggregation = RangeAggregation;
 exports.rangeAggregation = constructorWrapper(RangeAggregation);
 
+exports.RareTermsAggregation = RareTermsAggregation;
+exports.rareTermsAggregation = constructorWrapper(RareTermsAggregation);
+
 exports.ReverseNestedAggregation = ReverseNestedAggregation;
 exports.reverseNestedAggregation = constructorWrapper(ReverseNestedAggregation);
 
diff --git a/test/aggregations-test/rare-terms-aggregation.test.js b/test/aggregations-test/rare-terms-aggregation.test.js
new file mode 100644
index 0000000..abdf92b
--- /dev/null
+++ b/test/aggregations-test/rare-terms-aggregation.test.js
@@ -0,0 +1,140 @@
+import test from 'ava';
+import { RareTermsAggregation } from '../../src';
+import {
+    setsAggType,
+    makeSetsOptionMacro,
+    nameTypeExpectStrategy,
+    illegalCall
+} from '../_macros';
+
+const getInstance = field => new RareTermsAggregation('my_agg', field);
+
+const setsOption = makeSetsOptionMacro(
+    getInstance,
+    nameTypeExpectStrategy('my_agg', 'rare_terms')
+);
+
+test(setsAggType, RareTermsAggregation, 'rare_terms');
+test(setsOption, 'maxDocCount', { param: 42 });
+test(setsOption, 'precision', { param: 0.001 });
+test(setsOption, 'include', { param: 'swi*' });
+test(setsOption, 'exclude', { param: 'electro*' });
+test(setsOption, 'missing', { param: 'N/A' });
+test(illegalCall, RareTermsAggregation, 'script', 'my_agg');
+
+test('tries to construct agg name if not given', t => {
+    const value = new RareTermsAggregation(null, 'myfield').toJSON();
+    const expected = {
+        agg_rare_terms_myfield: {
+            rare_terms: {
+                field: 'myfield'
+            }
+        }
+    };
+    t.deepEqual(value, expected);
+});
+
+test('maxDocCount is set', t => {
+    const value = getInstance('my_field')
+        .maxDocCount(42)
+        .toJSON();
+
+    const expected = {
+        my_agg: {
+            rare_terms: {
+                field: 'my_field',
+                max_doc_count: 42
+            }
+        }
+    };
+    t.deepEqual(value, expected);
+});
+
+test('maxDocCount correctly validated', t => {
+    let err = t.throws(() => getInstance().maxDocCount(null), Error);
+    t.is(err.message, '`maxDocCount` can only be value from 1 to 100.');
+
+    err = t.throws(() => getInstance().maxDocCount(undefined), Error);
+    t.is(err.message, '`maxDocCount` can only be value from 1 to 100.');
+
+    err = t.throws(() => getInstance().maxDocCount(0), Error);
+    t.is(err.message, '`maxDocCount` can only be value from 1 to 100.');
+
+    err = t.throws(() => getInstance().maxDocCount(101), Error);
+    t.is(err.message, '`maxDocCount` can only be value from 1 to 100.');
+});
+
+test('precision is set', t => {
+    const value = getInstance('my_field')
+        .precision(0.001)
+        .toJSON();
+
+    const expected = {
+        my_agg: {
+            rare_terms: {
+                field: 'my_field',
+                precision: 0.001
+            }
+        }
+    };
+    t.deepEqual(value, expected);
+});
+
+test('precision correctly validated', t => {
+    let err = t.throws(() => getInstance().precision(null), Error);
+    t.is(err.message, '`precision` cannot be smaller than 0.00001.');
+
+    err = t.throws(() => getInstance().precision(undefined), Error);
+    t.is(err.message, '`precision` cannot be smaller than 0.00001.');
+
+    err = t.throws(() => getInstance().precision(0.000001), Error);
+    t.is(err.message, '`precision` cannot be smaller than 0.00001.');
+});
+
+test('include is set', t => {
+    const value = getInstance('my_field')
+        .include('swi*')
+        .toJSON();
+
+    const expected = {
+        my_agg: {
+            rare_terms: {
+                field: 'my_field',
+                include: 'swi*'
+            }
+        }
+    };
+    t.deepEqual(value, expected);
+});
+
+test('exclude is set', t => {
+    const value = getInstance('my_field')
+        .exclude('electro*')
+        .toJSON();
+
+    const expected = {
+        my_agg: {
+            rare_terms: {
+                field: 'my_field',
+                exclude: 'electro*'
+            }
+        }
+    };
+    t.deepEqual(value, expected);
+});
+
+test('missing is set', t => {
+    const value = getInstance('my_field')
+        .missing('N/A')
+        .toJSON();
+
+    const expected = {
+        my_agg: {
+            rare_terms: {
+                field: 'my_field',
+                missing: 'N/A'
+            }
+        }
+    };
+    t.deepEqual(value, expected);
+});
diff --git a/test/index.test.js b/test/index.test.js
index d1298bc..e2bbb0d 100644
--- a/test/index.test.js
+++ b/test/index.test.js
@@ -274,6 +274,9 @@ test('aggregations are exported', t => {
     t.truthy(esb.RangeAggregation);
     t.truthy(esb.rangeAggregation);
 
+    t.truthy(esb.RareTermsAggregation);
+    t.truthy(esb.rareTermsAggregation);
+
     t.truthy(esb.ReverseNestedAggregation);
     t.truthy(esb.reverseNestedAggregation);
 

From f049c39951264fc381caf05fc77074c09ed9726a Mon Sep 17 00:00:00 2001
From: Xavier Balloy <686305+xballoy@users.noreply.github.com>
Date: Mon, 31 May 2021 08:45:16 -0400
Subject: [PATCH 2/2] fix: precision for rare_terms

---
 .../bucket-aggregations/rare-terms-aggregation.js     |  4 ++--
 test/aggregations-test/rare-terms-aggregation.test.js | 10 ++--------
 2 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/src/aggregations/bucket-aggregations/rare-terms-aggregation.js b/src/aggregations/bucket-aggregations/rare-terms-aggregation.js
index 3cbf02b..7d8a41c 100644
--- a/src/aggregations/bucket-aggregations/rare-terms-aggregation.js
+++ b/src/aggregations/bucket-aggregations/rare-terms-aggregation.js
@@ -65,8 +65,8 @@ class RareTermsAggregation extends BucketAggregationBase {
      * @returns {RareTermsAggregation} returns `this` so that calls can be chained
      */
     precision(precision) {
-        if (isNil(precision) || precision < 0.00001) {
-            throw new Error('`precision` cannot be smaller than 0.00001.');
+        if (precision < 0.00001) {
+            throw new Error('`precision` must be greater than 0.00001.');
         }
 
         this._aggsDef.precision = precision;
diff --git a/test/aggregations-test/rare-terms-aggregation.test.js b/test/aggregations-test/rare-terms-aggregation.test.js
index abdf92b..f6e628d 100644
--- a/test/aggregations-test/rare-terms-aggregation.test.js
+++ b/test/aggregations-test/rare-terms-aggregation.test.js
@@ -81,14 +81,8 @@ test('precision is set', t => {
 });
 
 test('precision correctly validated', t => {
-    let err = t.throws(() => getInstance().precision(null), Error);
-    t.is(err.message, '`precision` cannot be smaller than 0.00001.');
-
-    err = t.throws(() => getInstance().precision(undefined), Error);
-    t.is(err.message, '`precision` cannot be smaller than 0.00001.');
-
-    err = t.throws(() => getInstance().precision(0.000001), Error);
-    t.is(err.message, '`precision` cannot be smaller than 0.00001.');
+    const err = t.throws(() => getInstance().precision(0.000001), Error);
+    t.is(err.message, '`precision` must be greater than 0.00001.');
 });
 
 test('include is set', t => {