From 9f0ee3a3029210214fd769ae1689afa85eb7df68 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Wed, 30 May 2018 18:27:16 +0100 Subject: [PATCH 1/5] Fix issues in memory accounting and control plus some optimisations to data gatherer footprint --- include/api/CAnomalyJob.h | 6 +- include/api/CForecastRunner.h | 2 +- include/model/CAnomalyDetector.h | 12 +- include/model/CDataGatherer.h | 40 ++--- include/model/CResourceMonitor.h | 28 +++- lib/api/CAnomalyJob.cc | 5 +- lib/api/unittest/CAnomalyJobLimitTest.cc | 6 +- lib/maths/CTimeSeriesDecompositionDetail.cc | 2 +- lib/model/CAnomalyDetector.cc | 11 +- lib/model/CAnomalyDetectorModel.cc | 2 +- lib/model/CCountingModelFactory.cc | 10 +- lib/model/CDataGatherer.cc | 137 ++++++++---------- lib/model/CEventRateModel.cc | 4 +- lib/model/CEventRateModelFactory.cc | 21 ++- lib/model/CEventRatePopulationModelFactory.cc | 15 +- lib/model/CIndividualModel.cc | 1 + lib/model/CMetricBucketGatherer.cc | 11 +- lib/model/CMetricModel.cc | 3 +- lib/model/CMetricModelFactory.cc | 20 ++- lib/model/CMetricPopulationModelFactory.cc | 22 +-- lib/model/CPopulationModel.cc | 9 +- lib/model/CResourceMonitor.cc | 103 +++++++------ lib/model/CSampleCounts.cc | 35 ++--- lib/model/unittest/CResourceMonitorTest.cc | 13 +- 24 files changed, 246 insertions(+), 272 deletions(-) diff --git a/include/api/CAnomalyJob.h b/include/api/CAnomalyJob.h index 6afab93123..ff53cdb223 100644 --- a/include/api/CAnomalyJob.h +++ b/include/api/CAnomalyJob.h @@ -108,10 +108,8 @@ class API_EXPORT CAnomalyJob : public CDataProcessor { public: using TPersistCompleteFunc = std::function; - using TAnomalyDetectorPtr = model::CAnomalyDetector::TAnomalyDetectorPtr; + using TAnomalyDetectorPtr = std::shared_ptr; using TAnomalyDetectorPtrVec = std::vector; - using TAnomalyDetectorPtrVecItr = std::vector::iterator; - using TAnomalyDetectorPtrVecCItr = std::vector::const_iterator; using TKeyVec = std::vector; using TKeyAnomalyDetectorPtrUMap = boost::unordered_map; @@ -359,7 
+357,7 @@ class API_EXPORT CAnomalyJob : public CDataProcessor { //! Update configuration void doForecast(const std::string& controlMessage); - model::CAnomalyDetector::TAnomalyDetectorPtr + TAnomalyDetectorPtr makeDetector(int identifier, const model::CAnomalyDetectorModelConfig& modelConfig, model::CLimits& limits, diff --git a/include/api/CForecastRunner.h b/include/api/CForecastRunner.h index acefe25b54..73f1e87269 100644 --- a/include/api/CForecastRunner.h +++ b/include/api/CForecastRunner.h @@ -109,7 +109,7 @@ class API_EXPORT CForecastRunner final : private core::CNonCopyable { using TOStreamConcurrentWrapper = core::CConcurrentWrapper; using TOStreamConcurrentWrapperPtr = std::shared_ptr; - using TAnomalyDetectorPtr = model::CAnomalyDetector::TAnomalyDetectorPtr; + using TAnomalyDetectorPtr = std::shared_ptr; using TAnomalyDetectorPtrVec = std::vector; using TForecastModelWrapper = model::CForecastDataSink::SForecastModelWrapper; diff --git a/include/model/CAnomalyDetector.h b/include/model/CAnomalyDetector.h index e1b6639f50..9db37306a6 100644 --- a/include/model/CAnomalyDetector.h +++ b/include/model/CAnomalyDetector.h @@ -70,14 +70,9 @@ class MODEL_EXPORT CAnomalyDetector : private core::CNonCopyable { using TStrVec = std::vector; using TStrCPtrVec = std::vector; using TModelPlotDataVec = std::vector; - using TDataGathererPtr = std::shared_ptr; using TModelFactoryCPtr = std::shared_ptr; using TModelPtr = std::unique_ptr; - - //! A shared pointer to an instance of this class - using TAnomalyDetectorPtr = std::shared_ptr; - using TOutputModelPlotDataFunc = std::function; using TStrSet = CAnomalyDetectorModelConfig::TStrSet; @@ -334,14 +329,13 @@ class MODEL_EXPORT CAnomalyDetector : private core::CNonCopyable { //! in the model ensemble class. void legacyModelsAcceptPersistInserter(core::CStatePersistInserter& inserter) const; -protected: - //! Configurable limits - CLimits& m_Limits; - private: //! 
An identifier for the search for which this is detecting anomalies. int m_DetectorIndex; + //! Configurable limits + CLimits& m_Limits; + //! Configurable behaviour const CAnomalyDetectorModelConfig& m_ModelConfig; diff --git a/include/model/CDataGatherer.h b/include/model/CDataGatherer.h index 8a05bed9f4..77eb6641e6 100644 --- a/include/model/CDataGatherer.h +++ b/include/model/CDataGatherer.h @@ -108,36 +108,24 @@ class MODEL_EXPORT CDataGatherer { using TSizeUInt64Pr = std::pair; using TSizeUInt64PrVec = std::vector; using TFeatureVec = model_t::TFeatureVec; - using TFeatureVecCItr = TFeatureVec::const_iterator; using TSizeSizePr = std::pair; using TSizeSizePrUInt64Pr = std::pair; using TSizeSizePrUInt64PrVec = std::vector; using TSizeSizePrUInt64UMap = boost::unordered_map; - using TSizeSizePrUInt64UMapItr = TSizeSizePrUInt64UMap::iterator; - using TSizeSizePrUInt64UMapCItr = TSizeSizePrUInt64UMap::const_iterator; using TSizeSizePrUInt64UMapQueue = CBucketQueue; - using TSizeSizePrUInt64UMapQueueItr = TSizeSizePrUInt64UMapQueue::iterator; - using TSizeSizePrUInt64UMapQueueCItr = TSizeSizePrUInt64UMapQueue::const_iterator; - using TSizeSizePrUInt64UMapQueueCRItr = TSizeSizePrUInt64UMapQueue::const_reverse_iterator; using TSizeSizePrStoredStringPtrPrUInt64UMap = CBucketGatherer::TSizeSizePrStoredStringPtrPrUInt64UMap; - using TSizeSizePrStoredStringPtrPrUInt64UMapCItr = - TSizeSizePrStoredStringPtrPrUInt64UMap::const_iterator; - using TSizeSizePrStoredStringPtrPrUInt64UMapItr = - TSizeSizePrStoredStringPtrPrUInt64UMap::iterator; using TSizeSizePrStoredStringPtrPrUInt64UMapVec = std::vector; using TSizeSizePrStoredStringPtrPrUInt64UMapVecQueue = CBucketQueue; using TSearchKeyCRef = boost::reference_wrapper; - using TBucketGathererPVec = std::vector; - using TBucketGathererPVecItr = TBucketGathererPVec::iterator; - using TBucketGathererPVecCItr = TBucketGathererPVec::const_iterator; + using TBucketGathererPtr = std::unique_ptr; + using TBucketGathererPtrVec = 
std::vector; using TFeatureAnyPr = std::pair; using TFeatureAnyPrVec = std::vector; using TMetricCategoryVec = std::vector; - using TSampleCountsPtr = std::shared_ptr; + using TSampleCountsPtr = std::unique_ptr; using TTimeVec = std::vector; - using TTimeVecCItr = TTimeVec::const_iterator; public: //! The summary count indicating an explicit null record. @@ -161,8 +149,6 @@ class MODEL_EXPORT CDataGatherer { //! \param[in] modelParams The global configuration parameters. //! \param[in] summaryCountFieldName If \p summaryMode is E_Manual //! then this is the name of the field holding the summary count. - //! \param[in] partitionFieldName The name of the field which splits - //! the data. //! \param[in] partitionFieldValue The value of the field which splits //! the data. //! \param[in] personFieldName The name of the field which identifies @@ -173,8 +159,6 @@ class MODEL_EXPORT CDataGatherer { //! the metric values. //! \param[in] influenceFieldNames The field names for which we will //! compute influences. - //! \param[in] useNull If true the gatherer will process missing - //! person and attribute field values (assuming they are empty). //! \param[in] key The key of the search for which to gatherer data. //! \param[in] features The features of the data to model. //! 
\param[in] startTime The start of the time interval for which @@ -187,13 +171,11 @@ class MODEL_EXPORT CDataGatherer { model_t::ESummaryMode summaryMode, const SModelParams& modelParams, const std::string& summaryCountFieldName, - const std::string& partitionFieldName, const std::string& partitionFieldValue, const std::string& personFieldName, const std::string& attributeFieldName, const std::string& valueFieldName, const TStrVec& influenceFieldNames, - bool useNull, const CSearchKey& key, const TFeatureVec& features, core_t::TTime startTime, @@ -204,13 +186,11 @@ class MODEL_EXPORT CDataGatherer { model_t::ESummaryMode summaryMode, const SModelParams& modelParams, const std::string& summaryCountFieldName, - const std::string& partitionFieldName, const std::string& partitionFieldValue, const std::string& personFieldName, const std::string& attributeFieldName, const std::string& valueFieldName, const TStrVec& influenceFieldNames, - bool useNull, const CSearchKey& key, core::CStateRestoreTraverser& traverser); @@ -220,8 +200,9 @@ class MODEL_EXPORT CDataGatherer { //! redundant except to create a signature that will not be mistaken for //! a general purpose copy constructor. CDataGatherer(bool isForPersistence, const CDataGatherer& other); - ~CDataGatherer(); + CDataGatherer(const CDataGatherer&) = delete; + CDataGatherer& operator=(const CDataGatherer&) = delete; //@} //! \name Persistence @@ -546,7 +527,7 @@ class MODEL_EXPORT CDataGatherer { void resetSampleCount(std::size_t id); //! Get the sample counts. - TSampleCountsPtr sampleCounts() const; + const TSampleCountsPtr& sampleCounts() const; //@} //! \name Time @@ -759,7 +740,7 @@ class MODEL_EXPORT CDataGatherer { //! The collection of bucket gatherers which contain the bucket-specific //! metrics and counts. - TBucketGathererPVec m_Gatherers; + TBucketGathererPtrVec m_Gatherers; //! Indicates whether the data being gathered are already summarized //! by an external aggregation process. 
@@ -768,15 +749,12 @@ class MODEL_EXPORT CDataGatherer { //! The global configuration parameters. TModelParamsCRef m_Params; - //! The partition field name or an empty string if there isn't one. - std::string m_PartitionFieldName; + //! The key of the search for which data is being gathered. + TSearchKeyCRef m_SearchKey; //! The value of the partition field for this detector. core::CStoredStringPtr m_PartitionFieldValue; - //! The key of the search for which data is being gathered. - TSearchKeyCRef m_SearchKey; - //! A registry where person names are mapped to unique IDs. CDynamicStringIdRegistry m_PeopleRegistry; diff --git a/include/model/CResourceMonitor.h b/include/model/CResourceMonitor.h index b9749a0510..d4d064ab1c 100644 --- a/include/model/CResourceMonitor.h +++ b/include/model/CResourceMonitor.h @@ -11,8 +11,9 @@ #include #include +#include + #include -#include class CResourceMonitorTest; class CResourceLimitTest; @@ -42,8 +43,8 @@ class MODEL_EXPORT CResourceMonitor { }; public: - using TModelPtrSizePr = std::pair; - using TModelPtrSizeMap = std::map; + using TDetectorPtrSizePr = std::pair; + using TDetectorPtrSizeUMap = boost::unordered_map; using TMemoryUsageReporterFunc = std::function; using TTimeSizeMap = std::map; @@ -127,13 +128,21 @@ class MODEL_EXPORT CResourceMonitor { //! Clears all extra memory void clearExtraMemory(); + //! Decrease the margin on the memory limit. + //! + //! We start off applying a margin to the memory limit because + //! it is difficult to accurately estimate the long term memory + //! usage at this point. This is gradually decreased over time + //! by calling this once per bucket processed. + void decreaseMargin(); + private: //! Updates the memory limit fields and the prune threshold //! to the given value. void updateMemoryLimitsAndPruneThreshold(std::size_t limitMBs); //!
Update the given model and recalculate the total usage - void memUsage(CAnomalyDetectorModel* model); + void memUsage(CAnomalyDetector* detector); //! Determine if we need to send a usage report, based on //! increased usage, or increased errors @@ -143,16 +152,25 @@ class MODEL_EXPORT CResourceMonitor { //! shoule be allowed or not void updateAllowAllocations(); + //! Get the high memory limit with margin applied. + std::size_t highLimit() const; + + //! Get the low memory limit with margin applied. + std::size_t lowLimit() const; + //! Returns the sum of used memory plus any extra memory std::size_t totalMemory() const; private: //! The registered collection of components - TModelPtrSizeMap m_Models; + TDetectorPtrSizeUMap m_Detectors; //! Is there enough free memory to allow creating new components bool m_AllowAllocations; + //! The relative margin to apply to the byte limits. + double m_ByteLimitMargin; + //! The upper limit for memory usage, checked on increasing values std::size_t m_ByteLimitHigh; diff --git a/lib/api/CAnomalyJob.cc b/lib/api/CAnomalyJob.cc index b7aa388cff..d028ebc9b3 100644 --- a/lib/api/CAnomalyJob.cc +++ b/lib/api/CAnomalyJob.cc @@ -413,6 +413,7 @@ void CAnomalyJob::outputBucketResultsUntil(core_t::TTime time) { lastBucketEndTime + bucketLength + latency <= time; lastBucketEndTime += effectiveBucketLength) { this->outputResults(lastBucketEndTime); + m_Limits.resourceMonitor().decreaseMargin(); m_Limits.resourceMonitor().sendMemoryUsageReportIfSignificantlyChanged(lastBucketEndTime); m_LastFinalisedBucketEndTime = lastBucketEndTime + effectiveBucketLength; @@ -1403,7 +1404,7 @@ CAnomalyJob::detectorForKey(bool isRestoring, // Check if we need to and are allowed to create a new detector. if (itr == m_Detectors.end() && resourceMonitor.areAllocationsAllowed()) { // Create an placeholder for the anomaly detector. 
- model::CAnomalyDetector::TAnomalyDetectorPtr& detector = + TAnomalyDetectorPtr& detector = m_Detectors .emplace(model::CSearchKey::TStrKeyPr(partition, key), TAnomalyDetectorPtr()) .first->second; @@ -1450,7 +1451,7 @@ void CAnomalyJob::pruneAllModels() { } } -model::CAnomalyDetector::TAnomalyDetectorPtr +CAnomalyJob::TAnomalyDetectorPtr CAnomalyJob::makeDetector(int identifier, const model::CAnomalyDetectorModelConfig& modelConfig, model::CLimits& limits, diff --git a/lib/api/unittest/CAnomalyJobLimitTest.cc b/lib/api/unittest/CAnomalyJobLimitTest.cc index 0119643589..9add2b3877 100644 --- a/lib/api/unittest/CAnomalyJobLimitTest.cc +++ b/lib/api/unittest/CAnomalyJobLimitTest.cc @@ -390,7 +390,7 @@ void CAnomalyJobLimitTest::testModelledEntityCountForFixedMemoryLimit() { LOG_DEBUG(<< "# partition = " << used.s_PartitionFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage = " << used.s_Usage); - CPPUNIT_ASSERT(used.s_ByFields > 820 && used.s_ByFields < 980); + CPPUNIT_ASSERT(used.s_ByFields > 700 && used.s_ByFields < 860); CPPUNIT_ASSERT_EQUAL(std::size_t(2), used.s_PartitionFields); } @@ -433,7 +433,7 @@ void CAnomalyJobLimitTest::testModelledEntityCountForFixedMemoryLimit() { LOG_DEBUG(<< "# partition = " << used.s_PartitionFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage = " << used.s_Usage); - CPPUNIT_ASSERT(used.s_PartitionFields > 390 && used.s_PartitionFields < 470); + CPPUNIT_ASSERT(used.s_PartitionFields > 430 && used.s_PartitionFields < 510); CPPUNIT_ASSERT(static_cast(used.s_ByFields) > 0.95 * static_cast(used.s_PartitionFields)); } @@ -475,6 +475,6 @@ void CAnomalyJobLimitTest::testModelledEntityCountForFixedMemoryLimit() { LOG_DEBUG(<< "# over = " << used.s_OverFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage = " << used.s_Usage); - CPPUNIT_ASSERT(used.s_OverFields > 10000 && used.s_OverFields < 12000); + 
CPPUNIT_ASSERT(used.s_OverFields > 8500 && used.s_OverFields < 10500); } } diff --git a/lib/maths/CTimeSeriesDecompositionDetail.cc b/lib/maths/CTimeSeriesDecompositionDetail.cc index f2d9b93d8c..bc36f0b04c 100644 --- a/lib/maths/CTimeSeriesDecompositionDetail.cc +++ b/lib/maths/CTimeSeriesDecompositionDetail.cc @@ -652,7 +652,7 @@ std::size_t CTimeSeriesDecompositionDetail::CPeriodicityTest::extraMemoryOnIniti if (result == 0) { for (auto i : {E_Short, E_Long}) { TExpandingWindowPtr window(this->newWindow(i, false)); - result += core::CMemory::dynamicSize(window); + result += 0.3 * core::CMemory::dynamicSize(window); } } return result; diff --git a/lib/model/CAnomalyDetector.cc b/lib/model/CAnomalyDetector.cc index 35c2b46514..37f14afb58 100644 --- a/lib/model/CAnomalyDetector.cc +++ b/lib/model/CAnomalyDetector.cc @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -99,7 +100,7 @@ CAnomalyDetector::CAnomalyDetector(int detectorIndex, const std::string& partitionFieldValue, core_t::TTime firstTime, const TModelFactoryCPtr& modelFactory) - : m_Limits(limits), m_DetectorIndex(detectorIndex), m_ModelConfig(modelConfig), + : m_DetectorIndex(detectorIndex), m_Limits(limits), m_ModelConfig(modelConfig), m_LastBucketEndTime(maths::CIntegerTools::ceil(firstTime, modelConfig.bucketLength())), m_DataGatherer(makeDataGatherer(modelFactory, m_LastBucketEndTime, partitionFieldValue)), m_ModelFactory(modelFactory), @@ -120,7 +121,7 @@ CAnomalyDetector::CAnomalyDetector(int detectorIndex, } CAnomalyDetector::CAnomalyDetector(bool isForPersistence, const CAnomalyDetector& other) - : m_Limits(other.m_Limits), m_DetectorIndex(other.m_DetectorIndex), + : m_DetectorIndex(other.m_DetectorIndex), m_Limits(other.m_Limits), m_ModelConfig(other.m_ModelConfig), // Empty result function is fine in this case // Empty result count function is fine in this case @@ -612,14 +613,12 @@ void CAnomalyDetector::showMemoryUsage(std::ostream& stream) const { void 
CAnomalyDetector::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const { mem->setName("Anomaly Detector Memory Usage"); + core::CMemoryDebug::dynamicSize("m_DataGatherer", m_DataGatherer, mem); core::CMemoryDebug::dynamicSize("m_Model", m_Model, mem); } std::size_t CAnomalyDetector::memoryUsage() const { - // We only account for the model in CResourceMonitor, - // so we just include that here. - std::size_t mem = core::CMemory::dynamicSize(m_Model); - return mem; + return core::CMemory::dynamicSize(m_DataGatherer) + core::CMemory::dynamicSize(m_Model); } const core_t::TTime& CAnomalyDetector::lastBucketEndTime() const { diff --git a/lib/model/CAnomalyDetectorModel.cc b/lib/model/CAnomalyDetectorModel.cc index 628adb6041..0f1e601dda 100644 --- a/lib/model/CAnomalyDetectorModel.cc +++ b/lib/model/CAnomalyDetectorModel.cc @@ -95,7 +95,7 @@ CAnomalyDetectorModel::CAnomalyDetectorModel(bool isForPersistence, : // The copy of m_DataGatherer is a shallow copy. This would be unacceptable // if we were going to persist the data gatherer from within this class. // We don't, so that's OK, but the next issue is that another thread will be - // modifying the data gatherer m_DataGatherer points to whilst this object + // modifying the data gatherer m_DataGatherer points to whilst this object // is being persisted. Therefore, persistence must only call methods on the // data gatherer that are invariant.
m_Params(other.m_Params), m_DataGatherer(other.m_DataGatherer), diff --git a/lib/model/CCountingModelFactory.cc b/lib/model/CCountingModelFactory.cc index 3a48ccbeb9..86e3d0d242 100644 --- a/lib/model/CCountingModelFactory.cc +++ b/lib/model/CCountingModelFactory.cc @@ -62,9 +62,8 @@ CCountingModelFactory::makeModel(const SModelInitializationData& initData, CDataGatherer* CCountingModelFactory::makeDataGatherer(const SGathererInitializationData& initData) const { return new CDataGatherer(model_t::E_EventRate, m_SummaryMode, this->modelParams(), - m_SummaryCountFieldName, m_PartitionFieldName, - initData.s_PartitionFieldValue, m_PersonFieldName, - EMPTY_STRING, EMPTY_STRING, TStrVec(), m_UseNull, + m_SummaryCountFieldName, initData.s_PartitionFieldValue, + m_PersonFieldName, EMPTY_STRING, EMPTY_STRING, {}, this->searchKey(), m_Features, initData.s_StartTime, 0); } @@ -73,9 +72,8 @@ CCountingModelFactory::makeDataGatherer(const std::string& partitionFieldValue, core::CStateRestoreTraverser& traverser) const { return new CDataGatherer(model_t::E_EventRate, m_SummaryMode, this->modelParams(), m_SummaryCountFieldName, - m_PartitionFieldName, partitionFieldValue, - m_PersonFieldName, EMPTY_STRING, EMPTY_STRING, - TStrVec(), m_UseNull, this->searchKey(), traverser); + partitionFieldValue, m_PersonFieldName, EMPTY_STRING, + EMPTY_STRING, {}, this->searchKey(), traverser); } CCountingModelFactory::TPriorPtr diff --git a/lib/model/CDataGatherer.cc b/lib/model/CDataGatherer.cc index 38b756a339..38d4249dd5 100644 --- a/lib/model/CDataGatherer.cc +++ b/lib/model/CDataGatherer.cc @@ -21,9 +21,11 @@ #include #include #include +#include #include #include +#include #include @@ -160,37 +162,35 @@ bool isPopulation(model_t::EAnalysisCategory gathererType) { const std::string CDataGatherer::EXPLICIT_NULL("null"); const std::size_t CDataGatherer::EXPLICIT_NULL_SUMMARY_COUNT(std::numeric_limits::max()); -const std::size_t CDataGatherer::ESTIMATED_MEM_USAGE_PER_BY_FIELD(10000); +const 
std::size_t CDataGatherer::ESTIMATED_MEM_USAGE_PER_BY_FIELD(20000); const std::size_t CDataGatherer::ESTIMATED_MEM_USAGE_PER_OVER_FIELD(1000); CDataGatherer::CDataGatherer(model_t::EAnalysisCategory gathererType, model_t::ESummaryMode summaryMode, const SModelParams& modelParams, const std::string& summaryCountFieldName, - const std::string& partitionFieldName, const std::string& partitionFieldValue, const std::string& personFieldName, const std::string& attributeFieldName, const std::string& valueFieldName, const TStrVec& influenceFieldNames, - bool useNull, const CSearchKey& key, const TFeatureVec& features, core_t::TTime startTime, int sampleCountOverride) : m_GathererType(gathererType), - m_Features(detail::sanitize(features, gathererType)), m_SummaryMode(summaryMode), - m_Params(modelParams), m_PartitionFieldName(partitionFieldName), + m_Features(detail::sanitize(features, gathererType)), + m_SummaryMode(summaryMode), m_Params(modelParams), m_SearchKey(key), m_PartitionFieldValue(CStringStore::names().get(partitionFieldValue)), - m_SearchKey(key), m_PeopleRegistry(PERSON, - stat_t::E_NumberNewPeople, - stat_t::E_NumberNewPeopleNotAllowed, - stat_t::E_NumberNewPeopleRecycled), + m_PeopleRegistry(PERSON, + stat_t::E_NumberNewPeople, + stat_t::E_NumberNewPeopleNotAllowed, + stat_t::E_NumberNewPeopleRecycled), m_AttributesRegistry(ATTRIBUTE, stat_t::E_NumberNewAttributes, stat_t::E_NumberNewAttributesNotAllowed, stat_t::E_NumberNewAttributesRecycled), - m_Population(detail::isPopulation(gathererType)), m_UseNull(useNull) { + m_Population(detail::isPopulation(gathererType)), m_UseNull(key.useNull()) { // Constructor needs to create 1 bucket gatherer at the startTime // and possibly 1 bucket gatherer at (startTime + bucketLength / 2). 
@@ -212,27 +212,25 @@ CDataGatherer::CDataGatherer(model_t::EAnalysisCategory gathererType, model_t::ESummaryMode summaryMode, const SModelParams& modelParams, const std::string& summaryCountFieldName, - const std::string& partitionFieldName, const std::string& partitionFieldValue, const std::string& personFieldName, const std::string& attributeFieldName, const std::string& valueFieldName, const TStrVec& influenceFieldNames, - bool useNull, const CSearchKey& key, core::CStateRestoreTraverser& traverser) : m_GathererType(gathererType), m_SummaryMode(summaryMode), - m_Params(modelParams), m_PartitionFieldName(partitionFieldName), + m_Params(modelParams), m_SearchKey(key), m_PartitionFieldValue(CStringStore::names().get(partitionFieldValue)), - m_SearchKey(key), m_PeopleRegistry(PERSON, - stat_t::E_NumberNewPeople, - stat_t::E_NumberNewPeopleNotAllowed, - stat_t::E_NumberNewPeopleRecycled), + m_PeopleRegistry(PERSON, + stat_t::E_NumberNewPeople, + stat_t::E_NumberNewPeopleNotAllowed, + stat_t::E_NumberNewPeopleRecycled), m_AttributesRegistry(ATTRIBUTE, stat_t::E_NumberNewAttributes, stat_t::E_NumberNewAttributesNotAllowed, stat_t::E_NumberNewAttributesRecycled), - m_Population(detail::isPopulation(gathererType)), m_UseNull(useNull) { + m_Population(detail::isPopulation(gathererType)), m_UseNull(key.useNull()) { if (traverser.traverseSubLevel(boost::bind( &CDataGatherer::acceptRestoreTraverser, this, boost::cref(summaryCountFieldName), boost::cref(personFieldName), boost::cref(attributeFieldName), @@ -244,18 +242,16 @@ CDataGatherer::CDataGatherer(model_t::EAnalysisCategory gathererType, CDataGatherer::CDataGatherer(bool isForPersistence, const CDataGatherer& other) : m_GathererType(other.m_GathererType), m_Features(other.m_Features), m_SummaryMode(other.m_SummaryMode), m_Params(other.m_Params), - m_PartitionFieldName(other.m_PartitionFieldName), - m_PartitionFieldValue(other.m_PartitionFieldValue), m_SearchKey(other.m_SearchKey), + 
m_PartitionFieldValue(other.m_PartitionFieldValue), m_PeopleRegistry(isForPersistence, other.m_PeopleRegistry), m_AttributesRegistry(isForPersistence, other.m_AttributesRegistry), m_Population(other.m_Population), m_UseNull(other.m_UseNull) { if (!isForPersistence) { LOG_ABORT(<< "This constructor only creates clones for persistence"); } - for (TBucketGathererPVecCItr i = other.m_Gatherers.begin(); - i != other.m_Gatherers.end(); ++i) { - m_Gatherers.push_back((*i)->cloneForPersistence()); + for (const auto& gatherer : other.m_Gatherers) { + m_Gatherers.emplace_back(gatherer->cloneForPersistence()); } if (other.m_SampleCounts) { m_SampleCounts.reset(other.m_SampleCounts->cloneForPersistence()); @@ -263,9 +259,6 @@ CDataGatherer::CDataGatherer(bool isForPersistence, const CDataGatherer& other) } CDataGatherer::~CDataGatherer() { - for (TBucketGathererPVecItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i) { - delete *i; - } } CDataGatherer* CDataGatherer::cloneForPersistence() const { @@ -293,7 +286,7 @@ std::size_t CDataGatherer::maxDimension() const { } const std::string& CDataGatherer::partitionFieldName() const { - return m_PartitionFieldName; + return boost::unwrap_ref(m_SearchKey).partitionFieldName(); } const std::string& CDataGatherer::partitionFieldValue() const { @@ -360,8 +353,8 @@ bool CDataGatherer::addArrival(const TStrCPtrVec& fieldValues, } bool result = true; - for (TBucketGathererPVecItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i) { - result &= (*i)->addEventData(data); + for (auto& gatherer : m_Gatherers) { + result &= gatherer->addEventData(data); } return result; } @@ -371,8 +364,8 @@ void CDataGatherer::sampleNow(core_t::TTime sampleBucketStart) { } void CDataGatherer::skipSampleNow(core_t::TTime sampleBucketStart) { - for (TBucketGathererPVecItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i) { - (*i)->skipSampleNow(sampleBucketStart); + for (auto& gatherer : m_Gatherers) { + gatherer->skipSampleNow(sampleBucketStart); } 
} @@ -429,8 +422,8 @@ void CDataGatherer::recyclePeople(const TSizeVec& peopleToRemove) { return; } - for (TBucketGathererPVecItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i) { - (*i)->recyclePeople(peopleToRemove); + for (auto& gatherer : m_Gatherers) { + gatherer->recyclePeople(peopleToRemove); } if (!this->isPopulation() && m_SampleCounts) { @@ -452,8 +445,8 @@ void CDataGatherer::removePeople(std::size_t lowestPersonToRemove) { m_SampleCounts->remove(lowestPersonToRemove); } - for (TBucketGathererPVecItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i) { - (*i)->removePeople(lowestPersonToRemove); + for (auto& gatherer : m_Gatherers) { + gatherer->removePeople(lowestPersonToRemove); } m_PeopleRegistry.removeNames(lowestPersonToRemove); @@ -508,8 +501,8 @@ void CDataGatherer::recycleAttributes(const TSizeVec& attributesToRemove) { m_SampleCounts->recycle(attributesToRemove); } - for (TBucketGathererPVecItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i) { - (*i)->recycleAttributes(attributesToRemove); + for (auto& gatherer : m_Gatherers) { + gatherer->recycleAttributes(attributesToRemove); } m_AttributesRegistry.recycleNames(attributesToRemove, DEFAULT_ATTRIBUTE_NAME); @@ -527,8 +520,8 @@ void CDataGatherer::removeAttributes(std::size_t lowestAttributeToRemove) { m_SampleCounts->remove(lowestAttributeToRemove); } - for (TBucketGathererPVecItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i) { - (*i)->removeAttributes(lowestAttributeToRemove); + for (auto& gatherer : m_Gatherers) { + gatherer->removeAttributes(lowestAttributeToRemove); } m_AttributesRegistry.removeNames(lowestAttributeToRemove); @@ -574,7 +567,7 @@ void CDataGatherer::resetSampleCount(std::size_t id) { } } -CDataGatherer::TSampleCountsPtr CDataGatherer::sampleCounts() const { +const CDataGatherer::TSampleCountsPtr& CDataGatherer::sampleCounts() const { return m_SampleCounts; } @@ -605,8 +598,8 @@ bool CDataGatherer::validateSampleTimes(core_t::TTime& startTime, 
core_t::TTime } void CDataGatherer::timeNow(core_t::TTime time) { - for (TBucketGathererPVecItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i) { - (*i)->timeNow(time); + for (auto& gatherer : m_Gatherers) { + gatherer->timeNow(time); } } @@ -631,8 +624,8 @@ uint64_t CDataGatherer::checksum() const { if (m_SampleCounts) { result = maths::CChecksum::calculate(result, m_SampleCounts->checksum(*this)); } - for (TBucketGathererPVecCItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i) { - result = maths::CChecksum::calculate(result, (*i)->checksum()); + for (const auto& gatherer : m_Gatherers) { + result = maths::CChecksum::calculate(result, gatherer); } LOG_TRACE(<< "checksum = " << result); @@ -645,10 +638,9 @@ void CDataGatherer::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) co core::CMemoryDebug::dynamicSize("m_Features", m_Features, mem); core::CMemoryDebug::dynamicSize("m_PeopleRegistry", m_PeopleRegistry, mem); core::CMemoryDebug::dynamicSize("m_AttributesRegistry", m_AttributesRegistry, mem); - core::CMemoryDebug::dynamicSize("m_PartitionFieldName", m_PartitionFieldName, mem); core::CMemoryDebug::dynamicSize("m_SampleCounts", m_SampleCounts, mem); - for (TBucketGathererPVecCItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i) { - core::CMemoryDebug::dynamicSize("BucketGatherer", *(*i), mem); + for (const auto& gatherer : m_Gatherers) { + core::CMemoryDebug::dynamicSize("BucketGatherer", *gatherer, mem); } } @@ -656,10 +648,9 @@ std::size_t CDataGatherer::memoryUsage() const { std::size_t mem = core::CMemory::dynamicSize(m_Features); mem += core::CMemory::dynamicSize(m_PeopleRegistry); mem += core::CMemory::dynamicSize(m_AttributesRegistry); - mem += core::CMemory::dynamicSize(m_PartitionFieldName); mem += core::CMemory::dynamicSize(m_SampleCounts); - for (TBucketGathererPVecCItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i) { - mem += core::CMemory::dynamicSize(*(*i)); + for (const auto& gatherer : m_Gatherers) { + mem += 
core::CMemory::dynamicSize(*gatherer); } return mem; } @@ -674,15 +665,15 @@ void CDataGatherer::clear() { if (m_SampleCounts) { m_SampleCounts->clear(); } - for (TBucketGathererPVecItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i) { - (*i)->clear(); + for (auto& gatherer : m_Gatherers) { + gatherer->clear(); } } bool CDataGatherer::resetBucket(core_t::TTime bucketStart) { bool result = true; - for (TBucketGathererPVecItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i) { - result &= (*i)->resetBucket(bucketStart); + for (auto& gatherer : m_Gatherers) { + result &= gatherer->resetBucket(bucketStart); } return result; } @@ -863,7 +854,7 @@ bool CDataGatherer::acceptRestoreTraverser(const std::string& summaryCountFieldN traverser.traverseSubLevel(boost::bind(&CDynamicStringIdRegistry::acceptRestoreTraverser, &m_AttributesRegistry, _1))) RESTORE_SETUP_TEARDOWN( - SAMPLE_COUNTS_TAG, m_SampleCounts.reset(new CSampleCounts(0)), + SAMPLE_COUNTS_TAG, m_SampleCounts = boost::make_unique(0), traverser.traverseSubLevel(boost::bind(&CSampleCounts::acceptRestoreTraverser, m_SampleCounts.get(), _1)), /**/) @@ -886,24 +877,23 @@ bool CDataGatherer::restoreBucketGatherer(const std::string& summaryCountFieldNa do { const std::string& name = traverser.name(); if (name == CBucketGatherer::EVENTRATE_BUCKET_GATHERER_TAG) { - CEventRateBucketGatherer* gatherer = new CEventRateBucketGatherer( + TBucketGathererPtr gatherer{boost::make_unique( *this, summaryCountFieldName, personFieldName, attributeFieldName, - valueFieldName, influenceFieldNames, traverser); - + valueFieldName, influenceFieldNames, traverser)}; if (gatherer == nullptr) { LOG_ERROR(<< "Failed to create gatherer"); return false; } - m_Gatherers.push_back(gatherer); + m_Gatherers.push_back(std::move(gatherer)); } else if (name == CBucketGatherer::METRIC_BUCKET_GATHERER_TAG) { - CMetricBucketGatherer* gatherer = new CMetricBucketGatherer( + TBucketGathererPtr gatherer{boost::make_unique( *this, 
summaryCountFieldName, personFieldName, attributeFieldName, - valueFieldName, influenceFieldNames, traverser); + valueFieldName, influenceFieldNames, traverser)}; if (gatherer == nullptr) { LOG_ERROR(<< "Failed to create gatherer"); return false; } - m_Gatherers.push_back(gatherer); + m_Gatherers.push_back(std::move(gatherer)); } } while (traverser.next()); @@ -911,19 +901,18 @@ bool CDataGatherer::restoreBucketGatherer(const std::string& summaryCountFieldNa } void CDataGatherer::persistBucketGatherers(core::CStatePersistInserter& inserter) const { - for (TBucketGathererPVecCItr i = m_Gatherers.begin(); i != m_Gatherers.end(); ++i) { - const std::string& tag = (*i)->persistenceTag(); - + for (const auto& gatherer : m_Gatherers) { + const std::string& tag = gatherer->persistenceTag(); if (tag == CBucketGatherer::EVENTRATE_BUCKET_GATHERER_TAG) { - CEventRateBucketGatherer* const gatherer = - dynamic_cast(*i); + const CEventRateBucketGatherer* gatherer_ = + dynamic_cast(gatherer.get()); inserter.insertLevel(tag, boost::bind(&CEventRateBucketGatherer::acceptPersistInserter, - boost::cref(gatherer), _1)); + boost::cref(gatherer_), _1)); } else if (tag == CBucketGatherer::METRIC_BUCKET_GATHERER_TAG) { - CMetricBucketGatherer* const gatherer = - dynamic_cast(*i); + const CMetricBucketGatherer* gatherer_ = + dynamic_cast(gatherer.get()); inserter.insertLevel(tag, boost::bind(&CMetricBucketGatherer::acceptPersistInserter, - boost::cref(gatherer), _1)); + boost::cref(gatherer_), _1)); } } } @@ -940,15 +929,15 @@ void CDataGatherer::createBucketGatherer(model_t::EAnalysisCategory gathererType case model_t::E_EventRate: case model_t::E_PopulationEventRate: case model_t::E_PeersEventRate: - m_Gatherers.push_back(new CEventRateBucketGatherer( + m_Gatherers.push_back(boost::make_unique( *this, summaryCountFieldName, personFieldName, attributeFieldName, valueFieldName, influenceFieldNames, startTime)); break; case model_t::E_Metric: case model_t::E_PopulationMetric: case 
model_t::E_PeersMetric: - m_SampleCounts.reset(new CSampleCounts(sampleCountOverride)); - m_Gatherers.push_back(new CMetricBucketGatherer( + m_SampleCounts = boost::make_unique(sampleCountOverride); + m_Gatherers.push_back(boost::make_unique( *this, summaryCountFieldName, personFieldName, attributeFieldName, valueFieldName, influenceFieldNames, startTime)); break; diff --git a/lib/model/CEventRateModel.cc b/lib/model/CEventRateModel.cc index ad679dce7e..2226f398f3 100644 --- a/lib/model/CEventRateModel.cc +++ b/lib/model/CEventRateModel.cc @@ -478,9 +478,7 @@ void CEventRateModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) } std::size_t CEventRateModel::memoryUsage() const { - return this->CIndividualModel::memoryUsage() + - core::CMemory::dynamicSize(m_InterimBucketCorrector); - ; + return this->CIndividualModel::memoryUsage(); } std::size_t CEventRateModel::staticSize() const { diff --git a/lib/model/CEventRateModelFactory.cc b/lib/model/CEventRateModelFactory.cc index cccf21f5ab..e2ae833aa3 100644 --- a/lib/model/CEventRateModelFactory.cc +++ b/lib/model/CEventRateModelFactory.cc @@ -91,23 +91,22 @@ CEventRateModelFactory::makeModel(const SModelInitializationData& initData, CDataGatherer* CEventRateModelFactory::makeDataGatherer(const SGathererInitializationData& initData) const { - return new CDataGatherer( - model_t::E_EventRate, m_SummaryMode, this->modelParams(), m_SummaryCountFieldName, - m_PartitionFieldName, initData.s_PartitionFieldValue, m_PersonFieldName, - EMPTY_STRING, // AttributeFieldName - m_ValueFieldName, m_InfluenceFieldNames, m_UseNull, this->searchKey(), - m_Features, initData.s_StartTime, initData.s_SampleOverrideCount); + return new CDataGatherer(model_t::E_EventRate, m_SummaryMode, + this->modelParams(), m_SummaryCountFieldName, + initData.s_PartitionFieldValue, m_PersonFieldName, + EMPTY_STRING /*AttributeFieldName*/, m_ValueFieldName, + m_InfluenceFieldNames, this->searchKey(), m_Features, + initData.s_StartTime, 
initData.s_SampleOverrideCount); } CDataGatherer* CEventRateModelFactory::makeDataGatherer(const std::string& partitionFieldValue, core::CStateRestoreTraverser& traverser) const { - return new CDataGatherer(model_t::E_EventRate, m_SummaryMode, this->modelParams(), - m_SummaryCountFieldName, m_PartitionFieldName, + return new CDataGatherer(model_t::E_EventRate, m_SummaryMode, + this->modelParams(), m_SummaryCountFieldName, partitionFieldValue, m_PersonFieldName, - EMPTY_STRING, // AttributeFieldName - m_ValueFieldName, m_InfluenceFieldNames, m_UseNull, - this->searchKey(), traverser); + EMPTY_STRING /*AttributeFieldName*/, m_ValueFieldName, + m_InfluenceFieldNames, this->searchKey(), traverser); } CEventRateModelFactory::TPriorPtr diff --git a/lib/model/CEventRatePopulationModelFactory.cc b/lib/model/CEventRatePopulationModelFactory.cc index 4d719ea462..63ef6a5c33 100644 --- a/lib/model/CEventRatePopulationModelFactory.cc +++ b/lib/model/CEventRatePopulationModelFactory.cc @@ -93,20 +93,19 @@ CDataGatherer* CEventRatePopulationModelFactory::makeDataGatherer(const SGathererInitializationData& initData) const { return new CDataGatherer(model_t::E_PopulationEventRate, m_SummaryMode, this->modelParams(), m_SummaryCountFieldName, - m_PartitionFieldName, initData.s_PartitionFieldValue, - m_PersonFieldName, m_AttributeFieldName, - m_ValueFieldName, m_InfluenceFieldNames, m_UseNull, + initData.s_PartitionFieldValue, m_PersonFieldName, + m_AttributeFieldName, m_ValueFieldName, m_InfluenceFieldNames, this->searchKey(), m_Features, initData.s_StartTime, 0); } CDataGatherer* CEventRatePopulationModelFactory::makeDataGatherer(const std::string& partitionFieldValue, core::CStateRestoreTraverser& traverser) const { - return new CDataGatherer( - model_t::E_PopulationEventRate, m_SummaryMode, this->modelParams(), - m_SummaryCountFieldName, m_PartitionFieldName, partitionFieldValue, - m_PersonFieldName, m_AttributeFieldName, m_ValueFieldName, - m_InfluenceFieldNames, m_UseNull, 
this->searchKey(), traverser); + return new CDataGatherer(model_t::E_PopulationEventRate, m_SummaryMode, + this->modelParams(), m_SummaryCountFieldName, + partitionFieldValue, m_PersonFieldName, + m_AttributeFieldName, m_ValueFieldName, + m_InfluenceFieldNames, this->searchKey(), traverser); } CEventRatePopulationModelFactory::TPriorPtr diff --git a/lib/model/CIndividualModel.cc b/lib/model/CIndividualModel.cc index 55945b6ff8..0c902b9479 100644 --- a/lib/model/CIndividualModel.cc +++ b/lib/model/CIndividualModel.cc @@ -428,6 +428,7 @@ void CIndividualModel::createUpdateNewModels(core_t::TTime time, numberExistingPeople, 0, numberCorrelations); } } + this->estimateMemoryUsageOrComputeAndUpdate(numberExistingPeople, 0, numberCorrelations); if (numberNewPeople > 0) { resourceMonitor.acceptAllocationFailureResult(time); diff --git a/lib/model/CMetricBucketGatherer.cc b/lib/model/CMetricBucketGatherer.cc index 4c2a49f040..5041310a43 100644 --- a/lib/model/CMetricBucketGatherer.cc +++ b/lib/model/CMetricBucketGatherer.cc @@ -645,7 +645,7 @@ struct SDoSample { TSizeSizeTUMapUMap& data, core_t::TTime time, const CMetricBucketGatherer& gatherer, - CDataGatherer::TSampleCountsPtr sampleCounts) const { + CSampleCounts& sampleCounts) const { for (const auto& count : gatherer.bucketCounts(time)) { std::size_t pid = CDataGatherer::extractPersonId(count); std::size_t cid = CDataGatherer::extractAttributeId(count); @@ -661,8 +661,8 @@ struct SDoSample { LOG_ERROR(<< "No gatherer for attribute " << gatherer.dataGatherer().attributeName(cid) << " of person " << gatherer.dataGatherer().personName(pid)); - } else if (pidEntry->second.sample(time, sampleCounts->count(activeId))) { - sampleCounts->updateSampleVariance(activeId); + } else if (pidEntry->second.sample(time, sampleCounts.count(activeId))) { + sampleCounts.updateSampleVariance(activeId); } } } @@ -1353,8 +1353,9 @@ void CMetricBucketGatherer::releaseMemory(core_t::TTime samplingCutoffTime) { void 
CMetricBucketGatherer::sample(core_t::TTime time) { if (m_DataGatherer.sampleCounts()) { - apply(m_FeatureData, boost::bind(SDoSample(), _1, _2, time, boost::cref(*this), - m_DataGatherer.sampleCounts())); + apply(m_FeatureData, + boost::bind(SDoSample(), _1, _2, time, boost::cref(*this), + boost::ref(*m_DataGatherer.sampleCounts()))); } // Merge smallest bucket into longer buckets, if they exist this->CBucketGatherer::sample(time); diff --git a/lib/model/CMetricModel.cc b/lib/model/CMetricModel.cc index f0212dc1bb..70a6d02582 100644 --- a/lib/model/CMetricModel.cc +++ b/lib/model/CMetricModel.cc @@ -442,8 +442,7 @@ void CMetricModel::debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) con } std::size_t CMetricModel::memoryUsage() const { - return this->CIndividualModel::memoryUsage() + - core::CMemory::dynamicSize(m_InterimBucketCorrector); + return this->CIndividualModel::memoryUsage(); } std::size_t CMetricModel::computeMemoryUsage() const { diff --git a/lib/model/CMetricModelFactory.cc b/lib/model/CMetricModelFactory.cc index e608babad5..82cee578d8 100644 --- a/lib/model/CMetricModelFactory.cc +++ b/lib/model/CMetricModelFactory.cc @@ -89,23 +89,21 @@ CMetricModelFactory::makeModel(const SModelInitializationData& initData, CDataGatherer* CMetricModelFactory::makeDataGatherer(const SGathererInitializationData& initData) const { - return new CDataGatherer( - model_t::E_Metric, m_SummaryMode, this->modelParams(), m_SummaryCountFieldName, - m_PartitionFieldName, initData.s_PartitionFieldValue, m_PersonFieldName, - EMPTY_STRING, // AttributeFieldName - m_ValueFieldName, m_InfluenceFieldNames, m_UseNull, this->searchKey(), - m_Features, initData.s_StartTime, initData.s_SampleOverrideCount); + return new CDataGatherer(model_t::E_Metric, m_SummaryMode, + this->modelParams(), m_SummaryCountFieldName, + initData.s_PartitionFieldValue, m_PersonFieldName, + EMPTY_STRING /*AttributeFieldName*/, m_ValueFieldName, + m_InfluenceFieldNames, this->searchKey(), m_Features, 
+ initData.s_StartTime, initData.s_SampleOverrideCount); } CDataGatherer* CMetricModelFactory::makeDataGatherer(const std::string& partitionFieldValue, core::CStateRestoreTraverser& traverser) const { return new CDataGatherer(model_t::E_Metric, m_SummaryMode, this->modelParams(), - m_SummaryCountFieldName, m_PartitionFieldName, - partitionFieldValue, m_PersonFieldName, - EMPTY_STRING, // AttributeFieldName - m_ValueFieldName, m_InfluenceFieldNames, m_UseNull, - this->searchKey(), traverser); + m_SummaryCountFieldName, partitionFieldValue, m_PersonFieldName, + EMPTY_STRING /*AttributeFieldName*/, m_ValueFieldName, + m_InfluenceFieldNames, this->searchKey(), traverser); } CMetricModelFactory::TPriorPtr diff --git a/lib/model/CMetricPopulationModelFactory.cc b/lib/model/CMetricPopulationModelFactory.cc index 28e49a5058..4d60aa9f72 100644 --- a/lib/model/CMetricPopulationModelFactory.cc +++ b/lib/model/CMetricPopulationModelFactory.cc @@ -89,22 +89,22 @@ CMetricPopulationModelFactory::makeModel(const SModelInitializationData& initDat CDataGatherer* CMetricPopulationModelFactory::makeDataGatherer(const SGathererInitializationData& initData) const { - return new CDataGatherer( - model_t::E_PopulationMetric, m_SummaryMode, this->modelParams(), - m_SummaryCountFieldName, m_PartitionFieldName, - initData.s_PartitionFieldValue, m_PersonFieldName, m_AttributeFieldName, - m_ValueFieldName, m_InfluenceFieldNames, m_UseNull, this->searchKey(), - m_Features, initData.s_StartTime, initData.s_SampleOverrideCount); + return new CDataGatherer(model_t::E_PopulationMetric, m_SummaryMode, + this->modelParams(), m_SummaryCountFieldName, + initData.s_PartitionFieldValue, m_PersonFieldName, + m_AttributeFieldName, m_ValueFieldName, + m_InfluenceFieldNames, this->searchKey(), m_Features, + initData.s_StartTime, initData.s_SampleOverrideCount); } CDataGatherer* CMetricPopulationModelFactory::makeDataGatherer(const std::string& partitionFieldValue, core::CStateRestoreTraverser& traverser) 
const { - return new CDataGatherer( - model_t::E_PopulationMetric, m_SummaryMode, this->modelParams(), - m_SummaryCountFieldName, m_PartitionFieldName, partitionFieldValue, - m_PersonFieldName, m_AttributeFieldName, m_ValueFieldName, - m_InfluenceFieldNames, m_UseNull, this->searchKey(), traverser); + return new CDataGatherer(model_t::E_PopulationMetric, m_SummaryMode, + this->modelParams(), m_SummaryCountFieldName, + partitionFieldValue, m_PersonFieldName, + m_AttributeFieldName, m_ValueFieldName, + m_InfluenceFieldNames, this->searchKey(), traverser); } CMetricPopulationModelFactory::TPriorPtr diff --git a/lib/model/CPopulationModel.cc b/lib/model/CPopulationModel.cc index ce72e3c195..737bb5d23b 100644 --- a/lib/model/CPopulationModel.cc +++ b/lib/model/CPopulationModel.cc @@ -375,8 +375,13 @@ void CPopulationModel::createUpdateNewModels(core_t::TTime time, } } - while (numberNewAttributes > 0 && resourceMonitor.areAllocationsAllowed() && - (resourceMonitor.haveNoLimit() || ourUsage < resourceLimit)) { + this->estimateMemoryUsageOrComputeAndUpdate(numberExistingPeople, + numberExistingAttributes, 0); + + while (numberNewAttributes > 0 && + (numberExistingAttributes == 0 || + (resourceMonitor.areAllocationsAllowed() && + (resourceMonitor.haveNoLimit() || ourUsage < resourceLimit)))) { // We batch attributes in CHUNK_SIZE (500) and create models in chunks // and test usage after each chunk. 
std::size_t numberToCreate = std::min(numberNewAttributes, CHUNK_SIZE); diff --git a/lib/model/CResourceMonitor.cc b/lib/model/CResourceMonitor.cc index 3f2dc8c308..f2507a53a8 100644 --- a/lib/model/CResourceMonitor.cc +++ b/lib/model/CResourceMonitor.cc @@ -26,8 +26,8 @@ const core_t::TTime CResourceMonitor::MINIMUM_PRUNE_FREQUENCY(60 * 60); const std::size_t CResourceMonitor::DEFAULT_MEMORY_LIMIT_MB(4096); CResourceMonitor::CResourceMonitor() - : m_AllowAllocations(true), m_ByteLimitHigh(0), m_ByteLimitLow(0), - m_CurrentAnomalyDetectorMemory(0), m_ExtraMemory(0), + : m_AllowAllocations(true), m_ByteLimitMargin{0.7}, m_ByteLimitHigh(0), + m_ByteLimitLow(0), m_CurrentAnomalyDetectorMemory(0), m_ExtraMemory(0), m_PreviousTotal(this->totalMemory()), m_Peak(m_PreviousTotal), m_LastAllocationFailureReport(0), m_MemoryStatus(model_t::E_MemoryStatusOk), m_HasPruningStarted(false), m_PruneThreshold(0), m_LastPruneTime(0), @@ -42,20 +42,19 @@ void CResourceMonitor::memoryUsageReporter(const TMemoryUsageReporterFunc& repor } void CResourceMonitor::registerComponent(CAnomalyDetector& detector) { - LOG_TRACE(<< "Registering component: " << detector.model().get()); - m_Models.insert({detector.model().get(), std::size_t(0)}); + LOG_TRACE(<< "Registering component: " << &detector); + m_Detectors.emplace(&detector, std::size_t(0)); } void CResourceMonitor::unRegisterComponent(CAnomalyDetector& detector) { - auto iter = m_Models.find(detector.model().get()); - if (iter == m_Models.end()) { - LOG_ERROR(<< "Inconsistency - component has not been registered: " - << detector.model().get()); + auto itr = m_Detectors.find(&detector); + if (itr == m_Detectors.end()) { + LOG_ERROR(<< "Inconsistency - component has not been registered: " << &detector); return; } - LOG_TRACE(<< "Unregistering component: " << detector.model().get()); - m_Models.erase(iter); + LOG_TRACE(<< "Unregistering component: " << &detector); + m_Detectors.erase(itr); } void CResourceMonitor::memoryLimit(std::size_t 
limitMBs) { @@ -90,8 +89,8 @@ void CResourceMonitor::updateMemoryLimitsAndPruneThreshold(std::size_t limitMBs) // persist is configured. m_ByteLimitHigh = static_cast((limitMBs * 1024 * 1024) / 2); } - m_ByteLimitLow = m_ByteLimitHigh - 1024; - m_PruneThreshold = static_cast(m_ByteLimitHigh / 5 * 3); + m_ByteLimitLow = (m_ByteLimitHigh * 49) / 50; + m_PruneThreshold = (m_ByteLimitHigh * 3) / 5; } model_t::EMemoryStatus CResourceMonitor::getMemoryStatus() { @@ -106,7 +105,7 @@ void CResourceMonitor::refresh(CAnomalyDetector& detector) { } void CResourceMonitor::forceRefresh(CAnomalyDetector& detector) { - this->memUsage(detector.model().get()); + this->memUsage(&detector); core::CStatistics::stat(stat_t::E_MemoryUsage).set(this->totalMemory()); LOG_TRACE(<< "Checking allocations: currently at " << this->totalMemory()); this->updateAllowAllocations(); @@ -115,16 +114,14 @@ void CResourceMonitor::forceRefresh(CAnomalyDetector& detector) { void CResourceMonitor::updateAllowAllocations() { std::size_t total{this->totalMemory()}; if (m_AllowAllocations) { - if (total > m_ByteLimitHigh) { - LOG_INFO(<< "Over allocation limit. " << total - << " bytes used, the limit is " << m_ByteLimitHigh); + if (total > this->highLimit()) { + LOG_INFO(<< "Over current allocation limit. " << total + << " bytes used, the limit is " << this->highLimit()); m_AllowAllocations = false; } - } else { - if (total < m_ByteLimitLow) { - LOG_INFO(<< "Below allocation limit, used " << total); - m_AllowAllocations = true; - } + } else if (total < this->lowLimit()) { + LOG_INFO(<< "Below allocation limit, used " << total); + m_AllowAllocations = true; } } @@ -146,19 +143,19 @@ bool CResourceMonitor::pruneIfRequired(core_t::TTime endTime) { return false; } - if (m_Models.empty()) { + if (m_Detectors.empty()) { return false; } if (m_HasPruningStarted == false) { // The longest we'll consider keeping priors for is 1M buckets. 
- CAnomalyDetectorModel* model = m_Models.begin()->first; - if (model == nullptr) { + CAnomalyDetector* detector = m_Detectors.begin()->first; + if (detector == nullptr) { return false; } - m_PruneWindowMaximum = model->defaultPruneWindow(); + m_PruneWindowMaximum = detector->model()->defaultPruneWindow(); m_PruneWindow = m_PruneWindowMaximum; - m_PruneWindowMinimum = model->minimumPruneWindow(); + m_PruneWindowMinimum = detector->model()->minimumPruneWindow(); m_HasPruningStarted = true; this->acceptPruningResult(); LOG_DEBUG(<< "Pruning started. Window (buckets): " << m_PruneWindow); @@ -168,10 +165,11 @@ bool CResourceMonitor::pruneIfRequired(core_t::TTime endTime) { // Do a prune and see how much we got back // These are the expensive operations std::size_t usageAfter = 0; - for (auto& model : m_Models) { - model.first->prune(m_PruneWindow); - model.second = model.first->memoryUsage(); - usageAfter += model.second; + for (auto& detector : m_Detectors) { + const auto& model = detector.first->model(); + model->prune(m_PruneWindow); + detector.second = model->memoryUsage(); + usageAfter += detector.second; } m_CurrentAnomalyDetectorMemory = usageAfter; total = this->totalMemory(); @@ -183,10 +181,10 @@ bool CResourceMonitor::pruneIfRequired(core_t::TTime endTime) { if (total < m_PruneThreshold) { // Expand the window - m_PruneWindow = std::min( - m_PruneWindow + std::size_t((endTime - m_LastPruneTime) / - m_Models.begin()->first->bucketLength()), - m_PruneWindowMaximum); + const auto& model = m_Detectors.begin()->first->model(); + m_PruneWindow = std::min(m_PruneWindow + std::size_t((endTime - m_LastPruneTime) / + model->bucketLength()), + m_PruneWindowMaximum); LOG_TRACE(<< "Expanding window, to " << m_PruneWindow); } else { // Shrink the window @@ -205,24 +203,24 @@ bool CResourceMonitor::areAllocationsAllowed() const { bool CResourceMonitor::areAllocationsAllowed(std::size_t size) const { if (m_AllowAllocations) { - return this->totalMemory() + size < 
m_ByteLimitHigh; + return this->totalMemory() + size < this->highLimit(); } return false; } std::size_t CResourceMonitor::allocationLimit() const { - return m_ByteLimitHigh - std::min(m_ByteLimitHigh, this->totalMemory()); + return this->highLimit() - std::min(this->highLimit(), this->totalMemory()); } -void CResourceMonitor::memUsage(CAnomalyDetectorModel* model) { - auto iter = m_Models.find(model); - if (iter == m_Models.end()) { - LOG_ERROR(<< "Inconsistency - component has not been registered: " << model); +void CResourceMonitor::memUsage(CAnomalyDetector* detector) { + auto itr = m_Detectors.find(detector); + if (itr == m_Detectors.end()) { + LOG_ERROR(<< "Inconsistency - component has not been registered: " << detector); return; } - std::size_t modelPreviousUsage = iter->second; - std::size_t modelCurrentUsage = core::CMemory::dynamicSize(iter->first); - iter->second = modelCurrentUsage; + std::size_t modelPreviousUsage = itr->second; + std::size_t modelCurrentUsage = core::CMemory::dynamicSize(itr->first); + itr->second = modelCurrentUsage; m_CurrentAnomalyDetectorMemory += (modelCurrentUsage - modelPreviousUsage); } @@ -270,10 +268,11 @@ CResourceMonitor::SResults CResourceMonitor::createMemoryUsageReport(core_t::TTi res.s_AllocationFailures = 0; res.s_MemoryStatus = m_MemoryStatus; res.s_BucketStartTime = bucketStartTime; - for (const auto& model : m_Models) { + for (const auto& detector : m_Detectors) { ++res.s_PartitionFields; - res.s_OverFields += model.first->dataGatherer().numberOverFieldValues(); - res.s_ByFields += model.first->dataGatherer().numberByFieldValues(); + const auto& dataGatherer = detector.first->model()->dataGatherer(); + res.s_OverFields += dataGatherer.numberOverFieldValues(); + res.s_ByFields += dataGatherer.numberByFieldValues(); } res.s_AllocationFailures += m_AllocationFailures.size(); return res; @@ -306,6 +305,18 @@ void CResourceMonitor::clearExtraMemory() { } } +void CResourceMonitor::decreaseMargin() { + m_ByteLimitMargin = 
1.0 - 0.99 * (1.0 - m_ByteLimitMargin); +} + +std::size_t CResourceMonitor::highLimit() const { + return static_cast(m_ByteLimitMargin * static_cast(m_ByteLimitHigh)); +} + +std::size_t CResourceMonitor::lowLimit() const { + return static_cast(m_ByteLimitMargin * static_cast(m_ByteLimitLow)); +} + std::size_t CResourceMonitor::totalMemory() const { return m_CurrentAnomalyDetectorMemory + m_ExtraMemory + CStringStore::names().memoryUsage() + diff --git a/lib/model/CSampleCounts.cc b/lib/model/CSampleCounts.cc index aab3463477..08bbc0fabf 100644 --- a/lib/model/CSampleCounts.cc +++ b/lib/model/CSampleCounts.cc @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -67,24 +68,11 @@ void CSampleCounts::acceptPersistInserter(core::CStatePersistInserter& inserter) bool CSampleCounts::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { do { const std::string& name = traverser.name(); - if (name == SAMPLE_COUNT_TAG) { - if (core::CPersistUtils::restore(name, m_SampleCounts, traverser) == false) { - LOG_ERROR(<< "Invalid sample counts"); - return false; - } - } else if (name == MEAN_NON_ZERO_BUCKET_COUNT_TAG) { - if (core::CPersistUtils::restore(name, m_MeanNonZeroBucketCounts, - traverser) == false) { - LOG_ERROR(<< "Invalid non-zero bucket count means"); - return false; - } - } else if (name == EFFECTIVE_SAMPLE_VARIANCE_TAG) { - if (core::CPersistUtils::restore(name, m_EffectiveSampleVariances, - traverser) == false) { - LOG_ERROR(<< "Invalid effective sample variances"); - return false; - } - } + RESTORE(SAMPLE_COUNT_TAG, core::CPersistUtils::restore(name, m_SampleCounts, traverser)) + RESTORE(MEAN_NON_ZERO_BUCKET_COUNT_TAG, + core::CPersistUtils::restore(name, m_MeanNonZeroBucketCounts, traverser)) + RESTORE(EFFECTIVE_SAMPLE_VARIANCE_TAG, + core::CPersistUtils::restore(name, m_EffectiveSampleVariances, traverser)) } while (traverser.next()); return true; } @@ -119,10 +107,9 @@ void CSampleCounts::resetSampleCount(const CDataGatherer& gatherer, 
std::size_t if (maths::CBasicStatistics::count(count_) >= NUMBER_BUCKETS_TO_ESTIMATE_SAMPLE_COUNT) { unsigned sampleCountThreshold = 0; const CDataGatherer::TFeatureVec& features = gatherer.features(); - for (CDataGatherer::TFeatureVecCItr i = features.begin(); - i != features.end(); ++i) { - sampleCountThreshold = - std::max(sampleCountThreshold, model_t::minimumSampleCount(*i)); + for (const auto& feature : features) { + sampleCountThreshold = std::max(sampleCountThreshold, + model_t::minimumSampleCount(feature)); } double count = maths::CBasicStatistics::mean(count_); m_SampleCounts[id] = std::max(sampleCountThreshold, @@ -139,9 +126,9 @@ void CSampleCounts::refresh(const CDataGatherer& gatherer) { unsigned sampleCountThreshold = 0; const CDataGatherer::TFeatureVec& features = gatherer.features(); - for (CDataGatherer::TFeatureVecCItr i = features.begin(); i != features.end(); ++i) { + for (const auto& feature : features) { sampleCountThreshold = - std::max(sampleCountThreshold, model_t::minimumSampleCount(*i)); + std::max(sampleCountThreshold, model_t::minimumSampleCount(feature)); } for (std::size_t id = 0u; id < m_MeanNonZeroBucketCounts.size(); ++id) { diff --git a/lib/model/unittest/CResourceMonitorTest.cc b/lib/model/unittest/CResourceMonitorTest.cc index e4d65f4f51..2c617e87db 100644 --- a/lib/model/unittest/CResourceMonitorTest.cc +++ b/lib/model/unittest/CResourceMonitorTest.cc @@ -58,7 +58,8 @@ void CResourceMonitorTest::testMonitor() { limits, modelConfig, EMPTY_STRING, FIRST_TIME, modelConfig.factory(key)); - std::size_t mem = detector1.memoryUsage() + detector2.memoryUsage() + + std::size_t mem = core::CMemory::dynamicSize(&detector1) + + core::CMemory::dynamicSize(&detector2) + CStringStore::names().memoryUsage() + CStringStore::influencers().memoryUsage(); @@ -98,15 +99,15 @@ void CResourceMonitorTest::testMonitor() { // Test adding and removing a CAnomalyDetector CResourceMonitor mon; - CPPUNIT_ASSERT_EQUAL(std::size_t(0), mon.m_Models.size()); + 
CPPUNIT_ASSERT_EQUAL(std::size_t(0), mon.m_Detectors.size()); CPPUNIT_ASSERT_EQUAL(std::size_t(0), mon.m_CurrentAnomalyDetectorMemory); CPPUNIT_ASSERT(mon.m_PreviousTotal > 0); // because it includes string store memory mon.registerComponent(detector1); - CPPUNIT_ASSERT_EQUAL(std::size_t(1), mon.m_Models.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(1), mon.m_Detectors.size()); mon.registerComponent(detector2); - CPPUNIT_ASSERT_EQUAL(std::size_t(2), mon.m_Models.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(2), mon.m_Detectors.size()); mon.refresh(detector1); mon.refresh(detector2); @@ -115,10 +116,10 @@ void CResourceMonitorTest::testMonitor() { CPPUNIT_ASSERT_EQUAL(mem, mon.m_PreviousTotal); mon.unRegisterComponent(detector2); - CPPUNIT_ASSERT_EQUAL(std::size_t(1), mon.m_Models.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(1), mon.m_Detectors.size()); mon.unRegisterComponent(detector1); - CPPUNIT_ASSERT_EQUAL(std::size_t(0), mon.m_Models.size()); + CPPUNIT_ASSERT_EQUAL(std::size_t(0), mon.m_Detectors.size()); } { // Check that High limit can be breached and then gone back From 55e6dbe112f87d25956c5aeaad3494cf152b6dd1 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Thu, 31 May 2018 17:58:47 +0100 Subject: [PATCH 2/5] Fix unit tests --- include/model/CLimits.h | 5 +- include/model/CResourceMonitor.h | 5 +- lib/api/unittest/CAnomalyJobLimitTest.cc | 6 +- lib/model/CLimits.cc | 8 +- lib/model/CResourceMonitor.cc | 10 +- lib/model/unittest/CDetectionRuleTest.cc | 101 ++-- .../unittest/CEventRateDataGathererTest.cc | 165 +++---- .../CEventRatePopulationDataGathererTest.cc | 72 ++- .../unittest/CHierarchicalResultsTest.cc | 79 ++-- lib/model/unittest/CMetricDataGathererTest.cc | 444 +++++------------- .../CMetricPopulationDataGathererTest.cc | 29 +- lib/model/unittest/CModelDetailsViewTest.cc | 28 +- lib/model/unittest/CResourceLimitTest.cc | 34 +- lib/model/unittest/CResourceMonitorTest.cc | 10 +- lib/model/unittest/CRuleConditionTest.cc | 7 +- 15 files changed, 370 
insertions(+), 633 deletions(-) diff --git a/include/model/CLimits.h b/include/model/CLimits.h index 69ea8aab51..70b12ded58 100644 --- a/include/model/CLimits.h +++ b/include/model/CLimits.h @@ -64,10 +64,7 @@ class MODEL_EXPORT CLimits { public: //! Default constructor - CLimits(); - - //! Default destructor - ~CLimits(); + explicit CLimits(double byteLimitMargin = CResourceMonitor::DEFAULT_BYTE_LIMIT_MARGIN); //! Initialise from a config file. This overwrites current settings //! with any found in the config file. Settings that are not present diff --git a/include/model/CResourceMonitor.h b/include/model/CResourceMonitor.h index d4d064ab1c..f8dd8fd5cb 100644 --- a/include/model/CResourceMonitor.h +++ b/include/model/CResourceMonitor.h @@ -50,13 +50,14 @@ class MODEL_EXPORT CResourceMonitor { //! The minimum time between prunes static const core_t::TTime MINIMUM_PRUNE_FREQUENCY; - //! Default memory limit for resource monitor static const std::size_t DEFAULT_MEMORY_LIMIT_MB; + //! The initial byte limit margin to use if none is supplied + static const double DEFAULT_BYTE_LIMIT_MARGIN; public: //! Default constructor - CResourceMonitor(); + explicit CResourceMonitor(double byteLimitMargin = DEFAULT_BYTE_LIMIT_MARGIN); //! Query the resource monitor to find out if the models are //! 
taking up too much memory and further allocations should be banned diff --git a/lib/api/unittest/CAnomalyJobLimitTest.cc b/lib/api/unittest/CAnomalyJobLimitTest.cc index 9add2b3877..ddd4ade9a3 100644 --- a/lib/api/unittest/CAnomalyJobLimitTest.cc +++ b/lib/api/unittest/CAnomalyJobLimitTest.cc @@ -390,7 +390,7 @@ void CAnomalyJobLimitTest::testModelledEntityCountForFixedMemoryLimit() { LOG_DEBUG(<< "# partition = " << used.s_PartitionFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage = " << used.s_Usage); - CPPUNIT_ASSERT(used.s_ByFields > 700 && used.s_ByFields < 860); + CPPUNIT_ASSERT(used.s_ByFields > 700 && used.s_ByFields < 900); CPPUNIT_ASSERT_EQUAL(std::size_t(2), used.s_PartitionFields); } @@ -433,7 +433,7 @@ void CAnomalyJobLimitTest::testModelledEntityCountForFixedMemoryLimit() { LOG_DEBUG(<< "# partition = " << used.s_PartitionFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage = " << used.s_Usage); - CPPUNIT_ASSERT(used.s_PartitionFields > 430 && used.s_PartitionFields < 510); + CPPUNIT_ASSERT(used.s_PartitionFields > 400 && used.s_PartitionFields < 500); CPPUNIT_ASSERT(static_cast(used.s_ByFields) > 0.95 * static_cast(used.s_PartitionFields)); } @@ -475,6 +475,6 @@ void CAnomalyJobLimitTest::testModelledEntityCountForFixedMemoryLimit() { LOG_DEBUG(<< "# over = " << used.s_OverFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage = " << used.s_Usage); - CPPUNIT_ASSERT(used.s_OverFields > 8500 && used.s_OverFields < 10500); + CPPUNIT_ASSERT(used.s_OverFields > 8000 && used.s_OverFields < 9000); } } diff --git a/lib/model/CLimits.cc b/lib/model/CLimits.cc index 4e6f4f516a..8cb76c08a0 100644 --- a/lib/model/CLimits.cc +++ b/lib/model/CLimits.cc @@ -21,15 +21,13 @@ const size_t CLimits::DEFAULT_RESULTS_MAX_EXAMPLES(4); // The probability threshold is stored as a percentage in the config file const double 
CLimits::DEFAULT_RESULTS_UNUSUAL_PROBABILITY_THRESHOLD(3.5); -CLimits::CLimits() +CLimits::CLimits(double byteLimitMargin) : m_AutoConfigEvents(DEFAULT_AUTOCONFIG_EVENTS), m_AnomalyMaxTimeBuckets(DEFAULT_ANOMALY_MAX_TIME_BUCKETS), m_MaxExamples(DEFAULT_RESULTS_MAX_EXAMPLES), m_UnusualProbabilityThreshold(DEFAULT_RESULTS_UNUSUAL_PROBABILITY_THRESHOLD), - m_MemoryLimitMB(CResourceMonitor::DEFAULT_MEMORY_LIMIT_MB), m_ResourceMonitor() { -} - -CLimits::~CLimits() { + m_MemoryLimitMB(CResourceMonitor::DEFAULT_MEMORY_LIMIT_MB), + m_ResourceMonitor(byteLimitMargin) { } bool CLimits::init(const std::string& configFile) { diff --git a/lib/model/CResourceMonitor.cc b/lib/model/CResourceMonitor.cc index f2507a53a8..4e02933c31 100644 --- a/lib/model/CResourceMonitor.cc +++ b/lib/model/CResourceMonitor.cc @@ -22,13 +22,13 @@ namespace model { // Only prune once per hour const core_t::TTime CResourceMonitor::MINIMUM_PRUNE_FREQUENCY(60 * 60); - const std::size_t CResourceMonitor::DEFAULT_MEMORY_LIMIT_MB(4096); +const double CResourceMonitor::DEFAULT_BYTE_LIMIT_MARGIN(0.7); -CResourceMonitor::CResourceMonitor() - : m_AllowAllocations(true), m_ByteLimitMargin{0.7}, m_ByteLimitHigh(0), - m_ByteLimitLow(0), m_CurrentAnomalyDetectorMemory(0), m_ExtraMemory(0), - m_PreviousTotal(this->totalMemory()), m_Peak(m_PreviousTotal), +CResourceMonitor::CResourceMonitor(double byteLimitMargin) + : m_AllowAllocations(true), m_ByteLimitMargin{byteLimitMargin}, + m_ByteLimitHigh(0), m_ByteLimitLow(0), m_CurrentAnomalyDetectorMemory(0), + m_ExtraMemory(0), m_PreviousTotal(this->totalMemory()), m_Peak(m_PreviousTotal), m_LastAllocationFailureReport(0), m_MemoryStatus(model_t::E_MemoryStatusOk), m_HasPruningStarted(false), m_PruneThreshold(0), m_LastPruneTime(0), m_PruneWindow(std::numeric_limits::max()), diff --git a/lib/model/unittest/CDetectionRuleTest.cc b/lib/model/unittest/CDetectionRuleTest.cc index 4acaec6d72..d166bde1b3 100644 --- a/lib/model/unittest/CDetectionRuleTest.cc +++ 
b/lib/model/unittest/CDetectionRuleTest.cc @@ -80,7 +80,6 @@ CppUnit::Test* CDetectionRuleTest::suite() { void CDetectionRuleTest::testApplyGivenCategoricalCondition() { core_t::TTime bucketLength = 100; core_t::TTime startTime = 100; - CSearchKey key; SModelParams params(bucketLength); CAnomalyDetectorModel::TFeatureInfluenceCalculatorCPtrPrVecVec influenceCalculators; @@ -90,10 +89,12 @@ void CDetectionRuleTest::testApplyGivenCategoricalCondition() { std::string partitionFieldValue("par_1"); std::string personFieldName("over"); std::string attributeFieldName("by"); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer( + CSearchKey key(0, function_t::E_PopulationMetricMean, false, model_t::E_XF_None, + "", attributeFieldName, personFieldName, partitionFieldName); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(std::make_shared( model_t::E_PopulationMetric, model_t::E_None, params, EMPTY_STRING, - partitionFieldName, partitionFieldValue, personFieldName, attributeFieldName, - EMPTY_STRING, TStrVec(), false, key, features, startTime, 0)); + partitionFieldValue, personFieldName, attributeFieldName, EMPTY_STRING, + TStrVec{}, key, features, startTime, 0)); std::string person1("p1"); bool added = false; @@ -337,10 +338,9 @@ void CDetectionRuleTest::testApplyGivenNumericalActualCondition() { TFeatureVec features; features.push_back(model_t::E_IndividualMeanByPerson); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer( - model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, startTime, 0)); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(std::make_shared( + model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, TStrVec{}, key, features, startTime, 0)); std::string person1("p1"); bool addedPerson = false; @@ -442,10 +442,9 @@ void 
CDetectionRuleTest::testApplyGivenNumericalTypicalCondition() { TFeatureVec features; features.push_back(model_t::E_IndividualMeanByPerson); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer( - model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, startTime, 0)); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(std::make_shared( + model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, TStrVec{}, key, features, startTime, 0)); std::string person1("p1"); bool addedPerson = false; @@ -515,10 +514,9 @@ void CDetectionRuleTest::testApplyGivenNumericalDiffAbsCondition() { TFeatureVec features; features.push_back(model_t::E_IndividualMeanByPerson); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer( - model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, startTime, 0)); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(std::make_shared( + model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, TStrVec{}, key, features, startTime, 0)); std::string person1("p1"); bool addedPerson = false; @@ -612,10 +610,9 @@ void CDetectionRuleTest::testApplyGivenSingleSeriesModelAndConditionWithField() TFeatureVec features; features.push_back(model_t::E_IndividualMeanByPerson); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer( - model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, startTime, 0)); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(std::make_shared( + model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, 
EMPTY_STRING, TStrVec{}, key, features, startTime, 0)); std::string person1("p1"); bool addedPerson = false; @@ -659,10 +656,9 @@ void CDetectionRuleTest::testApplyGivenNoActualValueAvailable() { TFeatureVec features; features.push_back(model_t::E_IndividualMeanByPerson); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer( - model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, startTime, 0)); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(std::make_shared( + model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, TStrVec{}, key, features, startTime, 0)); std::string person1("p1"); bool addedPerson = false; @@ -699,10 +695,9 @@ void CDetectionRuleTest::testApplyGivenDifferentSeriesAndIndividualModel() { TFeatureVec features; features.push_back(model_t::E_IndividualMeanByPerson); std::string personFieldName("series"); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer( - model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, personFieldName, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, startTime, 0)); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(std::make_shared( + model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, personFieldName, + EMPTY_STRING, EMPTY_STRING, TStrVec{}, key, features, startTime, 0)); std::string person1("p1"); bool addedPerson = false; @@ -744,10 +739,10 @@ void CDetectionRuleTest::testApplyGivenDifferentSeriesAndPopulationModel() { features.push_back(model_t::E_PopulationMeanByPersonAndAttribute); std::string personFieldName("over"); std::string attributeFieldName("by"); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer( + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(std::make_shared( model_t::E_PopulationMetric, 
model_t::E_None, params, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, personFieldName, attributeFieldName, - EMPTY_STRING, TStrVec(), false, key, features, startTime, 0)); + EMPTY_STRING, personFieldName, attributeFieldName, EMPTY_STRING, + TStrVec{}, key, features, startTime, 0)); std::string person1("p1"); bool added = false; @@ -806,10 +801,9 @@ void CDetectionRuleTest::testApplyGivenMultipleConditionsWithOr() { TFeatureVec features; features.push_back(model_t::E_IndividualMeanByPerson); std::string personFieldName("series"); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer( - model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, personFieldName, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, startTime, 0)); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(std::make_shared( + model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, personFieldName, + EMPTY_STRING, EMPTY_STRING, TStrVec{}, key, features, startTime, 0)); std::string person1("p1"); bool addedPerson = false; @@ -923,10 +917,9 @@ void CDetectionRuleTest::testApplyGivenMultipleConditionsWithAnd() { TFeatureVec features; features.push_back(model_t::E_IndividualMeanByPerson); std::string personFieldName("series"); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer( - model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, personFieldName, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, startTime, 0)); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(std::make_shared( + model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, personFieldName, + EMPTY_STRING, EMPTY_STRING, TStrVec{}, key, features, startTime, 0)); std::string person1("p1"); bool addedPerson = false; @@ -1037,7 +1030,6 @@ void CDetectionRuleTest::testApplyGivenMultipleConditionsWithAnd() { void 
CDetectionRuleTest::testApplyGivenTargetFieldIsPartitionAndIndividualModel() { core_t::TTime bucketLength = 100; core_t::TTime startTime = 100; - CSearchKey key; SModelParams params(bucketLength); CAnomalyDetectorModel::TFeatureInfluenceCalculatorCPtrPrVecVec influenceCalculators; @@ -1046,10 +1038,11 @@ void CDetectionRuleTest::testApplyGivenTargetFieldIsPartitionAndIndividualModel( std::string partitionFieldName("partition"); std::string partitionFieldValue("partition_1"); std::string personFieldName("series"); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer( - model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, - partitionFieldName, partitionFieldValue, personFieldName, EMPTY_STRING, - EMPTY_STRING, TStrVec(), false, key, features, startTime, 0)); + CSearchKey key(0, function_t::E_IndividualMetricMean, false, model_t::E_XF_None, + "", personFieldName, EMPTY_STRING, partitionFieldName); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(std::make_shared( + model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, partitionFieldValue, personFieldName, + EMPTY_STRING, EMPTY_STRING, TStrVec{}, key, features, startTime, 0)); std::string person1("p1"); bool addedPerson = false; @@ -1128,7 +1121,6 @@ void CDetectionRuleTest::testApplyGivenTargetFieldIsPartitionAndIndividualModel( void CDetectionRuleTest::testApplyGivenTimeCondition() { core_t::TTime bucketLength = 100; core_t::TTime startTime = 100; - CSearchKey key; SModelParams params(bucketLength); CAnomalyDetectorModel::TFeatureInfluenceCalculatorCPtrPrVecVec influenceCalculators; @@ -1136,10 +1128,11 @@ void CDetectionRuleTest::testApplyGivenTimeCondition() { features.push_back(model_t::E_IndividualMeanByPerson); std::string partitionFieldName("partition"); std::string personFieldName("series"); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer( - model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, - partitionFieldName, EMPTY_STRING, personFieldName, 
EMPTY_STRING, - EMPTY_STRING, TStrVec(), false, key, features, startTime, 0)); + CSearchKey key(0, function_t::E_IndividualMetricMean, false, model_t::E_XF_None, + "", personFieldName, EMPTY_STRING, partitionFieldName); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(std::make_shared( + model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, personFieldName, + EMPTY_STRING, EMPTY_STRING, TStrVec{}, key, features, startTime, 0)); CMockModel model(params, gathererPtr, influenceCalculators); CRuleCondition conditionGte; @@ -1171,7 +1164,6 @@ void CDetectionRuleTest::testApplyGivenTimeCondition() { void CDetectionRuleTest::testRuleActions() { core_t::TTime bucketLength = 100; core_t::TTime startTime = 100; - CSearchKey key; SModelParams params(bucketLength); CAnomalyDetectorModel::TFeatureInfluenceCalculatorCPtrPrVecVec influenceCalculators; @@ -1179,10 +1171,11 @@ void CDetectionRuleTest::testRuleActions() { features.push_back(model_t::E_IndividualMeanByPerson); std::string partitionFieldName("partition"); std::string personFieldName("series"); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer( - model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, - partitionFieldName, EMPTY_STRING, personFieldName, EMPTY_STRING, - EMPTY_STRING, TStrVec(), false, key, features, startTime, 0)); + CSearchKey key(0, function_t::E_IndividualMetricMean, false, model_t::E_XF_None, + "", personFieldName, EMPTY_STRING, partitionFieldName); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(std::make_shared( + model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, personFieldName, + EMPTY_STRING, EMPTY_STRING, TStrVec{}, key, features, startTime, 0)); CMockModel model(params, gathererPtr, influenceCalculators); CRuleCondition conditionGte; diff --git a/lib/model/unittest/CEventRateDataGathererTest.cc b/lib/model/unittest/CEventRateDataGathererTest.cc index 82682e2276..4fcdcea99a 100644 --- 
a/lib/model/unittest/CEventRateDataGathererTest.cc +++ b/lib/model/unittest/CEventRateDataGathererTest.cc @@ -149,29 +149,31 @@ void addArrival(CDataGatherer& gatherer, gatherer.addArrival(fieldValues, eventData, resourceMonitor); } -void testPersistence(const SModelParams& params, const CDataGatherer& gatherer) { +void testPersistence(const SModelParams& params, const CDataGatherer& origGatherer) { // Test persistence. (We check for idempotency.) std::string origXml; { core::CRapidXmlStatePersistInserter inserter("root"); - gatherer.acceptPersistInserter(inserter); + origGatherer.acceptPersistInserter(inserter); inserter.toXml(origXml); } - LOG_TRACE(<< "model XML representation:\n" << origXml); + LOG_DEBUG(<< "gatherer XML size " << origXml.size()); + LOG_TRACE(<< "gatherer XML representation:\n" << origXml); - // Restore the XML into a new filter + // Restore the XML into a new filter. core::CRapidXmlParser parser; CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); CDataGatherer restoredGatherer(model_t::E_EventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, key, traverser); + EMPTY_STRING, EMPTY_STRING, {}, key, traverser); + + CPPUNIT_ASSERT_EQUAL(origGatherer.checksum(), restoredGatherer.checksum()); // The XML representation of the new filter should be the - // same as the original + // same as the original. 
std::string newXml; { core::CRapidXmlStatePersistInserter inserter("root"); @@ -179,7 +181,6 @@ void testPersistence(const SModelParams& params, const CDataGatherer& gatherer) inserter.toXml(newXml); } CPPUNIT_ASSERT_EQUAL(origXml, newXml); - CPPUNIT_ASSERT_EQUAL(gatherer.checksum(), restoredGatherer.checksum()); } void testInfluencerPerFeature(model_t::EFeature feature, @@ -198,10 +199,9 @@ void testInfluencerPerFeature(model_t::EFeature feature, features.push_back(feature); TStrVec influencerFieldNames; influencerFieldNames.push_back("IF1"); - CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, valueField, influencerFieldNames, - false, key, features, startTime, 0); + CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, valueField, + influencerFieldNames, key, features, startTime, 0); CPPUNIT_ASSERT(!gatherer.isPopulation()); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(gatherer, resourceMonitor, "p", valueField, 1)); @@ -303,9 +303,8 @@ void CEventRateDataGathererTest::testLatencyPersist() { TFeatureVec features; features.push_back(model_t::E_IndividualUniqueCountByBucketAndPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - "program", EMPTY_STRING, "file", TStrVec(), - false, key, features, startTime, 0); + EMPTY_STRING, EMPTY_STRING, "program", EMPTY_STRING, + "file", {}, key, features, startTime, 0); TSizeVec fields; fields.push_back(2); fields.push_back(1); @@ -322,9 +321,8 @@ void CEventRateDataGathererTest::testLatencyPersist() { influencers.push_back("user"); features.push_back(model_t::E_IndividualUniqueCountByBucketAndPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - "program", EMPTY_STRING, "file", influencers, - false, key, features, 
startTime, 0); + EMPTY_STRING, EMPTY_STRING, "program", EMPTY_STRING, + "file", influencers, key, features, startTime, 0); TSizeVec fields; fields.push_back(2); fields.push_back(3); @@ -340,9 +338,8 @@ void CEventRateDataGathererTest::testLatencyPersist() { TFeatureVec features; features.push_back(model_t::E_IndividualNonZeroCountByBucketAndPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - "program", EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, startTime, 0); + EMPTY_STRING, EMPTY_STRING, "program", EMPTY_STRING, + EMPTY_STRING, {}, key, features, startTime, 0); TSizeVec fields; fields.push_back(2); @@ -357,10 +354,9 @@ void CEventRateDataGathererTest::testLatencyPersist() { TStrVec influencers; influencers.push_back("user"); features.push_back(model_t::E_IndividualNonZeroCountByBucketAndPerson); - CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - "program", EMPTY_STRING, EMPTY_STRING, - influencers, false, key, features, startTime, 0); + CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, EMPTY_STRING, + EMPTY_STRING, "program", EMPTY_STRING, EMPTY_STRING, + influencers, key, features, startTime, 0); TSizeVec fields; fields.push_back(2); fields.push_back(3); @@ -413,9 +409,8 @@ void CEventRateDataGathererTest::singleSeriesTests() { features.push_back(model_t::E_IndividualCountByBucketAndPerson); features.push_back(model_t::E_IndividualMinByPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, key, features, startTime, 0); + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, {}, key, features, startTime, 0); CPPUNIT_ASSERT(!gatherer.isPopulation()); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(gatherer, m_ResourceMonitor, "p")); @@ -478,9 
+473,8 @@ void CEventRateDataGathererTest::singleSeriesTests() { features.push_back(model_t::E_IndividualNonZeroCountByBucketAndPerson); features.push_back(model_t::E_IndividualTotalBucketCountByPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, key, features, startTime, 0); + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, {}, key, features, startTime, 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(gatherer, m_ResourceMonitor, "p")); core_t::TTime time = startTime; @@ -519,9 +513,8 @@ void CEventRateDataGathererTest::singleSeriesTests() { TFeatureVec features; features.push_back(model_t::E_IndividualIndicatorOfBucketPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, key, features, startTime, 0); + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, {}, key, features, startTime, 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(gatherer, m_ResourceMonitor, "p")); core_t::TTime time = startTime; @@ -592,9 +585,8 @@ void CEventRateDataGathererTest::multipleSeriesTests() { TFeatureVec features; features.push_back(model_t::E_IndividualCountByBucketAndPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, key, features, startTime, 0); + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, {}, key, features, startTime, 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(gatherer, m_ResourceMonitor, "p1")); CPPUNIT_ASSERT_EQUAL(std::size_t(1), addPerson(gatherer, m_ResourceMonitor, "p2")); @@ -661,9 +653,8 @@ void CEventRateDataGathererTest::multipleSeriesTests() { TFeatureVec features; 
features.push_back(model_t::E_IndividualCountByBucketAndPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, key, features, startTime, 0); + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, {}, key, features, startTime, 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(gatherer, m_ResourceMonitor, "p1")); CPPUNIT_ASSERT_EQUAL(std::size_t(1), addPerson(gatherer, m_ResourceMonitor, "p2")); CPPUNIT_ASSERT_EQUAL(std::size_t(2), addPerson(gatherer, m_ResourceMonitor, "p3")); @@ -732,9 +723,8 @@ void CEventRateDataGathererTest::testRemovePeople() { features.push_back(model_t::E_IndividualHighCountsByBucketAndPerson); SModelParams params(bucketLength); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, startTime, 0); + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, {}, key, features, startTime, 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(gatherer, m_ResourceMonitor, "p1")); CPPUNIT_ASSERT_EQUAL(std::size_t(1), addPerson(gatherer, m_ResourceMonitor, "p2")); CPPUNIT_ASSERT_EQUAL(std::size_t(2), addPerson(gatherer, m_ResourceMonitor, "p3")); @@ -758,10 +748,10 @@ void CEventRateDataGathererTest::testRemovePeople() { peopleToRemove.push_back(1); gatherer.recyclePeople(peopleToRemove); - CDataGatherer expectedGatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + CDataGatherer expectedGatherer(model_t::E_EventRate, model_t::E_None, + params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, key, features, startTime, 0); + {}, key, features, startTime, 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(expectedGatherer, m_ResourceMonitor, "p3")); 
CPPUNIT_ASSERT_EQUAL(std::size_t(1), @@ -794,10 +784,10 @@ void CEventRateDataGathererTest::testRemovePeople() { peopleToRemove.push_back(7); gatherer.recyclePeople(peopleToRemove); - CDataGatherer expectedGatherer(model_t::E_EventRate, model_t::E_None, params, + CDataGatherer expectedGatherer(model_t::E_EventRate, model_t::E_None, + params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, key, features, startTime, 0); + {}, key, features, startTime, 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(expectedGatherer, m_ResourceMonitor, "p3")); CPPUNIT_ASSERT_EQUAL(std::size_t(1), @@ -824,10 +814,10 @@ void CEventRateDataGathererTest::testRemovePeople() { peopleToRemove.push_back(6); gatherer.recyclePeople(peopleToRemove); - CDataGatherer expectedGatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + CDataGatherer expectedGatherer(model_t::E_EventRate, model_t::E_None, + params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, key, features, startTime, 0); + {}, key, features, startTime, 0); LOG_DEBUG(<< "checksum = " << gatherer.checksum()); LOG_DEBUG(<< "expected checksum = " << expectedGatherer.checksum()); @@ -887,9 +877,8 @@ void CEventRateDataGathererTest::singleSeriesOutOfOrderFinalResultTests() { TFeatureVec features; features.push_back(model_t::E_IndividualCountByBucketAndPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, key, features, startTime, 0); + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, {}, key, features, startTime, 0); addPerson(gatherer, m_ResourceMonitor, "p"); core_t::TTime time = startTime; @@ -925,9 +914,8 @@ void CEventRateDataGathererTest::singleSeriesOutOfOrderFinalResultTests() { 
features.push_back(model_t::E_IndividualNonZeroCountByBucketAndPerson); features.push_back(model_t::E_IndividualTotalBucketCountByPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, key, features, startTime, 0); + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, {}, key, features, startTime, 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(gatherer, m_ResourceMonitor, "p")); core_t::TTime time = startTime; @@ -966,9 +954,8 @@ void CEventRateDataGathererTest::singleSeriesOutOfOrderFinalResultTests() { TFeatureVec features; features.push_back(model_t::E_IndividualIndicatorOfBucketPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, key, features, startTime, 0); + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, {}, key, features, startTime, 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(gatherer, m_ResourceMonitor, "p")); core_t::TTime time = startTime; @@ -1021,9 +1008,8 @@ void CEventRateDataGathererTest::singleSeriesOutOfOrderInterimResultTests() { TFeatureVec features; features.push_back(model_t::E_IndividualCountByBucketAndPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, startTime, 0); + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, {}, key, features, startTime, 0); addPerson(gatherer, m_ResourceMonitor, "p"); TFeatureSizeFeatureDataPrVecPrVec featureData; @@ -1169,9 +1155,8 @@ void CEventRateDataGathererTest::multipleSeriesOutOfOrderFinalResultTests() { TFeatureVec features; features.push_back(model_t::E_IndividualCountByBucketAndPerson); CDataGatherer 
gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, key, features, startTime, 0); + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, {}, key, features, startTime, 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(gatherer, m_ResourceMonitor, "p1")); CPPUNIT_ASSERT_EQUAL(std::size_t(1), addPerson(gatherer, m_ResourceMonitor, "p2")); @@ -1237,9 +1222,8 @@ void CEventRateDataGathererTest::multipleSeriesOutOfOrderFinalResultTests() { TFeatureVec features; features.push_back(model_t::E_IndividualCountByBucketAndPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, key, features, startTime, 0); + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, {}, key, features, startTime, 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(gatherer, m_ResourceMonitor, "p1")); CPPUNIT_ASSERT_EQUAL(std::size_t(1), addPerson(gatherer, m_ResourceMonitor, "p2")); CPPUNIT_ASSERT_EQUAL(std::size_t(2), addPerson(gatherer, m_ResourceMonitor, "p3")); @@ -1307,9 +1291,8 @@ void CEventRateDataGathererTest::testArrivalBeforeLatencyWindowIsIgnored() { TFeatureVec features; features.push_back(model_t::E_IndividualCountByBucketAndPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, startTime, 0); + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, {}, key, features, startTime, 0); addPerson(gatherer, m_ResourceMonitor, "p"); addArrival(gatherer, m_ResourceMonitor, data[0], "p"); @@ -1351,9 +1334,8 @@ void CEventRateDataGathererTest::testResetBucketGivenSingleSeries() { TFeatureVec features; 
features.push_back(model_t::E_IndividualCountByBucketAndPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, startTime, 0); + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, {}, key, features, startTime, 0); addPerson(gatherer, m_ResourceMonitor, "p"); for (std::size_t i = 0; i < boost::size(data); ++i) { @@ -1407,9 +1389,8 @@ void CEventRateDataGathererTest::testResetBucketGivenMultipleSeries() { TFeatureVec features; features.push_back(model_t::E_IndividualCountByBucketAndPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, startTime, 0); + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, {}, key, features, startTime, 0); addPerson(gatherer, m_ResourceMonitor, "p1"); addPerson(gatherer, m_ResourceMonitor, "p2"); addPerson(gatherer, m_ResourceMonitor, "p3"); @@ -1459,9 +1440,8 @@ void CEventRateDataGathererTest::testResetBucketGivenBucketNotAvailable() { TFeatureVec features; features.push_back(model_t::E_IndividualCountByBucketAndPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, startTime, 0); + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, {}, key, features, startTime, 0); addPerson(gatherer, m_ResourceMonitor, "p"); addArrival(gatherer, m_ResourceMonitor, 1200, "p"); @@ -1815,9 +1795,8 @@ void CEventRateDataGathererTest::testDistinctStrings() { TFeatureVec features; features.push_back(model_t::E_IndividualUniqueCountByBucketAndPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, "P", EMPTY_STRING, 
EMPTY_STRING, - EMPTY_STRING, "V", TStrVec(1, "INF"), false, key, - features, startTime, 0); + EMPTY_STRING, EMPTY_STRING, "P", EMPTY_STRING, + "V", {"INF"}, key, features, startTime, 0); CPPUNIT_ASSERT(!gatherer.isPopulation()); CPPUNIT_ASSERT_EQUAL(std::size_t(0), @@ -1869,9 +1848,8 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { TFeatureVec features; features.push_back(model_t::E_IndividualTimeOfDayByBucketAndPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - "person", EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, startTime, 0); + EMPTY_STRING, EMPTY_STRING, "person", EMPTY_STRING, + EMPTY_STRING, {}, key, features, startTime, 0); CPPUNIT_ASSERT(!gatherer.isPopulation()); @@ -1992,9 +1970,8 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { TFeatureVec features; features.push_back(model_t::E_IndividualTimeOfWeekByBucketAndPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - "person", EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, startTime, 0); + EMPTY_STRING, EMPTY_STRING, "person", EMPTY_STRING, + EMPTY_STRING, {}, key, features, startTime, 0); CPPUNIT_ASSERT(!gatherer.isPopulation()); @@ -2116,8 +2093,7 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { features.push_back(model_t::E_PopulationTimeOfWeekByBucketPersonAndAttribute); CDataGatherer gatherer(model_t::E_PopulationEventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, "att", EMPTY_STRING, TStrVec(), - false, key, features, startTime, 0); + "att", EMPTY_STRING, {}, key, features, startTime, 0); CPPUNIT_ASSERT(gatherer.isPopulation()); @@ -2240,8 +2216,7 @@ void CEventRateDataGathererTest::testDiurnalFeatures() { features.push_back(model_t::E_PopulationTimeOfDayByBucketPersonAndAttribute); CDataGatherer gatherer(model_t::E_PopulationEventRate, 
model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, "att", EMPTY_STRING, TStrVec(), - false, key, features, startTime, 0); + "att", EMPTY_STRING, {}, key, features, startTime, 0); CPPUNIT_ASSERT(gatherer.isPopulation()); diff --git a/lib/model/unittest/CEventRatePopulationDataGathererTest.cc b/lib/model/unittest/CEventRatePopulationDataGathererTest.cc index 98035f4c4a..14615cdc62 100644 --- a/lib/model/unittest/CEventRatePopulationDataGathererTest.cc +++ b/lib/model/unittest/CEventRatePopulationDataGathererTest.cc @@ -227,10 +227,9 @@ void CEventRatePopulationDataGathererTest::testAttributeCounts() { features.push_back(model_t::E_PopulationCountByBucketPersonAndAttribute); features.push_back(model_t::E_PopulationUniquePersonCountByAttribute); SModelParams params(bucketLength); - CDataGatherer dataGatherer(model_t::E_PopulationEventRate, model_t::E_None, - params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, searchKey, features, startTime, 0); + CDataGatherer dataGatherer(model_t::E_PopulationEventRate, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, {}, searchKey, features, startTime, 0); CPPUNIT_ASSERT(dataGatherer.isPopulation()); CPPUNIT_ASSERT_EQUAL(startTime, dataGatherer.currentBucketStartTime()); @@ -342,10 +341,9 @@ void CEventRatePopulationDataGathererTest::testAttributeIndicator() { CDataGatherer::TFeatureVec features; features.push_back(model_t::E_PopulationIndicatorOfBucketPersonAndAttribute); SModelParams params(bucketLength); - CDataGatherer dataGatherer(model_t::E_PopulationEventRate, model_t::E_None, - params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, searchKey, features, startTime, 0); + CDataGatherer dataGatherer(model_t::E_PopulationEventRate, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, 
{}, searchKey, features, startTime, 0); core_t::TTime time = startTime; for (std::size_t i = 0u; i < numberBuckets; ++i, time += bucketLength) { @@ -402,10 +400,9 @@ void CEventRatePopulationDataGathererTest::testUniqueValueCounts() { CDataGatherer::TFeatureVec features; features.push_back(model_t::E_PopulationUniqueCountByBucketPersonAndAttribute); SModelParams params(bucketLength); - CDataGatherer dataGatherer(model_t::E_PopulationEventRate, model_t::E_None, - params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, "value", TStrVec(), - false, searchKey, features, startTime, 0); + CDataGatherer dataGatherer(model_t::E_PopulationEventRate, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + "value", {}, searchKey, features, startTime, 0); core_t::TTime time = startTime; for (std::size_t i = 0u; i < numberBuckets; ++i, time += bucketLength) { @@ -471,10 +468,9 @@ void CEventRatePopulationDataGathererTest::testCompressedLength() { CDataGatherer::TFeatureVec features; features.push_back(model_t::E_PopulationInfoContentByBucketPersonAndAttribute); SModelParams params(bucketLength); - CDataGatherer dataGatherer(model_t::E_PopulationEventRate, model_t::E_None, - params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, "value", TStrVec(), - false, searchKey, features, startTime, 0); + CDataGatherer dataGatherer(model_t::E_PopulationEventRate, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + "value", {}, searchKey, features, startTime, 0); core_t::TTime time = startTime; for (std::size_t i = 0u; i < numberBuckets; ++i, time += bucketLength) { @@ -560,10 +556,9 @@ void CEventRatePopulationDataGathererTest::testRemovePeople() { CDataGatherer::TFeatureVec features; features.push_back(model_t::E_PopulationCountByBucketPersonAndAttribute); SModelParams params(bucketLength); - CDataGatherer gatherer(model_t::E_PopulationEventRate, model_t::E_None, - params, 
EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, searchKey, features, startTime, 0); + CDataGatherer gatherer(model_t::E_PopulationEventRate, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, {}, searchKey, features, startTime, 0); core_t::TTime bucketStart = startTime; for (std::size_t i = 0u; i < numberBuckets; ++i, bucketStart += bucketLength) { TMessageVec messages; @@ -686,10 +681,9 @@ void CEventRatePopulationDataGathererTest::testRemoveAttributes() { features.push_back(model_t::E_PopulationCountByBucketPersonAndAttribute); features.push_back(model_t::E_PopulationUniquePersonCountByAttribute); SModelParams params(bucketLength); - CDataGatherer gatherer(model_t::E_PopulationEventRate, model_t::E_None, - params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, searchKey, features, startTime, 0); + CDataGatherer gatherer(model_t::E_PopulationEventRate, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, {}, searchKey, features, startTime, 0); TMessageVec messages; generateTestMessages(rng, startTime, bucketLength, messages); @@ -792,10 +786,10 @@ void CEventRatePopulationDataGathererTest::testPersistence() { features.push_back(model_t::E_PopulationCountByBucketPersonAndAttribute); features.push_back(model_t::E_PopulationUniquePersonCountByAttribute); SModelParams params(bucketLength); - CDataGatherer origDataGatherer( - model_t::E_PopulationEventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, TStrVec(), false, searchKey, features, startTime, 0); + CDataGatherer origDataGatherer(model_t::E_PopulationEventRate, model_t::E_None, + params, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + {}, searchKey, features, startTime, 0); TMessageVec messages; 
generateTestMessages(rng, startTime, bucketLength, messages); @@ -821,10 +815,10 @@ void CEventRatePopulationDataGathererTest::testPersistence() { CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CDataGatherer restoredDataGatherer( - model_t::E_PopulationEventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, TStrVec(), false, searchKey, traverser); + CDataGatherer restoredDataGatherer(model_t::E_PopulationEventRate, + model_t::E_None, params, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, {}, searchKey, traverser); // The XML representation of the new data gatherer should be the same as the // original @@ -845,9 +839,9 @@ void CEventRatePopulationDataGathererTest::testPersistence() { features.push_back(model_t::E_PopulationInfoContentByBucketPersonAndAttribute); SModelParams params(bucketLength); CDataGatherer dataGatherer(model_t::E_PopulationEventRate, model_t::E_None, - params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, "value", TStrVec(), - false, searchKey, features, startTime, 0); + params, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, "value", {}, + searchKey, features, startTime, 0); core_t::TTime time = startTime; for (std::size_t i = 0u; i < numberBuckets; ++i, time += bucketLength) { @@ -887,10 +881,10 @@ void CEventRatePopulationDataGathererTest::testPersistence() { CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CDataGatherer restoredDataGatherer( - model_t::E_PopulationEventRate, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, TStrVec(), false, searchKey, traverser); + CDataGatherer restoredDataGatherer(model_t::E_PopulationEventRate, + model_t::E_None, params, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + 
EMPTY_STRING, {}, searchKey, traverser); // The XML representation of the new data gatherer should be the same as the // original diff --git a/lib/model/unittest/CHierarchicalResultsTest.cc b/lib/model/unittest/CHierarchicalResultsTest.cc index 28b5a84ad7..0f5a100c6d 100644 --- a/lib/model/unittest/CHierarchicalResultsTest.cc +++ b/lib/model/unittest/CHierarchicalResultsTest.cc @@ -1017,10 +1017,10 @@ void CHierarchicalResultsTest::testAggregator() { // Test by. { - double p_[] = {0.22, 0.03, 0.02}; + TDoubleVec probabilities{0.22, 0.03, 0.02}; TAnnotatedProbabilityVec annotatedProbabilities; - for (std::size_t i = 0; i < boost::size(p_); ++i) { - annotatedProbabilities.push_back(model::SAnnotatedProbability(p_[i])); + for (auto p : probabilities) { + annotatedProbabilities.push_back(model::SAnnotatedProbability(p)); } model::CHierarchicalResults results; @@ -1035,7 +1035,6 @@ void CHierarchicalResultsTest::testAggregator() { CPrinter printer; results.postorderDepthFirst(printer); LOG_DEBUG(<< "\nby:\n" << printer.result()); - TDoubleVec probabilities(boost::begin(p_), boost::end(p_)); attributeComputer(probabilities, score, probability); CPPUNIT_ASSERT(results.root()); CPPUNIT_ASSERT_DOUBLES_EQUAL(score, results.root()->s_RawAnomalyScore, 1e-12); @@ -1044,10 +1043,10 @@ void CHierarchicalResultsTest::testAggregator() { // Test over. 
{ - double p_[] = {0.25, 0.3, 0.001}; + TDoubleVec probabilities{0.25, 0.3, 0.001}; TAnnotatedProbabilityVec annotatedProbabilities; - for (std::size_t i = 0; i < boost::size(p_); ++i) { - annotatedProbabilities.push_back(model::SAnnotatedProbability(p_[i])); + for (auto p : probabilities) { + annotatedProbabilities.push_back(model::SAnnotatedProbability(p)); } model::CHierarchicalResults results; @@ -1062,7 +1061,6 @@ void CHierarchicalResultsTest::testAggregator() { CPrinter printer; results.postorderDepthFirst(printer); LOG_DEBUG(<< "\nover:\n" << printer.result()); - TDoubleVec probabilities(boost::begin(p_), boost::end(p_)); personComputer(probabilities, score, probability); CPPUNIT_ASSERT(results.root()); CPPUNIT_ASSERT_DOUBLES_EQUAL(score, results.root()->s_RawAnomalyScore, 1e-12); @@ -1145,10 +1143,10 @@ void CHierarchicalResultsTest::testAggregator() { // Test partition { - double p_[] = {0.01, 0.03, 0.001}; + TDoubleVec probabilities{0.01, 0.03, 0.001}; TAnnotatedProbabilityVec annotatedProbabilities; - for (std::size_t i = 0; i < boost::size(p_); ++i) { - annotatedProbabilities.push_back(model::SAnnotatedProbability(p_[i])); + for (auto p : probabilities) { + annotatedProbabilities.push_back(model::SAnnotatedProbability(p)); } model::CHierarchicalResults results; results.addModelResult(1, false, FUNC, function, PNF1, pn11, EMPTY_STRING, @@ -1162,7 +1160,6 @@ void CHierarchicalResultsTest::testAggregator() { CPrinter printer; results.postorderDepthFirst(printer); LOG_DEBUG(<< "\npartition:\n" << printer.result()); - TDoubleVec probabilities(boost::begin(p_), boost::end(p_)); partitionComputer(probabilities, score, probability); CPPUNIT_ASSERT(results.root()); CPPUNIT_ASSERT_DOUBLES_EQUAL(score, results.root()->s_RawAnomalyScore, 1e-12); @@ -1478,11 +1475,12 @@ void CHierarchicalResultsTest::testWriter() { auto interimBucketCorrector = std::make_shared(modelConfig.bucketLength()); model::CSearchKey key; - model::CAnomalyDetectorModel::TDataGathererPtr 
dataGatherer(new model::CDataGatherer( - model_t::E_EventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), false, - key, model_t::TFeatureVec(1, model_t::E_IndividualCountByBucketAndPerson), - modelConfig.bucketLength(), 0)); + model::CAnomalyDetectorModel::TDataGathererPtr dataGatherer( + std::make_shared( + model_t::E_EventRate, model_t::E_None, params, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec{}, key, + model_t::TFeatureVec{model_t::E_IndividualCountByBucketAndPerson}, + modelConfig.bucketLength(), 0)); model::CEventData dummy; dataGatherer->addArrival(TStrCPtrVec(1, &EMPTY_STRING), dummy, resourceMonitor); dummy.clear(); @@ -1539,7 +1537,6 @@ void CHierarchicalResultsTest::testWriter() { void CHierarchicalResultsTest::testNormalizer() { using TNormalizerPtr = std::shared_ptr; using TStrNormalizerPtrMap = std::map; - using TStrNormalizerPtrMapItr = TStrNormalizerPtrMap::iterator; using TNodeCPtrSet = std::set; model::CAnomalyDetectorModelConfig modelConfig = @@ -1566,8 +1563,8 @@ void CHierarchicalResultsTest::testNormalizer() { {"4", FALSE_STR, PNF2, pn22, PF1, p12, EMPTY_STRING}, {"4", FALSE_STR, PNF2, pn23, PF1, p13, EMPTY_STRING}}; TStrNormalizerPtrMap expectedNormalizers; - expectedNormalizers.insert(TStrNormalizerPtrMap::value_type( - std::string("r"), TNormalizerPtr(new model::CAnomalyScore::CNormalizer(modelConfig)))); + expectedNormalizers.emplace( + "r", std::make_shared(modelConfig)); test::CRandomNumbers rng; for (std::size_t i = 0u; i < 300; ++i) { @@ -1600,15 +1597,15 @@ void CHierarchicalResultsTest::testNormalizer() { for (std::size_t j = 0u; j < extract.leafNodes().size(); ++j) { std::string key = 'l' + *extract.leafNodes()[j]->s_Spec.s_PartitionFieldName + ' ' + *extract.leafNodes()[j]->s_Spec.s_PersonFieldName; - TStrNormalizerPtrMapItr itr = expectedNormalizers.find(key); + auto itr = expectedNormalizers.find(key); if (itr == 
expectedNormalizers.end()) { itr = expectedNormalizers - .insert(TStrNormalizerPtrMap::value_type( - key, TNormalizerPtr(new model::CAnomalyScore::CNormalizer(modelConfig)))) + .emplace(key, std::make_shared(modelConfig)) .first; } double probability = extract.leafNodes()[j]->probability(); - // This truncation condition needs to be kept the same as the one in CHierarchicalResultsNormalizer::visit() + // This truncation condition needs to be kept the same as the one in + // CHierarchicalResultsNormalizer::visit() double score = probability > modelConfig.maximumAnomalousProbability() ? 0.0 : maths::CTools::anomalyScore(probability); @@ -1617,10 +1614,11 @@ void CHierarchicalResultsTest::testNormalizer() { for (std::size_t j = 0u; j < extract.leafNodes().size(); ++j) { std::string key = 'l' + *extract.leafNodes()[j]->s_Spec.s_PartitionFieldName + ' ' + *extract.leafNodes()[j]->s_Spec.s_PersonFieldName; - TStrNormalizerPtrMapItr itr = expectedNormalizers.find(key); + auto itr = expectedNormalizers.find(key); if (nodes.insert(extract.leafNodes()[j]).second) { double probability = extract.leafNodes()[j]->probability(); - // This truncation condition needs to be kept the same as the one in CHierarchicalResultsNormalizer::visit() + // This truncation condition needs to be kept the same as the one in + // CHierarchicalResultsNormalizer::visit() double score = probability > modelConfig.maximumAnomalousProbability() ? 
0.0 : maths::CTools::anomalyScore(probability); @@ -1641,15 +1639,15 @@ void CHierarchicalResultsTest::testNormalizer() { for (std::size_t j = 0u; j < extract.personNodes().size(); ++j) { std::string key = 'p' + *extract.personNodes()[j]->s_Spec.s_PartitionFieldName + ' ' + *extract.personNodes()[j]->s_Spec.s_PersonFieldName; - TStrNormalizerPtrMapItr itr = expectedNormalizers.find(key); + auto itr = expectedNormalizers.find(key); if (itr == expectedNormalizers.end()) { itr = expectedNormalizers - .insert(TStrNormalizerPtrMap::value_type( - key, TNormalizerPtr(new model::CAnomalyScore::CNormalizer(modelConfig)))) + .emplace(key, std::make_shared(modelConfig)) .first; } double probability = extract.personNodes()[j]->probability(); - // This truncation condition needs to be kept the same as the one in CHierarchicalResultsNormalizer::visit() + // This truncation condition needs to be kept the same as the one in + // CHierarchicalResultsNormalizer::visit() double score = probability > modelConfig.maximumAnomalousProbability() ? 0.0 : maths::CTools::anomalyScore(probability); @@ -1658,10 +1656,11 @@ void CHierarchicalResultsTest::testNormalizer() { for (std::size_t j = 0u; j < extract.personNodes().size(); ++j) { std::string key = 'p' + *extract.personNodes()[j]->s_Spec.s_PartitionFieldName + ' ' + *extract.personNodes()[j]->s_Spec.s_PersonFieldName; - TStrNormalizerPtrMapItr itr = expectedNormalizers.find(key); + auto itr = expectedNormalizers.find(key); if (nodes.insert(extract.personNodes()[j]).second) { double probability = extract.personNodes()[j]->probability(); - // This truncation condition needs to be kept the same as the one in CHierarchicalResultsNormalizer::visit() + // This truncation condition needs to be kept the same as the one in + // CHierarchicalResultsNormalizer::visit() double score = probability > modelConfig.maximumAnomalousProbability() ? 
0.0 : maths::CTools::anomalyScore(probability); @@ -1681,15 +1680,15 @@ void CHierarchicalResultsTest::testNormalizer() { expectedNormalized.clear(); for (std::size_t j = 0u; j < extract.partitionNodes().size(); ++j) { std::string key = 'n' + *extract.partitionNodes()[j]->s_Spec.s_PartitionFieldName; - TStrNormalizerPtrMapItr itr = expectedNormalizers.find(key); + auto itr = expectedNormalizers.find(key); if (itr == expectedNormalizers.end()) { itr = expectedNormalizers - .insert(TStrNormalizerPtrMap::value_type( - key, TNormalizerPtr(new model::CAnomalyScore::CNormalizer(modelConfig)))) + .emplace(key, std::make_shared(modelConfig)) .first; } double probability = extract.partitionNodes()[j]->probability(); - // This truncation condition needs to be kept the same as the one in CHierarchicalResultsNormalizer::visit() + // This truncation condition needs to be kept the same as the one in + // CHierarchicalResultsNormalizer::visit() double score = probability > modelConfig.maximumAnomalousProbability() ? 0.0 : maths::CTools::anomalyScore(probability); @@ -1697,10 +1696,11 @@ void CHierarchicalResultsTest::testNormalizer() { } for (std::size_t j = 0u; j < extract.partitionNodes().size(); ++j) { std::string key = 'n' + *extract.partitionNodes()[j]->s_Spec.s_PartitionFieldName; - TStrNormalizerPtrMapItr itr = expectedNormalizers.find(key); + auto itr = expectedNormalizers.find(key); if (nodes.insert(extract.partitionNodes()[j]).second) { double probability = extract.partitionNodes()[j]->probability(); - // This truncation condition needs to be kept the same as the one in CHierarchicalResultsNormalizer::visit() + // This truncation condition needs to be kept the same as the one in + // CHierarchicalResultsNormalizer::visit() double score = probability > modelConfig.maximumAnomalousProbability() ? 
0.0 : maths::CTools::anomalyScore(probability); @@ -1717,7 +1717,8 @@ void CHierarchicalResultsTest::testNormalizer() { core::CContainerPrinter::print(normalized)); double probability = results.root()->probability(); - // This truncation condition needs to be kept the same as the one in CHierarchicalResultsNormalizer::visit() + // This truncation condition needs to be kept the same as the one in + // CHierarchicalResultsNormalizer::visit() double score = probability > modelConfig.maximumAnomalousProbability() ? 0.0 : maths::CTools::anomalyScore(probability); diff --git a/lib/model/unittest/CMetricDataGathererTest.cc b/lib/model/unittest/CMetricDataGathererTest.cc index 94e87ea28b..0276438531 100644 --- a/lib/model/unittest/CMetricDataGathererTest.cc +++ b/lib/model/unittest/CMetricDataGathererTest.cc @@ -160,6 +160,40 @@ double variance(const TDoubleVec& values, double& mean) { const CSearchKey KEY; const std::string EMPTY_STRING; + +void testPersistence(const SModelParams& params, const CDataGatherer& origGatherer) { + // Test persistence. (We check for idempotency.) 
+ std::string origXml; + { + core::CRapidXmlStatePersistInserter inserter("root"); + origGatherer.acceptPersistInserter(inserter); + inserter.toXml(origXml); + } + + LOG_DEBUG(<< "gatherer XML size " << origXml.size()); + LOG_TRACE(<< "gatherer XML representation:\n" << origXml); + + // Restore the XML into a new filter + core::CRapidXmlParser parser; + CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); + core::CRapidXmlStateRestoreTraverser traverser(parser); + + CDataGatherer restoredGatherer(model_t::E_Metric, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, {}, KEY, traverser); + + CPPUNIT_ASSERT_EQUAL(origGatherer.checksum(), restoredGatherer.checksum()); + + // The XML representation of the new filter should be the + // same as the original + std::string newXml; + { + core::CRapidXmlStatePersistInserter inserter("root"); + restoredGatherer.acceptPersistInserter(inserter); + inserter.toXml(newXml); + } + CPPUNIT_ASSERT_EQUAL(origXml, newXml); +} } void CMetricDataGathererTest::singleSeriesTests() { @@ -175,9 +209,7 @@ void CMetricDataGathererTest::singleSeriesTests() { TTimeDoublePr bucket2[] = {TTimeDoublePr(600, 2.0), TTimeDoublePr(799, 2.2), TTimeDoublePr(1199, 1.8)}; TTimeDoublePr bucket3[] = {TTimeDoublePr(1200, 2.1), TTimeDoublePr(1250, 2.5)}; - TTimeDoublePr bucket4[] = { - TTimeDoublePr(1900, 3.5), - }; + TTimeDoublePr bucket4[] = {TTimeDoublePr(1900, 3.5)}; TTimeDoublePr bucket5[] = {TTimeDoublePr(2420, 3.5), TTimeDoublePr(2480, 3.2), TTimeDoublePr(2490, 3.8)}; { @@ -189,9 +221,8 @@ void CMetricDataGathererTest::singleSeriesTests() { features.push_back(model_t::E_IndividualCountByBucketAndPerson); SModelParams params(bucketLength); CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, KEY, features, startTime, 2u); + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + 
EMPTY_STRING, {}, KEY, features, startTime, 2u); CPPUNIT_ASSERT(!gatherer.isPopulation()); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p", gatherer, m_ResourceMonitor)); @@ -229,7 +260,7 @@ void CMetricDataGathererTest::singleSeriesTests() { CPPUNIT_ASSERT_EQUAL(true, featureData[3].second[0].second.s_IsInteger); } - for (size_t i = 1; i < boost::size(bucket1); ++i) { + for (std::size_t i = 1; i < boost::size(bucket1); ++i) { addArrival(gatherer, m_ResourceMonitor, bucket1[i].first, "p", bucket1[i].second); } @@ -264,42 +295,12 @@ void CMetricDataGathererTest::singleSeriesTests() { CPPUNIT_ASSERT_EQUAL(std::string("[(0 [9] 1 6)]"), core::CContainerPrinter::print( featureData[3].second[0].second.s_Samples)); - - // Test persistence. (We check for idempotency.) - std::string origXml; - { - core::CRapidXmlStatePersistInserter inserter("root"); - gatherer.acceptPersistInserter(inserter); - inserter.toXml(origXml); - } - - LOG_DEBUG(<< "gatherer XML representation:\n" << origXml); - - // Restore the XML into a new filter - core::CRapidXmlParser parser; - CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); - core::CRapidXmlStateRestoreTraverser traverser(parser); - - CDataGatherer restoredGatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, KEY, traverser); - - // The XML representation of the new filter should be the - // same as the original - std::string newXml; - { - core::CRapidXmlStatePersistInserter inserter("root"); - restoredGatherer.acceptPersistInserter(inserter); - inserter.toXml(newXml); - } - CPPUNIT_ASSERT_EQUAL(origXml, newXml); + testPersistence(params, gatherer); } gatherer.timeNow(startTime + bucketLength); - for (size_t i = 0; i < boost::size(bucket2); ++i) { - addArrival(gatherer, m_ResourceMonitor, bucket2[i].first, "p", - bucket2[i].second); + for (const auto& value : bucket2) { + addArrival(gatherer, m_ResourceMonitor, value.first, 
"p", value.second); } { TFeatureSizeFeatureDataPrVecPrVec featureData; @@ -327,36 +328,7 @@ void CMetricDataGathererTest::singleSeriesTests() { CPPUNIT_ASSERT_EQUAL(std::string("[(600 [6] 1 3)]"), core::CContainerPrinter::print( featureData[3].second[0].second.s_Samples)); - - // Test persistence. (We check for idempotency.) - std::string origXml; - { - core::CRapidXmlStatePersistInserter inserter("root"); - gatherer.acceptPersistInserter(inserter); - inserter.toXml(origXml); - } - - LOG_DEBUG(<< "model XML representation:\n" << origXml); - - // Restore the XML into a new filter - core::CRapidXmlParser parser; - CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); - core::CRapidXmlStateRestoreTraverser traverser(parser); - - CDataGatherer restoredGatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, KEY, traverser); - - // The XML representation of the new filter should be the - // same as the original - std::string newXml; - { - ml::core::CRapidXmlStatePersistInserter inserter("root"); - restoredGatherer.acceptPersistInserter(inserter); - inserter.toXml(newXml); - } - CPPUNIT_ASSERT_EQUAL(origXml, newXml); + testPersistence(params, gatherer); } } @@ -369,9 +341,8 @@ void CMetricDataGathererTest::singleSeriesTests() { features.push_back(model_t::E_IndividualSumByBucketAndPerson); SModelParams params(bucketLength); CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, KEY, features, startTime, 0); + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, {}, KEY, features, startTime, 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p", gatherer, m_ResourceMonitor)); TTimeDoublePrVecVec buckets; @@ -438,9 +409,9 @@ void CMetricDataGathererTest::multipleSeriesTests() { features.push_back(model_t::E_IndividualMaxByPerson); 
features.push_back(model_t::E_IndividualSumByBucketAndPerson); SModelParams params(bucketLength); - CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, + CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, TStrVec(), false, KEY, features, startTime, 0); + EMPTY_STRING, {}, KEY, features, startTime, 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p1", gatherer, m_ResourceMonitor)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), addPerson("p2", gatherer, m_ResourceMonitor)); @@ -451,9 +422,7 @@ void CMetricDataGathererTest::multipleSeriesTests() { TTimeDoublePr bucket12[] = {TTimeDoublePr(600, 2.0), TTimeDoublePr(799, 2.2), TTimeDoublePr(1199, 1.8)}; TTimeDoublePr bucket13[] = {TTimeDoublePr(1200, 2.1), TTimeDoublePr(1250, 2.5)}; - TTimeDoublePr bucket14[] = { - TTimeDoublePr(1900, 3.5), - }; + TTimeDoublePr bucket14[] = {TTimeDoublePr(1900, 3.5)}; TTimeDoublePr bucket15[] = {TTimeDoublePr(2420, 3.5), TTimeDoublePr(2480, 3.2), TTimeDoublePr(2490, 3.8)}; TTimeDoublePrVecVec buckets1; @@ -570,36 +539,7 @@ void CMetricDataGathererTest::multipleSeriesTests() { CPPUNIT_ASSERT_EQUAL( std::string("[(2400 [21.6] 1 6)]"), core::CContainerPrinter::print(featureData[3].second[1].second.s_Samples)); - - // Test persistence. (We check for idempotency.) 
- std::string origXml; - { - ml::core::CRapidXmlStatePersistInserter inserter("root"); - gatherer.acceptPersistInserter(inserter); - inserter.toXml(origXml); - } - - LOG_DEBUG(<< "model XML representation:\n" << origXml); - - // Restore the XML into a new filter - core::CRapidXmlParser parser; - CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); - core::CRapidXmlStateRestoreTraverser traverser(parser); - - CDataGatherer restoredGatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, KEY, traverser); - - // The XML representation of the new filter should be the - // same as the original - std::string newXml; - { - core::CRapidXmlStatePersistInserter inserter("root"); - restoredGatherer.acceptPersistInserter(inserter); - inserter.toXml(newXml); - } - CPPUNIT_ASSERT_EQUAL(origXml, newXml); + testPersistence(params, gatherer); // Remove person p1. TSizeVec peopleToRemove; @@ -670,9 +610,9 @@ void CMetricDataGathererTest::testSampleCount() { features.push_back(model_t::E_IndividualMinByPerson); features.push_back(model_t::E_IndividualMaxByPerson); SModelParams params(bucketLength); - CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, + CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, TStrVec(), false, KEY, features, startTime, 0); + EMPTY_STRING, {}, KEY, features, startTime, 0); std::size_t pid1 = addPerson("p1", gatherer, m_ResourceMonitor); std::size_t pid2 = addPerson("p2", gatherer, m_ResourceMonitor); @@ -716,7 +656,6 @@ void CMetricDataGathererTest::testSampleCount() { CPPUNIT_ASSERT_DOUBLES_EQUAL(2.0, gatherer.effectiveSampleCount(pid2), 1.0 + 1e-5); for (std::size_t i = numberBuckets; i < 100; ++i) { - LOG_DEBUG(<< "Processing bucket " << i); gatherer.timeNow(startTime + i * bucketLength); addArrival(gatherer, m_ResourceMonitor, 
startTime + i * bucketLength + 10, "p1", 1.0); @@ -737,9 +676,9 @@ void CMetricDataGathererTest::testRemovePeople() { features.push_back(model_t::E_IndividualMaxByPerson); features.push_back(model_t::E_IndividualSumByBucketAndPerson); SModelParams params(bucketLength); - CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, + CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, TStrVec(), false, KEY, features, startTime, 0); + EMPTY_STRING, {}, KEY, features, startTime, 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p1", gatherer, m_ResourceMonitor)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), addPerson("p2", gatherer, m_ResourceMonitor)); CPPUNIT_ASSERT_EQUAL(std::size_t(2), addPerson("p3", gatherer, m_ResourceMonitor)); @@ -784,10 +723,10 @@ void CMetricDataGathererTest::testRemovePeople() { peopleToRemove.push_back(1); gatherer.recyclePeople(peopleToRemove); - CDataGatherer expectedGatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + CDataGatherer expectedGatherer(model_t::E_Metric, model_t::E_None, + params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, KEY, features, startTime, 0); + {}, KEY, features, startTime, 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p3", expectedGatherer, m_ResourceMonitor)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), @@ -823,10 +762,10 @@ void CMetricDataGathererTest::testRemovePeople() { peopleToRemove.push_back(7); gatherer.recyclePeople(peopleToRemove); - CDataGatherer expectedGatherer(model_t::E_Metric, model_t::E_None, params, + CDataGatherer expectedGatherer(model_t::E_Metric, model_t::E_None, + params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, KEY, features, startTime, 0); + {}, KEY, features, startTime, 0); 
CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p3", expectedGatherer, m_ResourceMonitor)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), @@ -856,10 +795,10 @@ void CMetricDataGathererTest::testRemovePeople() { peopleToRemove.push_back(6); gatherer.recyclePeople(peopleToRemove); - CDataGatherer expectedGatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + CDataGatherer expectedGatherer(model_t::E_Metric, model_t::E_None, + params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, KEY, features, startTime, 0); + {}, KEY, features, startTime, 0); LOG_DEBUG(<< "checksum = " << gatherer.checksum()); LOG_DEBUG(<< "expected checksum = " << expectedGatherer.checksum()); @@ -891,16 +830,15 @@ void CMetricDataGathererTest::testSum() { SModelParams params(bucketLength); CDataGatherer sum(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, TStrVec(), false, KEY, sumFeatures, startTime, 0); + {}, KEY, sumFeatures, startTime, 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p1", sum, m_ResourceMonitor)); TFeatureVec nonZeroSumFeatures; nonZeroSumFeatures.push_back(model_t::E_IndividualNonNullSumByBucketAndPerson); - CDataGatherer nonZeroSum(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, KEY, nonZeroSumFeatures, startTime, 0); + CDataGatherer nonZeroSum(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + {}, KEY, nonZeroSumFeatures, startTime, 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p1", nonZeroSum, m_ResourceMonitor)); core_t::TTime bucketStart = startTime; @@ -995,9 +933,8 @@ void CMetricDataGathererTest::singleSeriesOutOfOrderTests() { features.push_back(model_t::E_IndividualSumByBucketAndPerson); 
features.push_back(model_t::E_IndividualCountByBucketAndPerson); CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, KEY, features, startTime, 2u); + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, {}, KEY, features, startTime, 2u); CPPUNIT_ASSERT(!gatherer.isPopulation()); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p", gatherer, m_ResourceMonitor)); @@ -1034,7 +971,7 @@ void CMetricDataGathererTest::singleSeriesOutOfOrderTests() { CPPUNIT_ASSERT_EQUAL(true, featureData[3].second[0].second.s_IsInteger); } - for (size_t i = 1; i < boost::size(bucket1); ++i) { + for (std::size_t i = 1; i < boost::size(bucket1); ++i) { addArrival(gatherer, m_ResourceMonitor, bucket1[i].first, "p", bucket1[i].second); } @@ -1068,42 +1005,12 @@ void CMetricDataGathererTest::singleSeriesOutOfOrderTests() { CPPUNIT_ASSERT_EQUAL(std::string("[(0 [7.5] 1 5)]"), core::CContainerPrinter::print( featureData[3].second[0].second.s_Samples)); - - // Test persistence. (We check for idempotency.) 
- std::string origXml; - { - core::CRapidXmlStatePersistInserter inserter("root"); - gatherer.acceptPersistInserter(inserter); - inserter.toXml(origXml); - } - - LOG_DEBUG(<< "gatherer XML representation:\n" << origXml); - - // Restore the XML into a new filter - core::CRapidXmlParser parser; - CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); - core::CRapidXmlStateRestoreTraverser traverser(parser); - - CDataGatherer restoredGatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, KEY, traverser); - - // The XML representation of the new filter should be the - // same as the original - std::string newXml; - { - core::CRapidXmlStatePersistInserter inserter("root"); - restoredGatherer.acceptPersistInserter(inserter); - inserter.toXml(newXml); - } - CPPUNIT_ASSERT_EQUAL(origXml, newXml); + testPersistence(params, gatherer); } gatherer.timeNow(startTime + bucketLength); - for (size_t i = 0; i < boost::size(bucket2); ++i) { - addArrival(gatherer, m_ResourceMonitor, bucket2[i].first, "p", - bucket2[i].second); + for (const auto& value : bucket2) { + addArrival(gatherer, m_ResourceMonitor, value.first, "p", value.second); } { TFeatureSizeFeatureDataPrVecPrVec featureData; @@ -1131,36 +1038,7 @@ void CMetricDataGathererTest::singleSeriesOutOfOrderTests() { CPPUNIT_ASSERT_EQUAL(std::string("[(0 [9] 1 6)]"), core::CContainerPrinter::print( featureData[3].second[0].second.s_Samples)); - - // Test persistence. (We check for idempotency.) 
- std::string origXml; - { - core::CRapidXmlStatePersistInserter inserter("root"); - gatherer.acceptPersistInserter(inserter); - inserter.toXml(origXml); - } - - LOG_DEBUG(<< "model XML representation:\n" << origXml); - - // Restore the XML into a new filter - core::CRapidXmlParser parser; - CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); - core::CRapidXmlStateRestoreTraverser traverser(parser); - - CDataGatherer restoredGatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, KEY, traverser); - - // The XML representation of the new filter should be the - // same as the original - std::string newXml; - { - ml::core::CRapidXmlStatePersistInserter inserter("root"); - restoredGatherer.acceptPersistInserter(inserter); - inserter.toXml(newXml); - } - CPPUNIT_ASSERT_EQUAL(origXml, newXml); + testPersistence(params, gatherer); } } } @@ -1173,25 +1051,22 @@ void CMetricDataGathererTest::testResetBucketGivenSingleSeries() { params.s_SampleCountFactor = 1; params.s_SampleQueueGrowthFactor = 0.1; - TTimeDoublePr data[] = { - TTimeDoublePr(1, 1.0), // Bucket 1 - TTimeDoublePr(550, 2.0), TTimeDoublePr(600, 3.0), // Bucket 2 - TTimeDoublePr(700, 4.0), TTimeDoublePr(1000, 5.0), - TTimeDoublePr(1200, 6.0) // Bucket 3 - }; + TTimeDoublePr data[] = {TTimeDoublePr(1, 1.0), TTimeDoublePr(550, 2.0), + TTimeDoublePr(600, 3.0), TTimeDoublePr(700, 4.0), + TTimeDoublePr(1000, 5.0), TTimeDoublePr(1200, 6.0)}; TFeatureVec features; features.push_back(model_t::E_IndividualMeanByPerson); features.push_back(model_t::E_IndividualMinByPerson); features.push_back(model_t::E_IndividualMaxByPerson); features.push_back(model_t::E_IndividualSumByBucketAndPerson); - CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, + CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, 
TStrVec(), false, KEY, features, startTime, 2u); + EMPTY_STRING, {}, KEY, features, startTime, 2u); addPerson("p", gatherer, m_ResourceMonitor); - for (std::size_t i = 0; i < boost::size(data); ++i) { - addArrival(gatherer, m_ResourceMonitor, data[i].first, "p", data[i].second); + for (const auto& value : data) { + addArrival(gatherer, m_ResourceMonitor, value.first, "p", value.second); } TFeatureSizeFeatureDataPrVecPrVec featureData; @@ -1286,29 +1161,26 @@ void CMetricDataGathererTest::testResetBucketGivenMultipleSeries() { params.s_SampleCountFactor = 1; params.s_SampleQueueGrowthFactor = 0.1; - TTimeDoublePr data[] = { - TTimeDoublePr(1, 1.0), // Bucket 1 - TTimeDoublePr(550, 2.0), TTimeDoublePr(600, 3.0), // Bucket 2 - TTimeDoublePr(700, 4.0), TTimeDoublePr(1000, 5.0), - TTimeDoublePr(1200, 6.0) // Bucket 3 - }; + TTimeDoublePr data[] = {TTimeDoublePr(1, 1.0), TTimeDoublePr(550, 2.0), + TTimeDoublePr(600, 3.0), TTimeDoublePr(700, 4.0), + TTimeDoublePr(1000, 5.0), TTimeDoublePr(1200, 6.0)}; TFeatureVec features; features.push_back(model_t::E_IndividualMeanByPerson); features.push_back(model_t::E_IndividualMinByPerson); features.push_back(model_t::E_IndividualMaxByPerson); features.push_back(model_t::E_IndividualSumByBucketAndPerson); - CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, + CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, TStrVec(), false, KEY, features, startTime, 2u); + EMPTY_STRING, {}, KEY, features, startTime, 2u); addPerson("p1", gatherer, m_ResourceMonitor); addPerson("p2", gatherer, m_ResourceMonitor); addPerson("p3", gatherer, m_ResourceMonitor); - for (std::size_t i = 0; i < boost::size(data); ++i) { + for (const auto& value : data) { for (std::size_t pid = 0; pid < gatherer.numberActivePeople(); ++pid) { - addArrival(gatherer, m_ResourceMonitor, data[i].first, - gatherer.personName(pid), data[i].second); + 
addArrival(gatherer, m_ResourceMonitor, value.first, + gatherer.personName(pid), value.second); } } @@ -1591,8 +1463,8 @@ void CMetricDataGathererTest::testInfluenceStatistics() { features.push_back(model_t::E_IndividualSumByBucketAndPerson); TStrVec influencerNames(boost::begin(influencerNames_), boost::end(influencerNames_)); CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - influencerNames, false, KEY, features, startTime, 2u); + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + influencerNames, KEY, features, startTime, 2u); addPerson("p1", gatherer, m_ResourceMonitor, influencerNames.size()); addPerson("p2", gatherer, m_ResourceMonitor, influencerNames.size()); @@ -1614,7 +1486,7 @@ void CMetricDataGathererTest::testInfluenceStatistics() { m < data_[k].second.s_InfluenceValues.size(); ++m) { for (std::size_t n = 0u; n < data_[k].second.s_InfluenceValues[m].size(); ++n) { - statistics.push_back(TStrDoubleDoublePrPr( + statistics.emplace_back( data_[k].second.s_InfluenceValues[m][n].first, TDoubleDoublePr( data_[k] @@ -1622,7 +1494,7 @@ void CMetricDataGathererTest::testInfluenceStatistics() { .second.first[0], data_[k] .second.s_InfluenceValues[m][n] - .second.second))); + .second.second)); } } std::sort(statistics.begin(), statistics.end(), @@ -1671,9 +1543,7 @@ void CMetricDataGathererTest::testMultivariate() { TTimeDoubleDoubleTuple(1199, 1.8, 1.6)}; TTimeDoubleDoubleTuple bucket3[] = {TTimeDoubleDoubleTuple(1200, 2.1, 2.0), TTimeDoubleDoubleTuple(1250, 2.5, 2.4)}; - TTimeDoubleDoubleTuple bucket4[] = { - TTimeDoubleDoubleTuple(1900, 3.5, 3.2), - }; + TTimeDoubleDoubleTuple bucket4[] = {TTimeDoubleDoubleTuple(1900, 3.5, 3.2)}; TTimeDoubleDoubleTuple bucket5[] = {TTimeDoubleDoubleTuple(2420, 3.5, 3.2), TTimeDoubleDoubleTuple(2480, 3.2, 3.0), TTimeDoubleDoubleTuple(2490, 3.8, 3.8)}; @@ -1682,10 +1552,9 @@ void 
CMetricDataGathererTest::testMultivariate() { TFeatureVec features; features.push_back(model_t::E_IndividualMeanLatLongByPerson); TStrVec influencerNames; - CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, + CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, influencerNames, - false, KEY, features, startTime, 2u); + influencerNames, KEY, features, startTime, 2u); CPPUNIT_ASSERT(!gatherer.isPopulation()); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p", gatherer, m_ResourceMonitor)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), gatherer.numberFeatures()); @@ -1713,7 +1582,7 @@ void CMetricDataGathererTest::testMultivariate() { CPPUNIT_ASSERT_EQUAL(true, featureData[0].second[0].second.s_IsInteger); } - for (size_t i = 1; i < boost::size(bucket1); ++i) { + for (std::size_t i = 1; i < boost::size(bucket1); ++i) { addArrival(gatherer, m_ResourceMonitor, bucket1[i].get<0>(), "p", bucket1[i].get<1>(), bucket1[i].get<2>(), DELIMITER); } @@ -1734,42 +1603,13 @@ void CMetricDataGathererTest::testMultivariate() { CPPUNIT_ASSERT_EQUAL( std::string("[(8 [1.55, 1.5] 1 2), (185 [1.2, 1.1] 1 2), (475 [1.75, 1.6] 1 2)]"), core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); - - // Test persistence. (We check for idempotency.) 
- std::string origXml; - { - core::CRapidXmlStatePersistInserter inserter("root"); - gatherer.acceptPersistInserter(inserter); - inserter.toXml(origXml); - } - - LOG_DEBUG(<< "gatherer XML representation:\n" << origXml); - - // Restore the XML into a new filter - core::CRapidXmlParser parser; - CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); - core::CRapidXmlStateRestoreTraverser traverser(parser); - - CDataGatherer restoredGatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, KEY, traverser); - - // The XML representation of the new filter should be the - // same as the original - std::string newXml; - { - core::CRapidXmlStatePersistInserter inserter("root"); - restoredGatherer.acceptPersistInserter(inserter); - inserter.toXml(newXml); - } - CPPUNIT_ASSERT_EQUAL(origXml, newXml); + testPersistence(params, gatherer); } gatherer.timeNow(startTime + bucketLength); - for (size_t i = 0; i < boost::size(bucket2); ++i) { - addArrival(gatherer, m_ResourceMonitor, bucket2[i].get<0>(), "p", - bucket2[i].get<1>(), bucket2[i].get<2>(), DELIMITER); + for (const auto& value : bucket2) { + addArrival(gatherer, m_ResourceMonitor, value.get<0>(), "p", + value.get<1>(), value.get<2>(), DELIMITER); } { TFeatureSizeFeatureDataPrVecPrVec featureData; @@ -1784,42 +1624,13 @@ void CMetricDataGathererTest::testMultivariate() { CPPUNIT_ASSERT_EQUAL(std::string("[(700 [2.1, 1.9] 1 2)]"), core::CContainerPrinter::print( featureData[0].second[0].second.s_Samples)); - - // Test persistence. (We check for idempotency.) 
- std::string origXml; - { - core::CRapidXmlStatePersistInserter inserter("root"); - gatherer.acceptPersistInserter(inserter); - inserter.toXml(origXml); - } - - LOG_DEBUG(<< "model XML representation:\n" << origXml); - - // Restore the XML into a new filter - core::CRapidXmlParser parser; - CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); - core::CRapidXmlStateRestoreTraverser traverser(parser); - - CDataGatherer restoredGatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, KEY, traverser); - - // The XML representation of the new filter should be the - // same as the original - std::string newXml; - { - ml::core::CRapidXmlStatePersistInserter inserter("root"); - restoredGatherer.acceptPersistInserter(inserter); - inserter.toXml(newXml); - } - CPPUNIT_ASSERT_EQUAL(origXml, newXml); + testPersistence(params, gatherer); } gatherer.timeNow(startTime + 2 * bucketLength); - for (size_t i = 0; i < boost::size(bucket3); ++i) { - addArrival(gatherer, m_ResourceMonitor, bucket3[i].get<0>(), "p", - bucket3[i].get<1>(), bucket3[i].get<2>(), DELIMITER); + for (const auto& value : bucket3) { + addArrival(gatherer, m_ResourceMonitor, value.get<0>(), "p", + value.get<1>(), value.get<2>(), DELIMITER); } { TFeatureSizeFeatureDataPrVecPrVec featureData; @@ -1842,9 +1653,8 @@ void CMetricDataGathererTest::testMultivariate() { TFeatureVec features; features.push_back(model_t::E_IndividualMeanLatLongByPerson); CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, KEY, features, startTime, 0); + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, {}, KEY, features, startTime, 0); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson("p", gatherer, m_ResourceMonitor)); TTimeDoubleDoubleTupleVecVec buckets; @@ -1938,20 +1748,15 @@ void 
CMetricDataGathererTest::testVarp() { TFeatureVec features; features.push_back(model_t::E_IndividualVarianceByPerson); CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, KEY, features, startTime, 2u); + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, {}, KEY, features, startTime, 2u); CPPUNIT_ASSERT(!gatherer.isPopulation()); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(person, gatherer, m_ResourceMonitor)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), gatherer.numberFeatures()); { - values.push_back(5.0); - values.push_back(6.0); - values.push_back(3.0); - values.push_back(2.0); - values.push_back(4.0); + values.assign({5.0, 6.0, 3.0, 2.0, 4.0}); addArrivals(gatherer, m_ResourceMonitor, startTime, 10, person, values); gatherer.sampleNow(startTime); TFeatureSizeFeatureDataPrVecPrVec featureData; @@ -1969,15 +1774,7 @@ void CMetricDataGathererTest::testVarp() { } startTime += bucketLength; { - values.clear(); - values.push_back(115.0); - values.push_back(116.0); - values.push_back(117.0); - values.push_back(1111.5); - values.push_back(22.45); - values.push_back(2526.55634); - values.push_back(55.55); - values.push_back(14.723); + values.assign({115.0, 116.0, 117.0, 1111.5, 22.45, 2526.55634, 55.55, 14.723}); addArrivals(gatherer, m_ResourceMonitor, startTime, 100, person, values); gatherer.sampleNow(startTime); TFeatureSizeFeatureDataPrVecPrVec featureData; @@ -1994,8 +1791,7 @@ void CMetricDataGathererTest::testVarp() { gatherer.sampleNow(startTime); startTime += bucketLength; { - values.clear(); - values.push_back(0.0); + values.assign({0.0}); addArrivals(gatherer, m_ResourceMonitor, startTime, 100, person, values); gatherer.sampleNow(startTime); TFeatureSizeFeatureDataPrVecPrVec featureData; @@ -2013,10 +1809,9 @@ void CMetricDataGathererTest::testVarp() { TStrVec influencerFieldNames; influencerFieldNames.push_back("i"); 
influencerFieldNames.push_back("j"); - CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, + CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, influencerFieldNames, - false, KEY, features, startTime, 2u); + influencerFieldNames, KEY, features, startTime, 2u); CPPUNIT_ASSERT(!gatherer.isPopulation()); CPPUNIT_ASSERT_EQUAL(std::size_t(0), addPerson(person, gatherer, m_ResourceMonitor, influencerFieldNames.size())); @@ -2052,20 +1847,7 @@ void CMetricDataGathererTest::testVarp() { CSample::TDouble1Vec v = featureData[0].second[0].second.s_BucketValue->value(); - values.clear(); - values.push_back(5.0); - values.push_back(5.5); - values.push_back(5.9); - values.push_back(5.2); - values.push_back(5.1); - values.push_back(2.2); - values.push_back(4.9); - values.push_back(5.1); - values.push_back(5.0); - values.push_back(12.12); - values.push_back(5.2); - values.push_back(5.0); - values.push_back(1.0); + values.assign({5.0, 5.5, 5.9, 5.2, 5.1, 2.2, 4.9, 5.1, 5.0, 12.12, 5.2, 5.0, 1.0}); double expectedMean = 0; double expectedVariance = ::variance(values, expectedMean); CPPUNIT_ASSERT_DOUBLES_EQUAL(v[0], expectedVariance, 0.0001); diff --git a/lib/model/unittest/CMetricPopulationDataGathererTest.cc b/lib/model/unittest/CMetricPopulationDataGathererTest.cc index 82bec29d41..2c70ee55fb 100644 --- a/lib/model/unittest/CMetricPopulationDataGathererTest.cc +++ b/lib/model/unittest/CMetricPopulationDataGathererTest.cc @@ -395,7 +395,7 @@ void CMetricPopulationDataGathererTest::testSampleCount() { const std::string attribute("c1"); const std::string person("p1"); const std::size_t numberBuckets = 40; - const std::size_t personMessageCount[numberBuckets] = { + const std::size_t personMessageCount[] = { 11, 11, 11, 11, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 
97, 97, 97, 97, 97, 97, 97, 97, 97}; @@ -654,9 +654,8 @@ void CMetricPopulationDataGathererTest::testRemovePeople() { features.push_back(model_t::E_PopulationMaxByPersonAndAttribute); features.push_back(model_t::E_PopulationSumByBucketPersonAndAttribute); CDataGatherer gatherer(model_t::E_PopulationMetric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, searchKey, features, startTime, 0); + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, {}, searchKey, features, startTime, 0); TMessageVec messages; generateTestMessages(startTime, messages); @@ -793,9 +792,8 @@ void CMetricPopulationDataGathererTest::testRemoveAttributes() { features.push_back(model_t::E_PopulationMaxByPersonAndAttribute); features.push_back(model_t::E_PopulationSumByBucketPersonAndAttribute); CDataGatherer gatherer(model_t::E_PopulationMetric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, searchKey, features, startTime, 0); + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, {}, searchKey, features, startTime, 0); TMessageVec messages; generateTestMessages(startTime, messages); @@ -961,10 +959,9 @@ void CMetricPopulationDataGathererTest::testInfluenceStatistics() { features.push_back(model_t::E_PopulationMaxByPersonAndAttribute); features.push_back(model_t::E_PopulationHighSumByBucketPersonAndAttribute); TStrVec influencerNames(boost::begin(influencerNames_), boost::end(influencerNames_)); - CDataGatherer gatherer(model_t::E_PopulationMetric, model_t::E_None, params, + CDataGatherer gatherer(model_t::E_PopulationMetric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, influencerNames, false, - searchKey, features, startTime, 2u); + influencerNames, searchKey, features, startTime, 2u); core_t::TTime bucketStart 
= startTime; for (std::size_t i = 0u, b = 0u; i < boost::size(data); ++i) { @@ -1024,9 +1021,9 @@ void CMetricPopulationDataGathererTest::testPersistence() { features.push_back(model_t::E_PopulationMaxByPersonAndAttribute); features.push_back(model_t::E_PopulationHighSumByBucketPersonAndAttribute); CDataGatherer origDataGatherer(model_t::E_PopulationMetric, model_t::E_None, - params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, searchKey, features, startTime, 0); + params, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, {}, + searchKey, features, startTime, 0); TMessageVec messages; generateTestMessages(startTime, messages); @@ -1062,10 +1059,10 @@ void CMetricPopulationDataGathererTest::testPersistence() { CPPUNIT_ASSERT(parser.parseStringIgnoreCdata(origXml)); core::CRapidXmlStateRestoreTraverser traverser(parser); - CDataGatherer restoredDataGatherer(model_t::E_PopulationMetric, model_t::E_None, - params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + CDataGatherer restoredDataGatherer(model_t::E_PopulationMetric, + model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - TStrVec(), false, searchKey, traverser); + EMPTY_STRING, {}, searchKey, traverser); // The XML representation of the new data gatherer should be the same as the // original diff --git a/lib/model/unittest/CModelDetailsViewTest.cc b/lib/model/unittest/CModelDetailsViewTest.cc index c2f5080948..444d456400 100644 --- a/lib/model/unittest/CModelDetailsViewTest.cc +++ b/lib/model/unittest/CModelDetailsViewTest.cc @@ -44,10 +44,10 @@ void CModelDetailsViewTest::testModelPlot() { TMockModelPtr model; auto setupTest = [&]() { - gatherer.reset(new model::CDataGatherer{ + gatherer = std::make_shared( model_t::analysisCategory(features[0]), model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, "p", EMPTY_STRING, - EMPTY_STRING, TStrVec(), false, key, features, 0, 0}); + EMPTY_STRING, 
EMPTY_STRING, "p", EMPTY_STRING, EMPTY_STRING, + TStrVec{}, key, features, 0, 0); std::string person11{"p11"}; std::string person12{"p12"}; std::string person21{"p21"}; @@ -81,12 +81,10 @@ void CModelDetailsViewTest::testModelPlot() { setupTest(); TDoubleVec values{2.0, 3.0, 0.0, 0.0}; - { - std::size_t pid{0}; - for (auto value : values) { - model->mockAddBucketValue(model_t::E_IndividualSumByBucketAndPerson, - pid++, 0, 0, {value}); - } + std::size_t pid{0}; + for (auto value : values) { + model->mockAddBucketValue(model_t::E_IndividualSumByBucketAndPerson, + pid++, 0, 0, {value}); } model::CModelPlotData plotData; @@ -95,7 +93,6 @@ void CModelDetailsViewTest::testModelPlot() { for (const auto& featureByFieldData : plotData) { CPPUNIT_ASSERT_EQUAL(values.size(), featureByFieldData.second.size()); for (const auto& byFieldData : featureByFieldData.second) { - std::size_t pid; CPPUNIT_ASSERT(gatherer->personId(byFieldData.first, pid)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), byFieldData.second.s_ValuesPerOverField.size()); @@ -112,12 +109,10 @@ void CModelDetailsViewTest::testModelPlot() { setupTest(); TDoubleVec values{0.0, 1.0, 3.0}; - { - std::size_t pid{0}; - for (auto value : values) { - model->mockAddBucketValue(model_t::E_IndividualCountByBucketAndPerson, - pid++, 0, 0, {value}); - } + std::size_t pid{0}; + for (auto value : values) { + model->mockAddBucketValue(model_t::E_IndividualCountByBucketAndPerson, + pid++, 0, 0, {value}); } model::CModelPlotData plotData; @@ -126,7 +121,6 @@ void CModelDetailsViewTest::testModelPlot() { for (const auto& featureByFieldData : plotData) { CPPUNIT_ASSERT_EQUAL(values.size(), featureByFieldData.second.size()); for (const auto& byFieldData : featureByFieldData.second) { - std::size_t pid; CPPUNIT_ASSERT(gatherer->personId(byFieldData.first, pid)); CPPUNIT_ASSERT_EQUAL(std::size_t(1), byFieldData.second.s_ValuesPerOverField.size()); diff --git a/lib/model/unittest/CResourceLimitTest.cc 
b/lib/model/unittest/CResourceLimitTest.cc index a7aeb75e0b..bd90237207 100644 --- a/lib/model/unittest/CResourceLimitTest.cc +++ b/lib/model/unittest/CResourceLimitTest.cc @@ -265,6 +265,7 @@ class CMockEventRateModel : public ml::model::CEventRateModel { } void test(core_t::TTime time) { + m_ResourceMonitor.clearExtraMemory(); this->createUpdateNewModels(time, m_ResourceMonitor); } @@ -307,6 +308,7 @@ class CMockMetricModel : public ml::model::CMetricModel { } void test(core_t::TTime time) { + m_ResourceMonitor.clearExtraMemory(); this->createUpdateNewModels(time, m_ResourceMonitor); } @@ -388,7 +390,7 @@ void CResourceLimitTest::testLargeAllocations() { factory.features(features); CModelFactory::TDataGathererPtr gatherer(factory.makeDataGatherer(FIRST_TIME)); - CResourceMonitor resourceMonitor; + CResourceMonitor resourceMonitor(1.0); resourceMonitor.memoryLimit(std::size_t(70)); const maths::CMultinomialConjugate conjugate; ::CMockEventRateModel model( @@ -425,20 +427,22 @@ void CResourceLimitTest::testLargeAllocations() { LOG_DEBUG(<< "Testing for 2nd time"); model.test(time); - CPPUNIT_ASSERT_EQUAL(std::size_t(2000), model.getNewPeople()); + LOG_DEBUG(<< "# new people = " << model.getNewPeople()); + CPPUNIT_ASSERT(model.getNewPeople() > 2700 && model.getNewPeople() < 2900); CPPUNIT_ASSERT_EQUAL(std::size_t(0), model.getNewAttributes()); - CPPUNIT_ASSERT_EQUAL(std::size_t(2000), gatherer->numberActivePeople()); + CPPUNIT_ASSERT_EQUAL(model.getNewPeople(), gatherer->numberActivePeople()); // Adding a small number of new people should be fine though, - // as there are allowed in + // as they're allowed in time += BUCKET_LENGTH; - ::addPersonData(4400, 4410, time, *gatherer, resourceMonitor); + std::size_t oldNumberPeople{model.getNewPeople()}; + ::addPersonData(3000, 3010, time, *gatherer, resourceMonitor); LOG_DEBUG(<< "Testing for 3rd time"); model.test(time); - CPPUNIT_ASSERT_EQUAL(std::size_t(2010), model.getNewPeople()); + 
CPPUNIT_ASSERT_EQUAL(oldNumberPeople + 10, model.getNewPeople()); CPPUNIT_ASSERT_EQUAL(std::size_t(0), model.getNewAttributes()); - CPPUNIT_ASSERT_EQUAL(std::size_t(2010), gatherer->numberActivePeople()); + CPPUNIT_ASSERT_EQUAL(model.getNewPeople(), gatherer->numberActivePeople()); } { // Test CMetricModel::createUpdateNewModels() @@ -488,28 +492,30 @@ void CResourceLimitTest::testLargeAllocations() { CPPUNIT_ASSERT_EQUAL(std::size_t(0), model.getNewAttributes()); time += BUCKET_LENGTH; - ::addPersonMetricData(400, 900, time, *gatherer, resourceMonitor); + ::addPersonMetricData(400, 1000, time, *gatherer, resourceMonitor); model.test(time); // This should add enough people to go over the memory limit - ::addPersonMetricData(900, 4400, time, *gatherer, resourceMonitor); + ::addPersonMetricData(1000, 3000, time, *gatherer, resourceMonitor); LOG_DEBUG(<< "Testing for 2nd time"); model.test(time); - CPPUNIT_ASSERT_EQUAL(std::size_t(1400), model.getNewPeople()); + LOG_DEBUG(<< "# new people = " << model.getNewPeople()); + CPPUNIT_ASSERT(model.getNewPeople() > 2700 && model.getNewPeople() < 2900); CPPUNIT_ASSERT_EQUAL(std::size_t(0), model.getNewAttributes()); - CPPUNIT_ASSERT_EQUAL(std::size_t(1400), gatherer->numberActivePeople()); + CPPUNIT_ASSERT_EQUAL(model.getNewPeople(), gatherer->numberActivePeople()); // Adding a small number of new people should be fine though, // as they are are allowed in time += BUCKET_LENGTH; - ::addPersonMetricData(4400, 4410, time, *gatherer, resourceMonitor); + std::size_t oldNumberPeople{model.getNewPeople()}; + ::addPersonMetricData(3000, 3010, time, *gatherer, resourceMonitor); LOG_DEBUG(<< "Testing for 3rd time"); model.test(time); - CPPUNIT_ASSERT_EQUAL(std::size_t(1410), model.getNewPeople()); + CPPUNIT_ASSERT_EQUAL(oldNumberPeople + 10, model.getNewPeople()); CPPUNIT_ASSERT_EQUAL(std::size_t(0), model.getNewAttributes()); - CPPUNIT_ASSERT_EQUAL(std::size_t(1410), gatherer->numberActivePeople()); + 
CPPUNIT_ASSERT_EQUAL(model.getNewPeople(), gatherer->numberActivePeople()); } } diff --git a/lib/model/unittest/CResourceMonitorTest.cc b/lib/model/unittest/CResourceMonitorTest.cc index 2c617e87db..b9a6445d50 100644 --- a/lib/model/unittest/CResourceMonitorTest.cc +++ b/lib/model/unittest/CResourceMonitorTest.cc @@ -67,7 +67,7 @@ void CResourceMonitorTest::testMonitor() { // Test default constructor CResourceMonitor mon; CPPUNIT_ASSERT(mon.m_ByteLimitHigh > 0); - CPPUNIT_ASSERT_EQUAL(mon.m_ByteLimitLow + 1024, mon.m_ByteLimitHigh); + CPPUNIT_ASSERT_EQUAL((49 * mon.m_ByteLimitHigh) / 50, mon.m_ByteLimitLow); CPPUNIT_ASSERT(mon.m_ByteLimitHigh > mon.m_ByteLimitLow); CPPUNIT_ASSERT(mon.m_AllowAllocations); LOG_DEBUG(<< "Resource limit is: " << mon.m_ByteLimitHigh); @@ -87,13 +87,13 @@ void CResourceMonitorTest::testMonitor() { CResourceMonitor mon; mon.memoryLimit(543); CPPUNIT_ASSERT_EQUAL(std::size_t(569376768 / 2), mon.m_ByteLimitHigh); - CPPUNIT_ASSERT_EQUAL(std::size_t(569376768 / 2 - 1024), mon.m_ByteLimitLow); + CPPUNIT_ASSERT_EQUAL(std::size_t((49 * 569376768ull / 2) / 50), mon.m_ByteLimitLow); CPPUNIT_ASSERT(mon.m_AllowAllocations); // Test memoryLimit mon.memoryLimit(987); CPPUNIT_ASSERT_EQUAL(std::size_t(1034944512ull / 2), mon.m_ByteLimitHigh); - CPPUNIT_ASSERT_EQUAL(std::size_t(1034944512ull / 2 - 1024), mon.m_ByteLimitLow); + CPPUNIT_ASSERT_EQUAL(std::size_t((49 * 1034944512ull / 2) / 50), mon.m_ByteLimitLow); } { // Test adding and removing a CAnomalyDetector @@ -123,7 +123,7 @@ void CResourceMonitorTest::testMonitor() { } { // Check that High limit can be breached and then gone back - CResourceMonitor mon; + CResourceMonitor mon(1.0); CPPUNIT_ASSERT(mem > 5); // This SHOULD be OK // Let's go above the low but below the high limit @@ -306,7 +306,7 @@ void CResourceMonitorTest::testPruning() { CAnomalyDetectorModelConfig modelConfig = CAnomalyDetectorModelConfig::defaultConfig(BUCKET_LENGTH); - CLimits limits; + CLimits limits(1.0); CSearchKey key(1, // 
identifier function_t::E_IndividualMetric, false, model_t::E_XF_None, diff --git a/lib/model/unittest/CRuleConditionTest.cc b/lib/model/unittest/CRuleConditionTest.cc index 0489763142..7666e3c231 100644 --- a/lib/model/unittest/CRuleConditionTest.cc +++ b/lib/model/unittest/CRuleConditionTest.cc @@ -48,10 +48,9 @@ void CRuleConditionTest::testTimeContition() { model_t::TFeatureVec features; features.push_back(model_t::E_IndividualMeanByPerson); - CAnomalyDetectorModel::TDataGathererPtr gathererPtr(new CDataGatherer( - model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec(), - false, key, features, startTime, 0)); + CAnomalyDetectorModel::TDataGathererPtr gathererPtr(std::make_shared( + model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, TStrVec{}, key, features, startTime, 0)); CMockModel model(params, gathererPtr, influenceCalculators); From f9271029a98d201d5f00ffbd51954e1492a40bb4 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Fri, 8 Jun 2018 15:34:33 +0100 Subject: [PATCH 3/5] A couple of bug fixes, some more comments, make decrease to margin a function of elapsed time not buckets and assert on memory used vs target in limit test. --- include/model/CResourceMonitor.h | 2 +- lib/api/CAnomalyJob.cc | 2 +- lib/api/unittest/CAnomalyJobLimitTest.cc | 30 ++++++++++++++------- lib/maths/CTimeSeriesDecompositionDetail.cc | 2 ++ lib/model/CPopulationModel.cc | 12 ++++----- lib/model/CResourceMonitor.cc | 13 ++++++--- 6 files changed, 39 insertions(+), 22 deletions(-) diff --git a/include/model/CResourceMonitor.h b/include/model/CResourceMonitor.h index f8dd8fd5cb..a6fa2e1950 100644 --- a/include/model/CResourceMonitor.h +++ b/include/model/CResourceMonitor.h @@ -135,7 +135,7 @@ class MODEL_EXPORT CResourceMonitor { //! it is difficult to accurately estimate the long term memory //! usage at this point. 
This is gradually decreased over time //! by calling this pnce per bucket processed. - void decreaseMargin(); + void decreaseMargin(core_t::TTime elapsedTime); private: //! Updates the memory limit fields and the prune threshold diff --git a/lib/api/CAnomalyJob.cc b/lib/api/CAnomalyJob.cc index d028ebc9b3..9ee5446dab 100644 --- a/lib/api/CAnomalyJob.cc +++ b/lib/api/CAnomalyJob.cc @@ -413,7 +413,7 @@ void CAnomalyJob::outputBucketResultsUntil(core_t::TTime time) { lastBucketEndTime + bucketLength + latency <= time; lastBucketEndTime += effectiveBucketLength) { this->outputResults(lastBucketEndTime); - m_Limits.resourceMonitor().decreaseMargin(); + m_Limits.resourceMonitor().decreaseMargin(bucketLength); m_Limits.resourceMonitor().sendMemoryUsageReportIfSignificantlyChanged(lastBucketEndTime); m_LastFinalisedBucketEndTime = lastBucketEndTime + effectiveBucketLength; diff --git a/lib/api/unittest/CAnomalyJobLimitTest.cc b/lib/api/unittest/CAnomalyJobLimitTest.cc index ddd4ade9a3..a4735f531a 100644 --- a/lib/api/unittest/CAnomalyJobLimitTest.cc +++ b/lib/api/unittest/CAnomalyJobLimitTest.cc @@ -353,8 +353,9 @@ void CAnomalyJobLimitTest::testModelledEntityCountForFixedMemoryLimit() { LOG_DEBUG(<< "**** Test by ****"); { + std::size_t memoryLimit{10 /*MB*/}; model::CLimits limits; - limits.resourceMonitor().memoryLimit(10); + limits.resourceMonitor().memoryLimit(memoryLimit); api::CFieldConfig fieldConfig; api::CFieldConfig::TStrVec clauses{"mean(foo)", "by", "bar"}; fieldConfig.initFromClause(clauses); @@ -363,7 +364,7 @@ void CAnomalyJobLimitTest::testModelledEntityCountForFixedMemoryLimit() { api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); core_t::TTime startTime{1495110323}; - core_t::TTime endTime{1495230323}; + core_t::TTime endTime{1495260323}; core_t::TTime time{startTime}; double reportProgress{0.0}; for (/**/; time < endTime; time += bucketLength) { @@ -389,15 +390,19 @@ void 
CAnomalyJobLimitTest::testModelledEntityCountForFixedMemoryLimit() { LOG_DEBUG(<< "# by = " << used.s_ByFields); LOG_DEBUG(<< "# partition = " << used.s_PartitionFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); - LOG_DEBUG(<< "Memory usage = " << used.s_Usage); - CPPUNIT_ASSERT(used.s_ByFields > 700 && used.s_ByFields < 900); + LOG_DEBUG(<< "Memory usage bytes = " << used.s_Usage); + LOG_DEBUG(<< "Memory limit bytes = " << memoryLimit * 1024 * 1024); + CPPUNIT_ASSERT(used.s_ByFields > 600 && used.s_ByFields < 800); CPPUNIT_ASSERT_EQUAL(std::size_t(2), used.s_PartitionFields); + CPPUNIT_ASSERT_DOUBLES_EQUAL(memoryLimit * 1024 * 1024 / 2, used.s_Usage, + memoryLimit * 1024 * 1024 / 40); // Within 5%. } LOG_DEBUG(<< "**** Test partition ****"); { + std::size_t memoryLimit{10 /*MB*/}; model::CLimits limits; - limits.resourceMonitor().memoryLimit(10); + limits.resourceMonitor().memoryLimit(memoryLimit); api::CFieldConfig fieldConfig; api::CFieldConfig::TStrVec clauses{"mean(foo)", "partitionfield=bar"}; fieldConfig.initFromClause(clauses); @@ -406,7 +411,7 @@ void CAnomalyJobLimitTest::testModelledEntityCountForFixedMemoryLimit() { api::CAnomalyJob job("job", limits, fieldConfig, modelConfig, wrappedOutputStream); core_t::TTime startTime{1495110323}; - core_t::TTime endTime{1495230323}; + core_t::TTime endTime{1495260323}; core_t::TTime time{startTime}; double reportProgress{0.0}; for (/**/; time < endTime; time += bucketLength) { @@ -433,15 +438,18 @@ void CAnomalyJobLimitTest::testModelledEntityCountForFixedMemoryLimit() { LOG_DEBUG(<< "# partition = " << used.s_PartitionFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage = " << used.s_Usage); - CPPUNIT_ASSERT(used.s_PartitionFields > 400 && used.s_PartitionFields < 500); + CPPUNIT_ASSERT(used.s_PartitionFields > 350 && used.s_PartitionFields < 450); CPPUNIT_ASSERT(static_cast(used.s_ByFields) > - 0.95 * static_cast(used.s_PartitionFields)); + 0.97 * 
static_cast(used.s_PartitionFields)); + CPPUNIT_ASSERT_DOUBLES_EQUAL(memoryLimit * 1024 * 1024 / 2, used.s_Usage, + memoryLimit * 1024 * 1024 / 40); // Within 5%. } LOG_DEBUG(<< "**** Test over ****"); { + std::size_t memoryLimit{5 /*MB*/}; model::CLimits limits; - limits.resourceMonitor().memoryLimit(5); + limits.resourceMonitor().memoryLimit(memoryLimit); api::CFieldConfig fieldConfig; api::CFieldConfig::TStrVec clauses{"mean(foo)", "over", "bar"}; fieldConfig.initFromClause(clauses); @@ -475,6 +483,8 @@ void CAnomalyJobLimitTest::testModelledEntityCountForFixedMemoryLimit() { LOG_DEBUG(<< "# over = " << used.s_OverFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage = " << used.s_Usage); - CPPUNIT_ASSERT(used.s_OverFields > 8000 && used.s_OverFields < 9000); + CPPUNIT_ASSERT(used.s_OverFields > 6000 && used.s_OverFields < 7000); + CPPUNIT_ASSERT_DOUBLES_EQUAL(memoryLimit * 1024 * 1024 / 2, used.s_Usage, + memoryLimit * 1024 * 1024 / 40); // Within 5%. } } diff --git a/lib/maths/CTimeSeriesDecompositionDetail.cc b/lib/maths/CTimeSeriesDecompositionDetail.cc index bc36f0b04c..c9c9190c60 100644 --- a/lib/maths/CTimeSeriesDecompositionDetail.cc +++ b/lib/maths/CTimeSeriesDecompositionDetail.cc @@ -652,6 +652,8 @@ std::size_t CTimeSeriesDecompositionDetail::CPeriodicityTest::extraMemoryOnIniti if (result == 0) { for (auto i : {E_Short, E_Long}) { TExpandingWindowPtr window(this->newWindow(i, false)); + // The 0.3 is a rule-of-thumb estimate of the worst case + // compression ratio we achieve on the test state. 
result += 0.3 * core::CMemory::dynamicSize(window); } } diff --git a/lib/model/CPopulationModel.cc b/lib/model/CPopulationModel.cc index 737bb5d23b..f048da5699 100644 --- a/lib/model/CPopulationModel.cc +++ b/lib/model/CPopulationModel.cc @@ -375,13 +375,8 @@ void CPopulationModel::createUpdateNewModels(core_t::TTime time, } } - this->estimateMemoryUsageOrComputeAndUpdate(numberExistingPeople, - numberExistingAttributes, 0); - - while (numberNewAttributes > 0 && - (numberExistingAttributes == 0 || - (resourceMonitor.areAllocationsAllowed() && - (resourceMonitor.haveNoLimit() || ourUsage < resourceLimit)))) { + while (numberNewAttributes > 0 && resourceMonitor.areAllocationsAllowed() && + (resourceMonitor.haveNoLimit() || ourUsage < resourceLimit)) { // We batch attributes in CHUNK_SIZE (500) and create models in chunks // and test usage after each chunk. std::size_t numberToCreate = std::min(numberNewAttributes, CHUNK_SIZE); @@ -397,6 +392,9 @@ void CPopulationModel::createUpdateNewModels(core_t::TTime time, } } + this->estimateMemoryUsageOrComputeAndUpdate(numberExistingPeople, + numberExistingAttributes, 0); + if (numberNewPeople > 0) { resourceMonitor.acceptAllocationFailureResult(time); LOG_DEBUG(<< "Not enough memory to create person models"); diff --git a/lib/model/CResourceMonitor.cc b/lib/model/CResourceMonitor.cc index 4e02933c31..3469d8e6a1 100644 --- a/lib/model/CResourceMonitor.cc +++ b/lib/model/CResourceMonitor.cc @@ -168,7 +168,7 @@ bool CResourceMonitor::pruneIfRequired(core_t::TTime endTime) { for (auto& detector : m_Detectors) { const auto& model = detector.first->model(); model->prune(m_PruneWindow); - detector.second = model->memoryUsage(); + detector.second = core::CMemory::dynamicSize(detector.first); usageAfter += detector.second; } m_CurrentAnomalyDetectorMemory = usageAfter; @@ -305,8 +305,15 @@ void CResourceMonitor::clearExtraMemory() { } } -void CResourceMonitor::decreaseMargin() { - m_ByteLimitMargin = 1.0 - 0.99 * (1.0 - 
m_ByteLimitMargin); +void CResourceMonitor::decreaseMargin(core_t::TTime elapsedTime) { + // We choose to increase the margin to close to 1 on the order of the + // time it takes to detect diurnal periodic components. These + // will be the overwhelmingly common source of additional memory + // so the model memory should be accurate (on average) in this + // time frame. + double scale{1.0 - static_cast<double>(elapsedTime) / + static_cast<double>(core::constants::DAY)}; + m_ByteLimitMargin = 1.0 - scale * (1.0 - m_ByteLimitMargin); } std::size_t CResourceMonitor::highLimit() const { From af04280503114d35150027379691011abf07573a Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Fri, 8 Jun 2018 15:44:35 +0100 Subject: [PATCH 4/5] Change log --- docs/CHANGELOG.asciidoc | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index 28426be96f..d1e6ebacc2 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -33,6 +33,7 @@ Improve behavior when there are abrupt changes in the seasonal components presen Explicit change point detection and modelling ({pull}92[#92]) Improve partition analysis memory usage ({pull}97[#97]) Reduce model memory by storing state for periodicity testing in a compressed format ({pull}100[#100]) +Improve the accuracy of model memory control ({pull}122[#122]) Forecasting of Machine Learning job time series is now supported for large jobs by temporarily storing model state on disk ({pull}89[#89]) From fe5a06364c513e7eea797f7e568019f22df15f99 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Tue, 12 Jun 2018 17:25:44 +0100 Subject: [PATCH 5/5] Review comment --- lib/model/CAnomalyDetectorModel.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/model/CAnomalyDetectorModel.cc b/lib/model/CAnomalyDetectorModel.cc index 0f1e601dda..628adb6041 100644 --- a/lib/model/CAnomalyDetectorModel.cc +++ b/lib/model/CAnomalyDetectorModel.cc @@ -95,7 +95,7 @@ CAnomalyDetectorModel::CAnomalyDetectorModel(bool
isForPersistence, : // The copy of m_DataGatherer is a shallow copy. This would be unacceptable // if we were going to persist the data gatherer from within this class. // We don't, so that's OK, but the next issue is that another thread will be - // modifying the data gatherer m_DataGatherer points too whilst this object + // modifying the data gatherer m_DataGatherer points to whilst this object // is being persisted. Therefore, persistence must only call methods on the // data gatherer that are invariant. m_Params(other.m_Params), m_DataGatherer(other.m_DataGatherer),