
Commit

harmonize calling between boosting and interactions and add missing check for allocation failure
paulbkoch committed Nov 18, 2024
1 parent 17ea95d commit 2975384
Showing 3 changed files with 240 additions and 55 deletions.
288 changes: 235 additions & 53 deletions shared/libebm/CalcInteractionStrength.cpp
@@ -20,6 +20,8 @@
#include "ebm_internal.hpp" // k_cDimensionsMax
#include "Feature.hpp"
#include "DataSetInteraction.hpp"
#include "Tensor.hpp"
#include "TreeNode.hpp"
#include "InteractionCore.hpp"
#include "InteractionShell.hpp"

@@ -73,6 +75,34 @@ extern double PartitionTwoDimensionalInteraction(InteractionCore* const pInteractionCore,
#endif // NDEBUG
);

extern ErrorEbm PartitionTwoDimensionalBoosting(const bool bHessian,
const size_t cRuntimeScores,
const size_t cDimensions,
const size_t cRealDimensions,
const TermBoostFlags flags,
const size_t cSamplesLeafMin,
const FloatCalc hessianMin,
const FloatCalc regAlpha,
const FloatCalc regLambda,
const FloatCalc deltaStepMax,
const BinBase* const aBinsBase,
BinBase* const aAuxiliaryBinsBase,
Tensor* const pInnerTermUpdate,
void* const pRootTreeNodeBase,
const size_t* const acBins,
double* const aTensorWeights,
double* const aTensorGrad,
double* const aTensorHess,
double* const pTotalGain,
const size_t cPossibleSplits,
void* const pTemp1
#ifndef NDEBUG
,
const BinBase* const aDebugCopyBinsBase,
const BinBase* const pBinsEndDebug
#endif // NDEBUG
);

// there is a race condition for decrementing this variable, but if a thread loses the
// race then it just doesn't get decremented as quickly, which we can live with
static int g_cLogCalcInteractionStrength = 10;
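A minimal sketch of the benign race the comment above describes (ShouldLog is a hypothetical helper; the real throttling goes through the LOG_COUNTED_* macros):

// Hypothetical sketch: two threads can both read 10 and both store 9, losing
// one decrement, but the counter still reaches 0 eventually, which is all
// the log throttling needs.
static int g_cLogExample = 10;
static bool ShouldLog() {
   if(0 < g_cLogExample) {
      --g_cLogExample; // not atomic on purpose; a lost decrement is harmless
      return true;
   }
   return false;
}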
@@ -235,10 +265,6 @@ EBM_API_BODY ErrorEbm EBM_CALLING_CONVENTION CalcInteractionStrength(InteractionHandle interactionHandle,
return Error_None;
}

BinSumsInteractionBridge binSums;

const FeatureInteraction* const aFeatures = pInteractionCore->GetFeatures();
@@ -463,10 +489,11 @@ EBM_API_BODY ErrorEbm EBM_CALLING_CONVENTION CalcInteractionStrength(InteractionHandle interactionHandle,
#endif // NDEBUG
);

double bestGain;
if(2 == cDimensions) {
LOG_0(Trace_Verbose, "CalcInteractionStrength Starting bin sweep loop");

bestGain = PartitionTwoDimensionalInteraction(pInteractionCore,
cDimensions,
binSums.m_acBins,
flags,
@@ -483,68 +510,223 @@ EBM_API_BODY ErrorEbm EBM_CALLING_CONVENTION CalcInteractionStrength(InteractionHandle interactionHandle,
pDebugMainBinsEnd
#endif // NDEBUG
);
} else {
size_t cPossibleSplits;
if(IsOverflowBinSize<FloatMain, UIntMain>(true, true, bHessian, cScores)) {
// TODO: move this to init
return Error_OutOfMemory;
}

if(IsOverflowTreeNodeMultiSize(bHessian, cScores)) {
// TODO: move this to init
return Error_OutOfMemory;
}
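A minimal sketch of the check-before-compute pattern that these IsOverflow*/IsAddError/IsMultiplyError guards share (WouldMultiplyOverflow is a hypothetical stand-in, not libebm's actual helper):

#include <cstddef> // size_t
#include <cstdint> // SIZE_MAX

// Hypothetical stand-in: report whether a * b would wrap past SIZE_MAX,
// without ever computing a * b.
inline bool WouldMultiplyOverflow(const size_t a, const size_t b) {
   return 0 != b && SIZE_MAX / b < a;
}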

cPossibleSplits = 0;

size_t cBytes = 1;

size_t* pcBins = binSums.m_acBins;
size_t* pcBinsEnd = binSums.m_acBins + cDimensions;
do {
const size_t cBins = *pcBins;
EBM_ASSERT(size_t{2} <= cBins);
const size_t cSplits = cBins - 1;
if(IsAddError(cPossibleSplits, cSplits)) {
return Error_OutOfMemory;
}
cPossibleSplits += cSplits;
if(IsMultiplyError(cBins, cBytes)) {
return Error_OutOfMemory;
}
cBytes *= cBins;
++pcBins;
} while(pcBinsEnd != pcBins);

// For pairs, this calculates the exact max number of splits. For higher dimensions
// the max number of splits will be less, but it should be close enough.
// Each bin gets a tree node to record the gradient totals, and each split gets a TreeNode
// during construction. Each split contains a minimum of 1 bin on each side, so we have
// cBins - 1 potential splits.

if(IsAddError(cBytes, cBytes - 1)) {
return Error_OutOfMemory;
}
cBytes = cBytes + cBytes - 1;
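A worked instance of the sizing arithmetic above, for a hypothetical 3-dimensional term with acBins = {4, 3, 2}:

// cPossibleSplits = (4 - 1) + (3 - 1) + (2 - 1) =  6
// tensor bins     = 4 * 3 * 2                   = 24
// cBytes          = 2 * 24 - 1                  = 47 TreeNodes
// cBytes counts TreeNodes at this point; the multiply by
// GetTreeNodeMultiSize(...) below converts it to a byte count.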

const size_t cBytesTreeNodeMulti = GetTreeNodeMultiSize(bHessian, cScores);

if(IsMultiplyError(cBytesTreeNodeMulti, cBytes)) {
return Error_OutOfMemory;
}
cBytes *= cBytesTreeNodeMulti;

const size_t cBytesBest = cBytesTreeNodeMulti * (size_t{1} + (cDimensions << 1));
EBM_ASSERT(cBytesBest <= cBytes);

// double it because during the multi-dimensional sweep we need both the best and the current
if(IsAddError(cBytesBest, cBytesBest)) {
return Error_OutOfMemory;
}
const size_t cBytesSweep = cBytesBest + cBytesBest;

cBytes = EbmMax(cBytes, cBytesSweep);
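Continuing the hypothetical acBins = {4, 3, 2} example:

// cBytesBest  = cBytesTreeNodeMulti * (1 + (3 << 1)) =  7 TreeNodes of bytes
// cBytesSweep = cBytesBest + cBytesBest              = 14 TreeNodes of bytes
// cBytes      = EbmMax(47 TreeNodes, 14 TreeNodes)   = 47 TreeNodes of bytes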

double* aWeights = nullptr;
double* pGradient = nullptr;
double* pHessian = nullptr;
void* pTreeNodesTemp = nullptr;
void* pTemp1 = nullptr;

if(0 != (CalcInteractionFlags_Purify & flags)) {
// allocate the biggest tensor that is possible to split into

// TODO: cache this memory allocation so that we don't do it each time

if(IsAddError(size_t{1}, cScores)) {
return Error_OutOfMemory;
}
size_t cItems = 1 + cScores;
const bool bUseLogitBoost = bHessian && !(CalcInteractionFlags_DisableNewton & flags);
if(bUseLogitBoost) {
if(IsAddError(cScores, cItems)) {
return Error_OutOfMemory;
}
cItems += cScores;
}
if(IsMultiplyError(sizeof(double), cItems, cTensorBins)) {
return Error_OutOfMemory;
}
aWeights = static_cast<double*>(malloc(sizeof(double) * cItems * cTensorBins));
if(nullptr == aWeights) {
return Error_OutOfMemory;
}
pGradient = aWeights + cTensorBins;
if(bUseLogitBoost) {
pHessian = pGradient + cTensorBins * cScores;
}
}
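A sketch of how the single aWeights allocation is partitioned above (assuming the bUseLogitBoost case, where cItems == 1 + 2 * cScores):

// [ weights: cTensorBins ][ gradients: cTensorBins * cScores ][ hessians: cTensorBins * cScores ]
// ^ aWeights              ^ pGradient                          ^ pHessian
//
// Without bUseLogitBoost the hessian segment is absent (cItems == 1 + cScores)
// and pHessian stays nullptr.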

pTreeNodesTemp = malloc(cBytes);
if(nullptr == pTreeNodesTemp) {
free(aWeights);
return Error_OutOfMemory;
}

pTemp1 = malloc(cPossibleSplits * sizeof(unsigned char));
if(nullptr == pTemp1) {
free(pTreeNodesTemp);
free(aWeights);
return Error_OutOfMemory;
}

Tensor* const pInnerTermUpdate = Tensor::Allocate(k_cDimensionsMax, cScores);
if(nullptr == pInnerTermUpdate) {
free(pTemp1);
free(pTreeNodesTemp);
free(aWeights);
return Error_OutOfMemory;
}

error = PartitionTwoDimensionalBoosting(bHessian,
cScores,
cDimensions,
cDimensions,
flags,
cSamplesLeafMin,
hessianMin,
regAlpha,
regLambda,
deltaStepMax,
aMainBins,
aAuxiliaryBins,
pInnerTermUpdate,
pTreeNodesTemp,
binSums.m_acBins,
aWeights,
pGradient,
pHessian,
&bestGain,
cPossibleSplits,
pTemp1
#ifndef NDEBUG
,
aDebugCopyBins,
pDebugMainBinsEnd
#endif // NDEBUG
);

Tensor::Free(pInnerTermUpdate);
free(pTemp1);
free(pTreeNodesTemp);
free(aWeights);

if(Error_None != error) {
#ifndef NDEBUG
free(aDebugCopyBins);
#endif // NDEBUG

LOG_0(Trace_Verbose, "Exited CalcInteractionStrength with error code");

return error;
}
EBM_ASSERT(!std::isnan(bestGain));
EBM_ASSERT(0 <= bestGain);
}

#ifndef NDEBUG
free(aDebugCopyBins);
#endif // NDEBUG

// if totalWeight < 1 then bestGain could overflow to +inf, so do the division first
const double totalWeight = pDataSet->GetWeightTotal();
EBM_ASSERT(0 < totalWeight); // if all are zeros we assume there are no weights and use the count
bestGain /= totalWeight;
if(CalcInteractionFlags_DisableNewton & flags) {
bestGain *= pInteractionCore->GainAdjustmentGradientBoosting();
} else {
bestGain /= pInteractionCore->HessianConstant();
bestGain *= pInteractionCore->GainAdjustmentHessianBoosting();
}
const double gradientConstant = pInteractionCore->GradientConstant();
bestGain *= gradientConstant;
bestGain *= gradientConstant;

if(UNLIKELY(/* NaN */ !LIKELY(bestGain <= std::numeric_limits<double>::max()))) {
// We simplify our caller's handling by returning std::numeric_limits<double>::lowest() as our error
// indicator. lowest() will sort as the least important item, which is good, and it also signals an
// overflow without the weirdness of NaNs.
EBM_ASSERT(std::isnan(bestGain) || std::numeric_limits<double>::infinity() == bestGain);
bestGain = k_illegalGainDouble;
} else if(UNLIKELY(bestGain < 0.0)) {
// gain can't mathematically be legally negative, but it can be here in the following situations:
// 1) for impure interaction gain we subtract the parent partial gain, and there can be floating point
// noise that makes this slightly negative
// 2) for impure interaction gain we subtract the parent partial gain, but if there were no legal cuts
// then the partial gain before subtracting the parent partial gain was zero and we then get a
// substantially negative value. In this case we should not have subtracted the parent partial gain
// since we had never even calculated the 4 quadrant partial gain, but we handle this scenario
// here instead of inside the templated function.

EBM_ASSERT(!std::isnan(bestGain));
// make bestGain k_illegalGainDouble if it's -infinity, otherwise make it zero
bestGain = std::numeric_limits<double>::lowest() <= bestGain ? 0.0 : k_illegalGainDouble;
} else {
EBM_ASSERT(!std::isnan(bestGain));
EBM_ASSERT(!std::isinf(bestGain));
}
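The branches above reduce to a small total function; a standalone restatement (SanitizeGain is a hypothetical name, and k_illegalGainDouble is assumed to equal std::numeric_limits<double>::lowest()):

#include <limits>

// Hypothetical restatement of the gain sanitization above; assumes
// k_illegalGainDouble == std::numeric_limits<double>::lowest().
inline double SanitizeGain(const double gain) {
   constexpr double illegal = std::numeric_limits<double>::lowest();
   if(!(gain <= std::numeric_limits<double>::max())) {
      return illegal; // NaN or +infinity indicates an overflow
   }
   if(gain < 0.0) {
      // -infinity means no legal cut existed; small negatives are float noise
      return illegal <= gain ? 0.0 : illegal;
   }
   return gain; // finite and non-negative: a legal gain
}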

if(nullptr != avgInteractionStrengthOut) {
*avgInteractionStrengthOut = bestGain;
}

EBM_ASSERT(k_illegalGainDouble == bestGain || 0.0 <= bestGain);
LOG_COUNTED_N(pInteractionShell->GetPointerCountLogExitMessages(),
Trace_Info,
Trace_Verbose,
"Exited CalcInteractionStrength: "
"bestGain=%le",
bestGain);

return Error_None;
}

3 changes: 3 additions & 0 deletions shared/libebm/GenerateTermUpdate.cpp
@@ -355,6 +355,9 @@ static ErrorEbm BoostMultiDimensional(BoosterShell* const pBoosterShell,
return Error_OutOfMemory;
}
aWeights = static_cast<double*>(malloc(sizeof(double) * cItems * cTensorBins));
if(nullptr == aWeights) {
return Error_OutOfMemory;
}
pGradient = aWeights + cTensorBins;
if(bUseLogitBoost) {
pHessian = pGradient + cTensorBins * cScores;
4 changes: 2 additions & 2 deletions shared/libebm/tests/interaction_unusual_inputs.cpp
@@ -120,7 +120,7 @@ TEST_CASE("Term with one feature with two states, interaction, regression") {
});

double metricReturn = test.TestCalcInteractionStrength({0});
CHECK(0.5 == metricReturn);
}

TEST_CASE("Term with one feature with two states and weights, interaction, regression") {
@@ -132,7 +132,7 @@ TEST_CASE("Term with one feature with two states and weights, interaction, regression") {
});

double metricReturn = test.TestCalcInteractionStrength({0});
CHECK(0.44444444444441916 == metricReturn);
}

TEST_CASE("weights are proportional, interaction, regression") {
