Skip to content

Commit

Permalink
fix bugs where occasionally the hessian can be negative due to floati…
Browse files Browse the repository at this point in the history
…ng point noise
  • Loading branch information
paulbkoch committed Nov 18, 2024
1 parent 77b339e commit 17ea95d
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 30 deletions.
2 changes: 1 addition & 1 deletion shared/libebm/GenerateTermUpdate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -550,7 +550,7 @@ static ErrorEbm BoostMultiDimensional(BoosterShell* const pBoosterShell,
++pHessian;
}
double update = *pScore;
gain += CalcPartialGainFromUpdate(grad, hess, -update, regAlpha, regLambda);
gain += CalcPartialGainFromUpdate<true>(grad, hess, -update, regAlpha, regLambda);
++pGradient;
++pScore;
}
Expand Down
15 changes: 8 additions & 7 deletions shared/libebm/PartitionOneDimensionalBoosting.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -397,9 +397,9 @@ static int FindBestSplitGain(RandomDeterministic* const pRng,

if(MONOTONE_NONE != direction) {
const FloatCalc negUpdateRight =
CalcNegUpdate<false>(sumGradientsRight, sumHessiansRightUpdate, regAlpha, regLambda, deltaStepMax);
CalcNegUpdate<true>(sumGradientsRight, sumHessiansRightUpdate, regAlpha, regLambda, deltaStepMax);
const FloatCalc negUpdateLeft =
CalcNegUpdate<false>(sumGradientsLeft, sumHessiansLeftUpdate, regAlpha, regLambda, deltaStepMax);
CalcNegUpdate<true>(sumGradientsLeft, sumHessiansLeftUpdate, regAlpha, regLambda, deltaStepMax);
if(MonotoneDirection{0} < direction) {
if(negUpdateLeft < negUpdateRight) {
bLegal = false;
Expand All @@ -413,13 +413,14 @@ static int FindBestSplitGain(RandomDeterministic* const pRng,
}

const FloatCalc gainRight =
CalcPartialGain(sumGradientsRight, sumHessiansRight, regAlpha, regLambda, deltaStepMax);
EBM_ASSERT(std::isnan(gainRight) || 0 <= gainRight);
CalcPartialGain<false>(sumGradientsRight, sumHessiansRight, regAlpha, regLambda, deltaStepMax);
EBM_ASSERT(!bLegal || std::isnan(gainRight) || 0 <= gainRight);
gain += gainRight;

// if bLegal was set to false, sumHessiansLeft can be negative
const FloatCalc gainLeft =
CalcPartialGain(sumGradientsLeft, sumHessiansLeft, regAlpha, regLambda, deltaStepMax);
EBM_ASSERT(std::isnan(gainLeft) || 0 <= gainLeft);
CalcPartialGain<true>(sumGradientsLeft, sumHessiansLeft, regAlpha, regLambda, deltaStepMax);
EBM_ASSERT(!bLegal || std::isnan(gainLeft) || 0 <= gainLeft);
gain += gainLeft;

++iScore;
Expand Down Expand Up @@ -524,7 +525,7 @@ done:;
// we would not get here unless there was a split, so both sides must meet the minHessian reqirement
EBM_ASSERT(hessianMin <= sumHessiansOverwrite);
const FloatCalc gain1 =
CalcPartialGain(sumGradientsParent, sumHessiansOverwrite, regAlpha, regLambda, deltaStepMax);
CalcPartialGain<false>(sumGradientsParent, sumHessiansOverwrite, regAlpha, regLambda, deltaStepMax);
EBM_ASSERT(std::isnan(gain1) || 0 <= gain1);
bestGain -= gain1;

Expand Down
8 changes: 4 additions & 4 deletions shared/libebm/PartitionTwoDimensionalBoosting.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -491,7 +491,7 @@ template<bool bHessian, size_t cCompilerScores> class PartitionTwoDimensionalBoo
}

const FloatCalc gain1 =
CalcPartialGain(static_cast<FloatCalc>(aGradientPairsLocal[iScore].m_sumGradients),
CalcPartialGain<false>(static_cast<FloatCalc>(aGradientPairsLocal[iScore].m_sumGradients),
hessian,
regAlpha,
regLambda,
Expand Down Expand Up @@ -543,7 +543,7 @@ template<bool bHessian, size_t cCompilerScores> class PartitionTwoDimensionalBoo
}

const FloatCalc gain1 =
CalcPartialGain(static_cast<FloatCalc>(aGradientPairsLocal[iScore].m_sumGradients),
CalcPartialGain<false>(static_cast<FloatCalc>(aGradientPairsLocal[iScore].m_sumGradients),
hessian,
regAlpha,
regLambda,
Expand Down Expand Up @@ -670,7 +670,7 @@ template<bool bHessian, size_t cCompilerScores> class PartitionTwoDimensionalBoo
++pHessian;
}
double update = *pScore;
gain += CalcPartialGainFromUpdate(grad, hess, -update, regAlpha, regLambda);
gain += CalcPartialGainFromUpdate<true>(grad, hess, -update, regAlpha, regLambda);
++pGradient;
++pScore;
}
Expand Down Expand Up @@ -855,7 +855,7 @@ template<bool bHessian, size_t cCompilerScores> class PartitionTwoDimensionalBoo
EBM_ASSERT(hessianMin <= hess);

const FloatCalc gain1 =
CalcPartialGain(static_cast<FloatCalc>(pGradientPairTotal[iScore].m_sumGradients),
CalcPartialGain<false>(static_cast<FloatCalc>(pGradientPairTotal[iScore].m_sumGradients),
hess,
regAlpha,
regLambda,
Expand Down
25 changes: 15 additions & 10 deletions shared/libebm/PartitionTwoDimensionalInteraction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -351,10 +351,14 @@ template<bool bHessian, size_t cCompilerScores> class PartitionTwoDimensionalInt
const FloatCalc negPure11 = common / (FloatCalc{1} + w11 / w00 + w11 / w01 + w11 / w10);

// g = partial gain
const FloatCalc g00 = CalcPartialGainFromUpdate(grad00, hess00, negPure00, regAlpha, regLambda);
const FloatCalc g01 = CalcPartialGainFromUpdate(grad01, hess01, negPure01, regAlpha, regLambda);
const FloatCalc g10 = CalcPartialGainFromUpdate(grad10, hess10, negPure10, regAlpha, regLambda);
const FloatCalc g11 = CalcPartialGainFromUpdate(grad11, hess11, negPure11, regAlpha, regLambda);
const FloatCalc g00 =
CalcPartialGainFromUpdate<false>(grad00, hess00, negPure00, regAlpha, regLambda);
const FloatCalc g01 =
CalcPartialGainFromUpdate<false>(grad01, hess01, negPure01, regAlpha, regLambda);
const FloatCalc g10 =
CalcPartialGainFromUpdate<false>(grad10, hess10, negPure10, regAlpha, regLambda);
const FloatCalc g11 =
CalcPartialGainFromUpdate<false>(grad11, hess11, negPure11, regAlpha, regLambda);

gain += g00;
gain += g01;
Expand All @@ -363,13 +367,14 @@ template<bool bHessian, size_t cCompilerScores> class PartitionTwoDimensionalInt
}
} else {
// non-purified gain
gain += CalcPartialGain(grad00, hess00, regAlpha, regLambda, deltaStepMax);
gain += CalcPartialGain(grad01, hess01, regAlpha, regLambda, deltaStepMax);
gain += CalcPartialGain(grad10, hess10, regAlpha, regLambda, deltaStepMax);
gain += CalcPartialGain(grad11, hess11, regAlpha, regLambda, deltaStepMax);
gain += CalcPartialGain<false>(grad00, hess00, regAlpha, regLambda, deltaStepMax);
gain += CalcPartialGain<false>(grad01, hess01, regAlpha, regLambda, deltaStepMax);
gain += CalcPartialGain<false>(grad10, hess10, regAlpha, regLambda, deltaStepMax);
gain += CalcPartialGain<false>(grad11, hess11, regAlpha, regLambda, deltaStepMax);
}
}
EBM_ASSERT(std::isnan(gain) || 0 <= gain); // sumations of positive numbers should be positive
// gain should be positive if we're dealing with unpurified updates
EBM_ASSERT(0 != (CalcInteractionFlags_Purify & flags) || std::isnan(gain) || 0 <= gain);

// If we get a NaN result, we'd like to propagate it by making bestGain NaN.
// The rules for NaN values say that non equality comparisons are all false so,
Expand Down Expand Up @@ -413,7 +418,7 @@ template<bool bHessian, size_t cCompilerScores> class PartitionTwoDimensionalInt

EBM_ASSERT(hessianMin <= hess);

bestGain -= CalcPartialGain(static_cast<FloatCalc>(aGradientPairs[iScore].m_sumGradients),
bestGain -= CalcPartialGain<false>(static_cast<FloatCalc>(aGradientPairs[iScore].m_sumGradients),
hess,
regAlpha,
regLambda,
Expand Down
28 changes: 20 additions & 8 deletions shared/libebm/ebm_stats.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,36 +90,43 @@ INLINE_ALWAYS static FloatCalc CalcNegUpdate(const FloatCalc sumGradient,
const FloatCalc regAlpha,
const FloatCalc regLambda,
const FloatCalc deltaStepMax) {
// a loss function with negative hessians would be unstable
EBM_ASSERT(std::isnan(sumHessian) || FloatCalc{0} <= sumHessian);
EBM_ASSERT(FloatCalc{0} < deltaStepMax);

if(bCheckHessian) {
if(UNLIKELY(sumHessian < std::numeric_limits<FloatCalc>::min())) {
return FloatCalc{0};
}
}
EBM_ASSERT(std::isnan(sumHessian) || FloatCalc{0} < sumHessian);

FloatCalc ret = ApplyL1(sumGradient, regAlpha) / ApplyL2(sumHessian, regLambda);
if(UNLIKELY(std::fabs(ret) > deltaStepMax)) {
ret = UNPREDICTABLE(ret < FloatCalc{0}) ? -deltaStepMax : deltaStepMax;
}
return ret;
}

template<bool bCheckHessian>
INLINE_ALWAYS static FloatCalc CalcPartialGainFromUpdate(const FloatCalc sumGradient,
const FloatCalc sumHessian,
const FloatCalc negUpdate,
const FloatCalc regAlpha,
const FloatCalc regLambda) {
// a loss function with negative hessians would be unstable
EBM_ASSERT(std::isnan(sumHessian) || FloatCalc{0} <= sumHessian);

if(bCheckHessian) {
if(UNLIKELY(sumHessian < std::numeric_limits<FloatCalc>::min())) {
return FloatCalc{0};
}
}
EBM_ASSERT(std::isnan(sumHessian) || FloatCalc{0} < sumHessian);

const FloatCalc partialGain =
negUpdate * (ApplyL1(sumGradient, regAlpha) * FloatCalc{2} - negUpdate * ApplyL2(sumHessian, regLambda));

return partialGain;
}

template<bool bCheckHessian>
INLINE_ALWAYS static FloatCalc CalcPartialGain(const FloatCalc sumGradient,
const FloatCalc sumHessian,
const FloatCalc regAlpha,
Expand All @@ -128,21 +135,26 @@ INLINE_ALWAYS static FloatCalc CalcPartialGain(const FloatCalc sumGradient,
// This gain function used to determine splits is equivalent to minimizing sum of squared error SSE, which
// can be seen following the derivation of Equation #7 in Ping Li's paper -> https://arxiv.org/pdf/1203.3491.pdf

// a loss function with negative hessians would be unstable
EBM_ASSERT(std::isnan(sumHessian) || FloatCalc{0} <= sumHessian);
EBM_ASSERT(FloatCalc{0} < deltaStepMax);

if(bCheckHessian) {
if(UNLIKELY(sumHessian < std::numeric_limits<FloatCalc>::min())) {
return FloatCalc{0};
}
}
EBM_ASSERT(std::isnan(sumHessian) || FloatCalc{0} < sumHessian);

FloatCalc partialGain;
if(UNLIKELY(std::numeric_limits<FloatCalc>::infinity() != deltaStepMax)) {
const FloatCalc negUpdate = CalcNegUpdate<false>(sumGradient, sumHessian, regAlpha, regLambda, deltaStepMax);
partialGain = CalcPartialGainFromUpdate(sumGradient, sumHessian, negUpdate, regAlpha, regLambda);
partialGain = CalcPartialGainFromUpdate<false>(sumGradient, sumHessian, negUpdate, regAlpha, regLambda);
} else {
const FloatCalc regularizedSumGradient = ApplyL1(sumGradient, regAlpha);
partialGain = regularizedSumGradient / ApplyL2(sumHessian, regLambda) * regularizedSumGradient;

EBM_ASSERT(std::isnan(partialGain) ||
IsApproxEqual(partialGain,
CalcPartialGainFromUpdate(sumGradient,
CalcPartialGainFromUpdate<false>(sumGradient,
sumHessian,
CalcNegUpdate<false>(sumGradient, sumHessian, regAlpha, regLambda, deltaStepMax),
regAlpha,
Expand Down

0 comments on commit 17ea95d

Please sign in to comment.