Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix!: statistical functions should return null when provided a vector of only null values #5606

Merged
merged 19 commits into from
Jun 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
212 changes: 194 additions & 18 deletions engine/function/src/templates/Numeric.ftl

Large diffs are not rendered by default.

157 changes: 144 additions & 13 deletions engine/function/src/templates/TestNumeric.ftl

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import static io.deephaven.engine.table.impl.by.RollupConstants.*;
import static io.deephaven.engine.util.NullSafeAddition.plusLong;
import static io.deephaven.engine.util.NullSafeAddition.minusLong;
import static io.deephaven.util.QueryConstants.NULL_DOUBLE;

/**
* Iterative average operator.
Expand Down Expand Up @@ -92,7 +93,7 @@ private boolean addChunk(ByteChunk<? extends Values> values, long destination, i
runningSum.set(destination, newSum);
resultColumn.set(destination, (double) newSum / newCount);
} else if (nonNullCount.onlyNullsUnsafe(destination)) {
resultColumn.set(destination, Double.NaN);
resultColumn.set(destination, NULL_DOUBLE);
} else {
return false;
}
Expand All @@ -110,8 +111,11 @@ private boolean removeChunk(ByteChunk<? extends Values> values, long destination
final long newCount = nonNullCount.addNonNullUnsafe(destination, -chunkNonNull.get());
final long newSum = minusLong(runningSum.getUnsafe(destination), chunkSum);
runningSum.set(destination, newSum);
resultColumn.set(destination, (double) newSum / newCount);

if (newCount == 0) {
resultColumn.set(destination, NULL_DOUBLE);
} else {
resultColumn.set(destination, (double) newSum / newCount);
}
return true;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
// @formatter:off
package io.deephaven.engine.table.impl.by;

import io.deephaven.base.verify.Assert;
import io.deephaven.chunk.attributes.ChunkLengths;
import io.deephaven.chunk.attributes.ChunkPositions;
import io.deephaven.chunk.attributes.Values;
Expand All @@ -23,6 +24,7 @@

import static io.deephaven.engine.table.impl.by.RollupConstants.*;
import static io.deephaven.engine.util.NullSafeAddition.plusDouble;
import static io.deephaven.util.QueryConstants.NULL_DOUBLE;

/**
* Iterative variance operator.
Expand Down Expand Up @@ -86,21 +88,27 @@ private boolean addChunk(ByteChunk<? extends Values> values, long destination, i
final double sum = SumByteChunk.sum2ByteChunk(values, chunkStart, chunkSize, chunkNonNull, sum2);

if (chunkNonNull.get() > 0) {
final long nonNullCount = nonNullCounter.addNonNullUnsafe(destination, chunkNonNull.get());
final long totalNormalCount = nonNullCounter.addNonNullUnsafe(destination, chunkNonNull.get());
final double newSum = plusDouble(sumSource.getUnsafe(destination), sum);
final double newSum2 = plusDouble(sum2Source.getUnsafe(destination), sum2.doubleValue());

sumSource.set(destination, newSum);
sum2Source.set(destination, newSum2);

if (nonNullCount <= 1) {
Assert.neqZero(totalNormalCount, "totalNormalCount");
if (totalNormalCount == 1) {
resultColumn.set(destination, Double.NaN);
} else {
final double variance = (newSum2 - (newSum * newSum / nonNullCount)) / (nonNullCount - 1);
final double variance = (newSum2 - (newSum * newSum / totalNormalCount)) / (totalNormalCount - 1);
resultColumn.set(destination, std ? Math.sqrt(variance) : variance);
}
} else if (nonNullCounter.getCountUnsafe(destination) <= 1) {
resultColumn.set(destination, Double.NaN);
} else {
final long totalNormalCount = nonNullCounter.getCountUnsafe(destination);
if (totalNormalCount == 0) {
resultColumn.set(destination, NULL_DOUBLE);
} else if (totalNormalCount == 1) {
resultColumn.set(destination, Double.NaN);
}
}
return true;
}
Expand All @@ -114,12 +122,12 @@ private boolean removeChunk(ByteChunk<? extends Values> values, long destination
return false;
}

final long nonNullCount = nonNullCounter.addNonNullUnsafe(destination, -chunkNonNull.get());
final long totalNormalCount = nonNullCounter.addNonNullUnsafe(destination, -chunkNonNull.get());

final double newSum;
final double newSum2;

if (nonNullCount == 0) {
if (totalNormalCount == 0) {
newSum = newSum2 = 0;
} else {
newSum = plusDouble(sumSource.getUnsafe(destination), -sum);
Expand All @@ -129,12 +137,16 @@ private boolean removeChunk(ByteChunk<? extends Values> values, long destination
sumSource.set(destination, newSum);
sum2Source.set(destination, newSum2);

if (nonNullCount <= 1) {
if (totalNormalCount == 0) {
resultColumn.set(destination, NULL_DOUBLE);
return true;
}
if (totalNormalCount == 1) {
resultColumn.set(destination, Double.NaN);
return true;
}

final double variance = (newSum2 - (newSum * newSum / nonNullCount)) / (nonNullCount - 1);
final double variance = (newSum2 - (newSum * newSum / totalNormalCount)) / (totalNormalCount - 1);
resultColumn.set(destination, std ? Math.sqrt(variance) : variance);

return true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import static io.deephaven.engine.table.impl.by.RollupConstants.*;
import static io.deephaven.engine.util.NullSafeAddition.plusLong;
import static io.deephaven.engine.util.NullSafeAddition.minusLong;
import static io.deephaven.util.QueryConstants.NULL_DOUBLE;

/**
* Iterative average operator.
Expand Down Expand Up @@ -88,7 +89,7 @@ private boolean addChunk(CharChunk<? extends Values> values, long destination, i
runningSum.set(destination, newSum);
resultColumn.set(destination, (double) newSum / newCount);
} else if (nonNullCount.onlyNullsUnsafe(destination)) {
resultColumn.set(destination, Double.NaN);
resultColumn.set(destination, NULL_DOUBLE);
} else {
return false;
}
Expand All @@ -106,8 +107,11 @@ private boolean removeChunk(CharChunk<? extends Values> values, long destination
final long newCount = nonNullCount.addNonNullUnsafe(destination, -chunkNonNull.get());
final long newSum = minusLong(runningSum.getUnsafe(destination), chunkSum);
runningSum.set(destination, newSum);
resultColumn.set(destination, (double) newSum / newCount);

if (newCount == 0) {
resultColumn.set(destination, NULL_DOUBLE);
} else {
resultColumn.set(destination, (double) newSum / newCount);
}
return true;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
//
package io.deephaven.engine.table.impl.by;

import io.deephaven.base.verify.Assert;
import io.deephaven.chunk.attributes.ChunkLengths;
import io.deephaven.chunk.attributes.ChunkPositions;
import io.deephaven.chunk.attributes.Values;
Expand All @@ -19,6 +20,7 @@

import static io.deephaven.engine.table.impl.by.RollupConstants.*;
import static io.deephaven.engine.util.NullSafeAddition.plusDouble;
import static io.deephaven.util.QueryConstants.NULL_DOUBLE;

/**
* Iterative variance operator.
Expand Down Expand Up @@ -82,21 +84,27 @@ private boolean addChunk(CharChunk<? extends Values> values, long destination, i
final double sum = SumCharChunk.sum2CharChunk(values, chunkStart, chunkSize, chunkNonNull, sum2);

if (chunkNonNull.get() > 0) {
final long nonNullCount = nonNullCounter.addNonNullUnsafe(destination, chunkNonNull.get());
final long totalNormalCount = nonNullCounter.addNonNullUnsafe(destination, chunkNonNull.get());
final double newSum = plusDouble(sumSource.getUnsafe(destination), sum);
final double newSum2 = plusDouble(sum2Source.getUnsafe(destination), sum2.doubleValue());

sumSource.set(destination, newSum);
sum2Source.set(destination, newSum2);

if (nonNullCount <= 1) {
Assert.neqZero(totalNormalCount, "totalNormalCount");
if (totalNormalCount == 1) {
resultColumn.set(destination, Double.NaN);
} else {
final double variance = (newSum2 - (newSum * newSum / nonNullCount)) / (nonNullCount - 1);
final double variance = (newSum2 - (newSum * newSum / totalNormalCount)) / (totalNormalCount - 1);
resultColumn.set(destination, std ? Math.sqrt(variance) : variance);
}
} else if (nonNullCounter.getCountUnsafe(destination) <= 1) {
resultColumn.set(destination, Double.NaN);
} else {
final long totalNormalCount = nonNullCounter.getCountUnsafe(destination);
if (totalNormalCount == 0) {
resultColumn.set(destination, NULL_DOUBLE);
} else if (totalNormalCount == 1) {
resultColumn.set(destination, Double.NaN);
}
}
return true;
}
Expand All @@ -110,12 +118,12 @@ private boolean removeChunk(CharChunk<? extends Values> values, long destination
return false;
}

final long nonNullCount = nonNullCounter.addNonNullUnsafe(destination, -chunkNonNull.get());
final long totalNormalCount = nonNullCounter.addNonNullUnsafe(destination, -chunkNonNull.get());

final double newSum;
final double newSum2;

if (nonNullCount == 0) {
if (totalNormalCount == 0) {
newSum = newSum2 = 0;
} else {
newSum = plusDouble(sumSource.getUnsafe(destination), -sum);
Expand All @@ -125,12 +133,16 @@ private boolean removeChunk(CharChunk<? extends Values> values, long destination
sumSource.set(destination, newSum);
sum2Source.set(destination, newSum2);

if (nonNullCount <= 1) {
if (totalNormalCount == 0) {
resultColumn.set(destination, NULL_DOUBLE);
return true;
}
if (totalNormalCount == 1) {
resultColumn.set(destination, Double.NaN);
return true;
}

final double variance = (newSum2 - (newSum * newSum / nonNullCount)) / (nonNullCount - 1);
final double variance = (newSum2 - (newSum * newSum / totalNormalCount)) / (totalNormalCount - 1);
resultColumn.set(destination, std ? Math.sqrt(variance) : variance);

return true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

import static io.deephaven.engine.table.impl.by.RollupConstants.*;
import static io.deephaven.engine.util.NullSafeAddition.plusDouble;
import static io.deephaven.util.QueryConstants.NULL_DOUBLE;

class DoubleChunkedAvgOperator extends FpChunkedNonNormalCounter implements IterativeChunkedAggregationOperator {
private final String name;
Expand Down Expand Up @@ -141,19 +142,23 @@ private void updateResultWithNewSum(long destination, long totalNormal, long tot
resultColumn.set(destination, Double.POSITIVE_INFINITY);
} else if (totalNegativeInfinityCount > 0) {
resultColumn.set(destination, Double.NEGATIVE_INFINITY);
} else if (totalNormal == 0) {
resultColumn.set(destination, NULL_DOUBLE);
} else {
resultColumn.set(destination, newSum / totalNormal);
}
}

private void updateResultSumUnchanged(long destination, long totalNormal, long totalNanCount,
long totalInfinityCount, long totalNegativeInfinityCount) {
if (totalNanCount > 0 || totalNormal == 0 || (totalInfinityCount > 0 && totalNegativeInfinityCount > 0)) {
if (totalNanCount > 0 || (totalInfinityCount > 0 && totalNegativeInfinityCount > 0)) {
resultColumn.set(destination, Double.NaN);
} else if (totalInfinityCount > 0) {
resultColumn.set(destination, Double.POSITIVE_INFINITY);
} else if (totalNegativeInfinityCount > 0) {
resultColumn.set(destination, Double.NEGATIVE_INFINITY);
} else if (totalNormal == 0) {
resultColumn.set(destination, NULL_DOUBLE);
} else {
resultColumn.set(destination, runningSum.getUnsafe(destination) / totalNormal);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
import java.util.Collections;
import java.util.Map;

import static io.deephaven.util.QueryConstants.NULL_DOUBLE;

/**
* Iterative average operator.
*/
Expand Down Expand Up @@ -127,12 +129,14 @@ private boolean updateResult(long destination) {

private boolean updateResult(long destination, long nncValue, long nanValue, long picValue, long nicValue,
double sumSumValue) {
if (nanValue > 0 || (picValue > 0 && nicValue > 0) || nncValue == 0) {
if (nanValue > 0 || (picValue > 0 && nicValue > 0)) {
return !Double.isNaN(resultColumn.getAndSetUnsafe(destination, Double.NaN));
} else if (picValue > 0) {
return resultColumn.getAndSetUnsafe(destination, Double.POSITIVE_INFINITY) != Double.POSITIVE_INFINITY;
} else if (nicValue > 0) {
return resultColumn.getAndSetUnsafe(destination, Double.NEGATIVE_INFINITY) != Double.NEGATIVE_INFINITY;
} else if (nncValue == 0) {
return resultColumn.getAndSetUnsafe(destination, NULL_DOUBLE) != NULL_DOUBLE;
} else {
final double newValue = (double) (sumSumValue / nncValue);
return resultColumn.getAndSetUnsafe(destination, newValue) != newValue;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
// @formatter:off
package io.deephaven.engine.table.impl.by;

import io.deephaven.base.verify.Assert;
import io.deephaven.chunk.attributes.ChunkLengths;
import io.deephaven.chunk.attributes.ChunkPositions;
import io.deephaven.chunk.attributes.Values;
Expand All @@ -23,6 +24,7 @@
import java.util.Map;

import static io.deephaven.engine.table.impl.by.RollupConstants.*;
import static io.deephaven.util.QueryConstants.NULL_DOUBLE;

/**
* Iterative variance operator.
Expand Down Expand Up @@ -95,31 +97,40 @@ private boolean addChunk(DoubleChunk<? extends Values> values, long destination,
final boolean forceNanResult = totalNegativeInfinities > 0 || totalPositiveInfinities > 0 || totalNanCount > 0;

if (chunkNormalCount.get() > 0) {
final long nonNullCount = nonNullCounter.addNonNullUnsafe(destination, chunkNormalCount.get());
final long totalNormalCount = nonNullCounter.addNonNullUnsafe(destination, chunkNormalCount.get());
final double newSum = NullSafeAddition.plusDouble(sumSource.getUnsafe(destination), sum);
final double newSum2 = NullSafeAddition.plusDouble(sum2Source.getUnsafe(destination), sum2.doubleValue());

sumSource.set(destination, newSum);
sum2Source.set(destination, newSum2);

if (forceNanResult || nonNullCount <= 1) {
Assert.neqZero(totalNormalCount, "totalNormalCount");
if (forceNanResult || totalNormalCount == 1) {
resultColumn.set(destination, Double.NaN);
} else {
// If the sum or sumSquared has reached +/-Infinity, we are stuck with NaN forever.
if (Double.isInfinite(newSum) || Double.isInfinite(newSum2)) {
resultColumn.set(destination, Double.NaN);
return true;
}
final double variance = computeVariance(nonNullCount, newSum, newSum2);
final double variance = computeVariance(totalNormalCount, newSum, newSum2);
resultColumn.set(destination, std ? Math.sqrt(variance) : variance);
}
return true;
} else if (forceNanResult || (nonNullCounter.getCountUnsafe(destination) <= 1)) {
}
if (forceNanResult) {
resultColumn.set(destination, Double.NaN);
return true;
}
final long totalNormalCount = nonNullCounter.getCountUnsafe(destination);
if (totalNormalCount == 0) {
resultColumn.set(destination, NULL_DOUBLE);
return true;
} else if (totalNormalCount == 1) {
resultColumn.set(destination, Double.NaN);
return true;
} else {
return false;
}
return false;
}

private static double computeVariance(long nonNullCount, double newSum, double newSum2) {
Expand Down Expand Up @@ -165,16 +176,17 @@ private boolean removeChunk(DoubleChunk<? extends Values> values, long destinati
}
sumSource.set(destination, newSum);
sum2Source.set(destination, newSum2);
} else if (totalNormalCount <= 1 || forceNanResult) {
resultColumn.set(destination, Double.NaN);
return true;
} else {
newSum = sumSource.getUnsafe(destination);
newSum2 = sum2Source.getUnsafe(destination);
}
if (totalNormalCount <= 1) {

if (totalNormalCount == 1 || forceNanResult) {
resultColumn.set(destination, Double.NaN);
return true;
} else if (totalNormalCount == 0) {
resultColumn.set(destination, NULL_DOUBLE);
return true;
}

// If the sum has reached +/-Infinity, we are stuck with NaN forever.
Expand All @@ -186,6 +198,7 @@ private boolean removeChunk(DoubleChunk<? extends Values> values, long destinati
// Perform the calculation in a way that minimizes the impact of FP error.
final double variance = computeVariance(totalNormalCount, newSum, newSum2);
resultColumn.set(destination, std ? Math.sqrt(variance) : variance);

return true;
}

Expand Down
Loading
Loading