Skip to content

Commit

Permalink
fix to issue 374 (#376)
Browse files Browse the repository at this point in the history
  • Loading branch information
sudiptoguha authored Mar 14, 2023
1 parent 63743aa commit cffd221
Show file tree
Hide file tree
Showing 10 changed files with 93 additions and 12 deletions.
2 changes: 1 addition & 1 deletion Java/benchmark/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<parent>
<groupId>software.amazon.randomcutforest</groupId>
<artifactId>randomcutforest-parent</artifactId>
<version>3.5.1-SNAPSHOT</version>
<version>3.5.1</version>
</parent>

<artifactId>randomcutforest-benchmark</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion Java/core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<parent>
<groupId>software.amazon.randomcutforest</groupId>
<artifactId>randomcutforest-parent</artifactId>
<version>3.5.1-SNAPSHOT</version>
<version>3.5.1</version>
</parent>

<artifactId>randomcutforest-core</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,13 +123,18 @@ public void setBoundingBoxCacheFraction(double fraction) {

/**
* Return a new {@link Cut}, which is chosen uniformly over the space of
* possible cuts for the given bounding box.
* possible cuts for a bounding box and its union with a point. The cut must
* exist unless the union box is a single point. There are floating point issues
* -- even though the original values are in float and the calculations are in
* double -- which can show up with a large number of dimensions (each triggering
* an addition/subtraction).
*
* @param factor A random value used to pick the cut; the break point is factor times the total range of the union
* @param point the point whose union is taken with the box
* @param box A bounding box that we want to find a random cut for.
* @return A new Cut corresponding to a random cut in the bounding box.
*/
protected Cut randomCut(double factor, float[] point, BoundingBox box) {
protected static Cut randomCut(double factor, float[] point, BoundingBox box) {
double range = 0.0;

for (int i = 0; i < point.length; i++) {
Expand All @@ -143,6 +148,9 @@ protected Cut randomCut(double factor, float[] point, BoundingBox box) {
range += maxValue - minValue;
}

checkArgument(range > 0, " the union is a single point " + Arrays.toString(point)
+ "or the box is inappropriate, box" + box.toString() + "factor =" + factor);

double breakPoint = factor * range;

for (int i = 0; i < box.getDimensions(); i++) {
Expand All @@ -169,7 +177,50 @@ protected Cut randomCut(double factor, float[] point, BoundingBox box) {
breakPoint -= gap;
}

throw new IllegalStateException("The break point did not lie inside the expected range");
// if we are here then factor is likely almost 1 and we have floating point
// issues. We will randomize between the first and the last non-zero ranges
// and choose the same cutValue as using nextAfter above -- we will use the
// factor as a seed and will not optimize what follows (either in execution or
// code) to ensure easier debugging.
// NOTE(review): (long) factor truncates toward zero, so any factor in [0, 1)
// yields seed 0 -- confirm that a constant seed is intended here.
// this should be an anomaly - no pun intended.

Random rng = new Random((long) factor);
if (rng.nextDouble() < 0.5) {
for (int i = 0; i < box.getDimensions(); i++) {
float minValue = (float) box.getMinValue(i);
float maxValue = (float) box.getMaxValue(i);
if (point[i] < minValue) {
minValue = point[i];
} else if (point[i] > maxValue) {
maxValue = point[i];
}
if (maxValue > minValue) {
double cutValue = Math.nextAfter((float) maxValue, minValue);
return new Cut(i, cutValue);
}
}
} else {
for (int i = box.getDimensions() - 1; i >= 0; i--) {
float minValue = (float) box.getMinValue(i);
float maxValue = (float) box.getMaxValue(i);
if (point[i] < minValue) {
minValue = point[i];
} else if (point[i] > maxValue) {
maxValue = point[i];
}
if (maxValue > minValue) {
double cutValue = Math.nextAfter((float) maxValue, minValue);
return new Cut(i, cutValue);
}
}
}

throw new IllegalStateException("The break point did not lie inside the expected range; factor " + factor
+ ", point " + Arrays.toString(point) + " box " + box.toString());

}

public Integer addPoint(Integer pointIndex, long sequenceIndex) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -890,11 +890,13 @@ public void testFloatingPointRandomCut() {
int dimensions = 16;
int numberOfTrees = 41;
int sampleSize = 64;
long seed = new Random().nextLong();
System.out.println(" seed " + seed);
int dataSize = 4000 * sampleSize;
double[][] big = generateShingledData(dataSize, dimensions, 2);
RandomCutForest forest = RandomCutForest.builder().compact(true).dimensions(dimensions)
.numberOfTrees(numberOfTrees).sampleSize(sampleSize).precision(Precision.FLOAT_32)
.randomSeed(2051627799894425983L).boundingBoxCacheFraction(1.0).build();
.numberOfTrees(numberOfTrees).sampleSize(sampleSize).precision(Precision.FLOAT_32).randomSeed(seed)
.boundingBoxCacheFraction(1.0).build();

int num = 0;
for (double[] point : big) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -366,4 +366,32 @@ public void testUpdatesOnSmallBoundingBox() {
tree.addPoint(i % points.size(), point.getSequenceIndex());
}
}

/**
 * Prints the residue left in a double accumulator after float terms are added
 * and all but the last one are subtracted again, then calls
 * {@code RandomCutTree.randomCut} with a factor just below 1 on a box built
 * from 230000-entry arrays. Nothing is asserted; the test passes as long as no
 * exception is thrown.
 */
@Test
public void testfloat() {
    float base = 110.13f;
    int count = 230000;
    double total = 0;

    // add one float term per index into the double accumulator
    int k = 0;
    while (k < count) {
        float term = (base * (count - k + 1) - base);
        total += term;
        k++;
    }
    System.out.println(total);

    // subtract every term except the last one (index count - 1)
    int j = 0;
    while (j < count - 1) {
        float term = (base * (count - j + 1) - base);
        total -= term;
        j++;
    }
    System.out.println(total + " " + (double) base + " " + (total <= (double) base));

    // first array is constant; second grows as the index decreases
    // NOTE(review): presumably these are the box's min and max corners --
    // confirm against the BoundingBox constructor
    float[] flat = new float[count];
    float[] scaled = new float[count];
    for (int d = 0; d < count; d++) {
        flat[d] = base;
        scaled[d] = (count - d + 1) * base;
    }

    BoundingBox wideBox = new BoundingBox(flat, scaled);
    System.out.println("rangesum " + wideBox.getRangeSum());

    double factor = 1.0 - 1e-16;
    System.out.println(factor);

    // the point handed to randomCut is the same constant array used for the box
    Cut result = RandomCutTree.randomCut(factor, flat, wideBox);
}
}
2 changes: 1 addition & 1 deletion Java/examples/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
<parent>
<groupId>software.amazon.randomcutforest</groupId>
<artifactId>randomcutforest-parent</artifactId>
<version>3.5.1-SNAPSHOT</version>
<version>3.5.1</version>
</parent>

<artifactId>randomcutforest-examples</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion Java/parkservices/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<parent>
<groupId>software.amazon.randomcutforest</groupId>
<artifactId>randomcutforest-parent</artifactId>
<version>3.5.1-SNAPSHOT</version>
<version>3.5.1</version>
</parent>

<artifactId>randomcutforest-parkservices</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion Java/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

<groupId>software.amazon.randomcutforest</groupId>
<artifactId>randomcutforest-parent</artifactId>
<version>3.5.1-SNAPSHOT</version>
<version>3.5.1</version>
<packaging>pom</packaging>

<name>software.amazon.randomcutforest:randomcutforest</name>
Expand Down
2 changes: 1 addition & 1 deletion Java/serialization/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
<parent>
<groupId>software.amazon.randomcutforest</groupId>
<artifactId>randomcutforest-parent</artifactId>
<version>3.5.1-SNAPSHOT</version>
<version>3.5.1</version>
</parent>

<artifactId>randomcutforest-serialization</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion Java/testutils/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<parent>
<artifactId>randomcutforest-parent</artifactId>
<groupId>software.amazon.randomcutforest</groupId>
<version>3.5.1-SNAPSHOT</version>
<version>3.5.1</version>
</parent>

<artifactId>randomcutforest-testutils</artifactId>
Expand Down

0 comments on commit cffd221

Please sign in to comment.