Skip to content

Commit

Permalink
Improve vertex sorting code and use radix sorts
Browse files Browse the repository at this point in the history
For larger arrays of floats (>80 elements), the radix sort
is significantly faster.

We also merge the calculation of each quad's centroid
into the metric calculation, so that we can avoid expensive
object dereferences, and so that it is no longer necessary to
allocate additional memory prior to sorting.
  • Loading branch information
jellysquid3 committed Jan 12, 2025
1 parent 3af8680 commit d74e421
Show file tree
Hide file tree
Showing 14 changed files with 352 additions and 323 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -530,43 +530,25 @@ static private BSPNode buildTopoMultiLeafNode(BSPWorkspace workspace, IntArrayLi
}

static private BSPNode buildSNRLeafNodeFromQuads(BSPWorkspace workspace, IntArrayList indexes, LongArrayList points) {
// in this case the points array is wrong, but its allocation can be reused
final var indexBuffer = indexes.elements();
final var indexCount = indexes.size();

int[] quadIndexes;
final var keys = new int[indexCount];
final var perm = new int[indexCount];

// adapted from SNR sorting code
if (RadixSort.useRadixSort(indexes.size())) {
final var keys = new int[indexes.size()];

for (int i = 0; i < indexes.size(); i++) {
var quadIndex = indexes.getInt(i);
keys[i] = MathUtil.floatToComparableInt(workspace.quads[quadIndex].getAccurateDotProduct());
}

quadIndexes = RadixSort.sort(keys);

for (int i = 0; i < indexes.size(); i++) {
quadIndexes[i] = indexes.getInt(quadIndexes[i]);
}
} else {
final var sortData = points.elements();

for (int i = 0; i < indexes.size(); i++) {
var quadIndex = indexes.getInt(i);
int dotProductComponent = MathUtil.floatToComparableInt(workspace.quads[quadIndex].getAccurateDotProduct());
sortData[i] = (long) dotProductComponent << 32 | quadIndex;
}

Arrays.sort(sortData, 0, indexes.size());
for (int i = 0; i < indexCount; i++) {
TQuad quad = workspace.quads[indexBuffer[i]];
keys[i] = MathUtil.floatToComparableInt(quad.getAccurateDotProduct());
perm[i] = i;
}

quadIndexes = new int[indexes.size()];
RadixSort.sortIndirect(perm, keys);

for (int i = 0; i < indexes.size(); i++) {
quadIndexes[i] = (int) sortData[i];
}
for (int i = 0; i < indexCount; i++) {
perm[i] = indexBuffer[perm[i]];
}

return new LeafMultiBSPNode(BSPSortState.compressIndexes(IntArrayList.wrap(quadIndexes), false));
return new LeafMultiBSPNode(BSPSortState.compressIndexes(IntArrayList.wrap(perm), false));
}

static private BSPNode buildSNRLeafNodeFromPoints(BSPWorkspace workspace, LongArrayList points) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import org.joml.Vector3fc;

import java.nio.IntBuffer;
import java.util.Arrays;
import java.util.function.IntConsumer;

/**
Expand Down Expand Up @@ -221,30 +220,22 @@ void writeSort(CombinedCameraPos cameraPos, boolean initial) {
*/
static void distanceSortDirect(IntBuffer indexBuffer, TQuad[] quads, Vector3fc cameraPos) {
if (quads.length <= 1) {
// Avoid allocations when there is nothing to sort.
TranslucentData.writeQuadVertexIndexes(indexBuffer, 0);
} else if (RadixSort.useRadixSort(quads.length)) {
} else {
final var keys = new int[quads.length];
final var perm = new int[quads.length];

for (int q = 0; q < quads.length; q++) {
keys[q] = ~Float.floatToRawIntBits(quads[q].getCenter().distanceSquared(cameraPos));
}

var indices = RadixSort.sort(keys);

for (int i = 0; i < quads.length; i++) {
TranslucentData.writeQuadVertexIndexes(indexBuffer, indices[i]);
}
} else {
final var data = new long[quads.length];
for (int q = 0; q < quads.length; q++) {
float distance = quads[q].getCenter().distanceSquared(cameraPos);
data[q] = (long) ~Float.floatToRawIntBits(distance) << 32 | q;
for (int idx = 0; idx < quads.length; idx++) {
var centroid = quads[idx].getCenter();
keys[idx] = ~Float.floatToRawIntBits(centroid.distanceSquared(cameraPos));
perm[idx] = idx;
}

Arrays.sort(data);
RadixSort.sortIndirect(perm, keys);

for (int i = 0; i < quads.length; i++) {
TranslucentData.writeQuadVertexIndexes(indexBuffer, (int) data[i]);
for (int idx = 0; idx < quads.length; idx++) {
TranslucentData.writeQuadVertexIndexes(indexBuffer, perm[idx]);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,31 +44,21 @@ private static StaticNormalRelativeData fromDoubleUnaligned(int[] vertexCounts,
var indexBuffer = sorter.getIntBuffer();

if (quads.length <= 1) {
// Avoid allocations when there is nothing to sort.
TranslucentData.writeQuadVertexIndexes(indexBuffer, 0);
} else if (RadixSort.useRadixSort(quads.length)) {
} else {
final var keys = new int[quads.length];
final var perm = new int[quads.length];

for (int q = 0; q < quads.length; q++) {
keys[q] = MathUtil.floatToComparableInt(quads[q].getAccurateDotProduct());
perm[q] = q;
}

var indices = RadixSort.sort(keys);

for (int i = 0; i < quads.length; i++) {
TranslucentData.writeQuadVertexIndexes(indexBuffer, indices[i]);
}
} else {
final var sortData = new long[quads.length];

for (int q = 0; q < quads.length; q++) {
int dotProductComponent = MathUtil.floatToComparableInt(quads[q].getAccurateDotProduct());
sortData[q] = (long) dotProductComponent << 32 | q;
}

Arrays.sort(sortData);
RadixSort.sortIndirect(perm, keys);

for (int i = 0; i < quads.length; i++) {
TranslucentData.writeQuadVertexIndexes(indexBuffer, (int) sortData[i]);
TranslucentData.writeQuadVertexIndexes(indexBuffer, perm[i]);
}
}

Expand All @@ -86,21 +76,14 @@ private static StaticNormalRelativeData fromMixed(int[] vertexCounts,
var indexBuffer = sorter.getIntBuffer();

var maxQuadCount = 0;
boolean anyNeedsSortData = false;

for (var vertexCount : vertexCounts) {
if (vertexCount != -1) {
var quadCount = TranslucentData.vertexCountToQuadCount(vertexCount);
maxQuadCount = Math.max(maxQuadCount, quadCount);
anyNeedsSortData |= !RadixSort.useRadixSort(quadCount) && quadCount > 1;
}
}

long[] sortData = null;
if (anyNeedsSortData) {
sortData = new long[maxQuadCount];
}

int quadIndex = 0;
for (var vertexCount : vertexCounts) {
if (vertexCount == -1 || vertexCount == 0) {
continue;
Expand All @@ -110,32 +93,19 @@ private static StaticNormalRelativeData fromMixed(int[] vertexCounts,

if (count == 1) {
TranslucentData.writeQuadVertexIndexes(indexBuffer, 0);
quadIndex++;
} else if (RadixSort.useRadixSort(count)) {
} else {
final var keys = new int[count];
final var perm = new int[count];

for (int q = 0; q < count; q++) {
keys[q] = MathUtil.floatToComparableInt(quads[quadIndex++].getAccurateDotProduct());
for (int idx = 0; idx < count; idx++) {
keys[idx] = MathUtil.floatToComparableInt(quads[idx].getAccurateDotProduct());
perm[idx] = idx;
}

var indices = RadixSort.sort(keys);

for (int i = 0; i < count; i++) {
TranslucentData.writeQuadVertexIndexes(indexBuffer, indices[i]);
}
} else {
for (int i = 0; i < count; i++) {
var quad = quads[quadIndex++];
int dotProductComponent = MathUtil.floatToComparableInt(quad.getAccurateDotProduct());
sortData[i] = (long) dotProductComponent << 32 | i;
}

if (count > 1) {
Arrays.sort(sortData, 0, count);
}
RadixSort.sortIndirect(perm, keys);

for (int i = 0; i < count; i++) {
TranslucentData.writeQuadVertexIndexes(indexBuffer, (int) sortData[i]);
for (int idx = 0; idx < count; idx++) {
TranslucentData.writeQuadVertexIndexes(indexBuffer, perm[idx]);
}
}
}
Expand Down

This file was deleted.

This file was deleted.

This file was deleted.

Loading

0 comments on commit d74e421

Please sign in to comment.