Skip to content

Commit

Permalink
New picksplit
Browse files Browse the repository at this point in the history
  • Loading branch information
zachasme committed Oct 14, 2024
1 parent 905aa81 commit f5c63c6
Show file tree
Hide file tree
Showing 8 changed files with 471 additions and 110 deletions.
1 change: 1 addition & 0 deletions h3/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ PostgreSQL_add_extension(postgresql_h3
src/binding/regions.c
src/binding/traversal.c
src/binding/vertex.c
src/algos.c
src/deprecated.c
src/extension.c
src/guc.c
Expand Down
10 changes: 5 additions & 5 deletions h3/sql/install/14-opclass_gist.sql
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,11 @@ CREATE OR REPLACE FUNCTION h3index_gist_same(h3index, h3index, internal) RETURNS
CREATE OR REPLACE FUNCTION h3index_gist_distance(internal, h3index, smallint, oid, internal) RETURNS float8
AS 'h3' LANGUAGE C STRICT;

CREATE OPERATOR CLASS h3index_ops DEFAULT FOR TYPE h3index USING gist AS
OPERATOR 3 && ,
OPERATOR 6 = ,
OPERATOR 7 @> ,
OPERATOR 8 <@ ,
CREATE OPERATOR CLASS experimental_h3index_ops FOR TYPE h3index USING gist AS
OPERATOR 3 && , -- RTOverlapStrategyNumber
OPERATOR 6 = , -- RTSameStrategyNumber
OPERATOR 7 @> , -- RTContainsStrategyNumber
OPERATOR 8 <@ , -- RTContainedByStrategyNumber
OPERATOR 15 <-> (h3index, h3index) FOR ORDER BY integer_ops,

FUNCTION 1 h3index_gist_consistent(internal, h3index, smallint, oid, internal),
Expand Down
2 changes: 1 addition & 1 deletion h3/sql/updates/h3--4.1.3--unreleased.sql
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ CREATE OR REPLACE FUNCTION h3index_gist_same(h3index, h3index, internal) RETURNS
CREATE OR REPLACE FUNCTION h3index_gist_distance(internal, h3index, smallint, oid, internal) RETURNS float8
AS 'h3' LANGUAGE C STRICT;

CREATE OPERATOR CLASS h3index_ops DEFAULT FOR TYPE h3index USING gist AS
CREATE OPERATOR CLASS experimental_h3index_ops FOR TYPE h3index USING gist AS
OPERATOR 3 && ,
OPERATOR 6 = ,
OPERATOR 7 @> ,
Expand Down
51 changes: 51 additions & 0 deletions h3/src/algos.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* Copyright 2023 Bytes & Brains
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <h3api.h>
#include "algos.h"
#include "error.h"

/*
 * finest_common_ancestor
 *
 * Returns the highest-resolution cell that contains (or equals) both `a`
 * and `b`.
 *
 *  - If `a == b`, that value is returned unchanged.
 *  - If either argument is H3_NULL, or the two cells have different base
 *    cell numbers, H3_NULL is returned.
 *  - Otherwise the parents of both cells are compared, starting at the
 *    coarser of the two input resolutions and walking down to resolution 0;
 *    the first matching parent is returned.
 *
 * Every cellToParent() result is checked through h3_assert().
 */
H3Index
finest_common_ancestor(H3Index a, H3Index b)
{
	int			aRes,
				bRes,
				coarsestRes;
	H3Index		aParent,
				bParent;

	if (a == b)
		return a;

	/*
	 * H3_NULL is not a cell: keep answering H3_NULL for it instead of asking
	 * the library for the parents of an invalid index.
	 */
	if (a == H3_NULL || b == H3_NULL)
		return H3_NULL;

	/* do not even share the base cell, so there is nothing in common */
	if (getBaseCellNumber(a) != getBaseCellNumber(b))
		return H3_NULL;

	aRes = getResolution(a);
	bRes = getResolution(b);

	/* a common ancestor cannot be finer than the coarser of the two inputs */
	coarsestRes = (aRes < bRes) ? aRes : bRes;

	/*
	 * Iterate backwards through resolutions, *including* resolution 0:
	 * cells that diverge at resolution 1 still share their resolution-0
	 * parent, which is a tighter result than H3_NULL.
	 */
	for (int res = coarsestRes; res >= 0; res--)
	{
		h3_assert(cellToParent(a, res, &aParent));
		h3_assert(cellToParent(b, res, &bParent));
		if (aParent == bParent)
			return aParent;
	}

	/* no shared parent found at any resolution */
	return H3_NULL;
}
24 changes: 24 additions & 0 deletions h3/src/algos.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/*
* Copyright 2023 Bytes & Brains
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef H3_ALGOS_H
#define H3_ALGOS_H

#include <h3api.h>

/*
 * Returns the finest (highest-resolution) cell that is an ancestor of, or
 * equal to, both arguments, or H3_NULL when no common ancestor is found.
 */
H3Index finest_common_ancestor(H3Index, H3Index);

#endif /* H3_ALGOS_H */
179 changes: 101 additions & 78 deletions h3/src/opclass_gist.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2019-2020 Bytes & Brains
* Copyright 2019-2023 Bytes & Brains
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -21,11 +21,9 @@
#include <access/gist.h> // GiST

#include <h3api.h> // Main H3 include
#include "type.h"
#include "algos.h"
#include "error.h"

#define H3_ROOT_INDEX -1

#include "type.h"

PGDLLEXPORT PG_FUNCTION_INFO_V1(h3index_gist_union);
PGDLLEXPORT PG_FUNCTION_INFO_V1(h3index_gist_consistent);
Expand Down Expand Up @@ -58,54 +56,16 @@ gist_cmp(H3Index a, H3Index b)

/* a contains b */
error = cellToParent(b, aRes, &bParent);
if (!error && a == H3_ROOT_INDEX || (aRes < bRes && bParent == a))
if (!error && a == H3_NULL || (aRes < bRes && bParent == a))
return 1;

/* a contained by b */
error = cellToParent(a, bRes, &aParent);
if (!error && b == H3_ROOT_INDEX || (aRes > bRes && aParent == b))
if (!error && b == H3_NULL || (aRes > bRes && aParent == b))
return -1;

/* no overlap */
return 0;
}

/**
* GiST support
*/

static H3Index
common_ancestor(H3Index a, H3Index b)
{
int aRes,
bRes,
bigRes;
H3Index aParent,
bParent;

if (a == b)
return a;

/* do not even share the basecell */
if (getBaseCellNumber(a) != getBaseCellNumber(b))
return H3_ROOT_INDEX;

aRes = getResolution(a);
bRes = getResolution(b);
bigRes = (aRes > bRes) ? aRes : bRes;

/* iterate back basecells */
for (int i = bigRes; i > 0; i--)
{
if (cellToParent(a, i, &aParent))
continue;
if (cellToParent(b, i, &bParent))
continue;
if (aParent == bParent)
return aParent;
}

return H3_ROOT_INDEX;
return 0;
}

/**
Expand All @@ -125,7 +85,7 @@ h3index_gist_union(PG_FUNCTION_ARGS)
for (int i = 1; i < n; i++)
{
tmp = DatumGetH3Index(entries[i].key);
out = common_ancestor(out, tmp);
out = finest_common_ancestor(out, tmp);
}

PG_RETURN_H3INDEX(out);
Expand Down Expand Up @@ -154,10 +114,13 @@ h3index_gist_consistent(PG_FUNCTION_ARGS)
switch (strategy)
{
case RTOverlapStrategyNumber:
/* x && y */
PG_RETURN_BOOL(gist_cmp(key, query) != 0);
case RTContainsStrategyNumber:
/* x @> y */
PG_RETURN_BOOL(gist_cmp(key, query) > 0);
case RTContainedByStrategyNumber:
/* x <@ y */
if (GIST_LEAF(entry))
{
PG_RETURN_BOOL(gist_cmp(key, query) < 0);
Expand Down Expand Up @@ -201,7 +164,7 @@ h3index_gist_penalty(PG_FUNCTION_ARGS)
H3Index orig = DatumGetH3Index(origentry->key);
H3Index new = DatumGetH3Index(newentry->key);

H3Index ancestor = common_ancestor(orig, new);
H3Index ancestor = finest_common_ancestor(orig, new);

*penalty = (float) getResolution(orig) - getResolution(ancestor);

Expand All @@ -219,21 +182,42 @@ h3index_gist_picksplit(PG_FUNCTION_ARGS)
{
GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);

OffsetNumber maxoff = entryvec->n - 1;
GISTENTRY *ent = entryvec->vector;
int i,
j,
real_i,
real_j,
nbytes;
OffsetNumber *left,
*right;
GISTENTRY **raw_entryvec;

H3Index tmp_union,
unionL,
unionR;
GISTENTRY **raw_entryvec;
unionR,
a,
b,
seed_union,
seed_left,
seed_right,
check_left,
check_right;

bool lset = false,
rset = false;

int res_a,
res_b,
res_finest;
int64_t waste,
max_waste,
nchildren,
size_l,
size_r,
check_size_l,
check_size_r;

nbytes = (maxoff + 1) * sizeof(OffsetNumber);

v->spl_left = (OffsetNumber *) palloc(nbytes);
Expand All @@ -249,55 +233,94 @@ h3index_gist_picksplit(PG_FUNCTION_ARGS)
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
raw_entryvec[i] = &(ent[i]);

/* FIRST, let's find the best initial split (the pair wasting the most space if grouped) */

max_waste = 0;
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
{
int real_index = raw_entryvec[i] - ent;
real_i = raw_entryvec[i] - entryvec->vector;
a = DatumGetH3Index(entryvec->vector[real_i].key);
for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
{
real_j = raw_entryvec[j] - entryvec->vector;
b = DatumGetH3Index(entryvec->vector[real_j].key);

tmp_union = DatumGetH3Index(ent[real_index].key);
/* DEBUG_H3INDEX(tmp_union); */
/* Assert(tmp_union != NULL); */
/* no waste if containment */
if (gist_cmp(a, b) != 0)
{
waste = 0;
/* otherwise, let's calculate the waste */
}
else
{
seed_union = finest_common_ancestor(a, b);

res_a = getResolution(a);
res_b = getResolution(b);
res_finest = (res_a > res_b) ? res_a : res_b;

h3_assert(cellToChildrenSize(seed_union, res_finest, &waste));
h3_assert(cellToChildrenSize(a, res_finest, &nchildren));
waste -= nchildren;
h3_assert(cellToChildrenSize(b, res_finest, &nchildren));
waste -= nchildren;
}

if (waste > max_waste)
{
max_waste = waste;
seed_left = a;
seed_right = b;
}
}
}
DEBUG("BEST SPLIT %i", max_waste);
DEBUG_H3INDEX(seed_left);
DEBUG_H3INDEX(seed_right);
unionL = seed_left;
size_l = getResolution(unionL);
unionR = seed_right;
size_r = getResolution(unionR);

/* SECOND assign each node to best seed */

for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
{
real_i = raw_entryvec[i] - ent;
a = DatumGetH3Index(ent[real_i].key);

check_left = finest_common_ancestor(unionL, a);
check_size_l = getResolution(check_left);

check_right = finest_common_ancestor(unionR, a);
check_size_r = getResolution(check_right);

/*
* Choose where to put the index entries and update unionL and unionR
* accordingly. Append the entries to either v_spl_left or
* v_spl_right, and care about the counters.
*/

if (v->spl_nleft < v->spl_nright)
if (check_size_l - size_l < check_size_r - size_r)
{
if (lset == false)
{
lset = true;
unionL = tmp_union;
}
else
{
unionL = common_ancestor(unionL, tmp_union);
}
*left = real_index;
unionL = check_left;
size_l = check_size_l;
*left = real_i;
++left;
++(v->spl_nleft);
}
else
{
if (rset == false)
{
rset = true;
/* DEBUG_H3INDEX(tmp_union); */
unionR = tmp_union;
}
else
{
unionR = common_ancestor(unionR, tmp_union);
}
*right = real_index;
unionR = check_right;
size_r = check_size_r;
*right = real_i;
++right;
++(v->spl_nright);
}
}

v->spl_ldatum = H3IndexGetDatum(unionL);
v->spl_rdatum = H3IndexGetDatum(unionR);

PG_RETURN_POINTER(v);
}

Expand Down
Loading

0 comments on commit f5c63c6

Please sign in to comment.