From 3c8a5358e42ab8d11e0253c70f7cc7d37781b2ef Mon Sep 17 00:00:00 2001
From: Isaac Brodsky
Date: Tue, 25 Jun 2024 21:05:34 -0600
Subject: [PATCH] Varchar input/output for hierarchy except for compact (#112)

* rename to h3

* cell_to_parent

* cell_to_children

* child pos, center child

* README
---
 README.md                               |  10 +-
 src/h3_hierarchy.cpp                    | 187 +++++++++++++++++++++++-
 test/sql/h3/h3_functions_hierarchy.test |  50 +++++++
 3 files changed, 235 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index ef484f9..ec1d7d5 100644
--- a/README.md
+++ b/README.md
@@ -58,11 +58,11 @@ one to use. The unsigned and signed APIs are identical. Many functions also supp
 | `h3_is_res_class_iii` | [v](#fv) | True if the cell's resolution is class III
 | `h3_is_pentagon` | [v](#fv) | True if the cell is a pentagon
 | `h3_get_icosahedron_faces` | [v](#fv) | List of icosahedron face IDs the cell is on
-| `h3_cell_to_parent` | [i](#fi) | Get coarser cell for a cell
-| `h3_cell_to_children` | [i](#fi) | Get finer cells for a cell
-| `h3_cell_to_center_child` | [i](#fi) | Get the center finer cell for a cell
-| `h3_cell_to_child_pos` | [i](#fi) | Get a sub-indexing number for a cell inside a parent
-| `h3_child_pos_to_cell` | [i](#fi) | Convert parent and sub-indexing number to a cell ID
+| `h3_cell_to_parent` | [v](#fv) | Get coarser cell for a cell
+| `h3_cell_to_children` | [v](#fv) | Get finer cells for a cell
+| `h3_cell_to_center_child` | [v](#fv) | Get the center finer cell for a cell
+| `h3_cell_to_child_pos` | [v](#fv) | Get a sub-indexing number for a cell inside a parent
+| `h3_child_pos_to_cell` | [v](#fv) | Convert parent and sub-indexing number to a cell ID
 | `h3_compact_cells` | [i](#fi) | Convert a set of single-resolution cells to the minimal mixed-resolution set
 | `h3_uncompact_cells` | [i](#fi) | Convert a mixed-resolution set to a single-resolution set of cells
 | `h3_grid_disk` | [i](#fi) | Find cells within a grid distance
diff --git a/src/h3_hierarchy.cpp b/src/h3_hierarchy.cpp
index 28b30ab..963641d 100644
--- a/src/h3_hierarchy.cpp
+++ b/src/h3_hierarchy.cpp
@@ -21,6 +21,33 @@ static void CellToParentFunction(DataChunk &args, ExpressionState &state,
       });
 }
 
+static void CellToParentVarcharFunction(DataChunk &args, ExpressionState &state,
+                                        Vector &result) {
+  auto &inputs = args.data[0];
+  auto &inputs2 = args.data[1];
+  BinaryExecutor::ExecuteWithNulls<string_t, int, string_t>(
+      inputs, inputs2, result, args.size(),
+      [&](string_t input, int res, ValidityMask &mask, idx_t idx) {
+        H3Index h;
+        H3Error err0 = stringToH3(input.GetString().c_str(), &h);
+        if (err0) {
+          mask.SetInvalid(idx);
+          return StringVector::EmptyString(result, 0);
+        } else {
+          H3Index parent;
+          H3Error err1 = cellToParent(h, res, &parent);
+          if (err1) {
+            mask.SetInvalid(idx);
+            return StringVector::EmptyString(result, 0);
+          } else {
+            // AddString copies into the result's string heap; strdup leaked.
+            auto str = StringUtil::Format("%llx", parent);
+            return StringVector::AddString(result, str);
+          }
+        }
+      });
+}
+
 static void CellToChildrenFunction(DataChunk &args, ExpressionState &state,
                                    Vector &result) {
   auto result_data = FlatVector::GetData<list_entry_t>(result);
@@ -59,6 +86,82 @@ static void CellToChildrenFunction(DataChunk &args, ExpressionState &state,
   result.Verify(args.size());
 }
 
+static void CellToChildrenVarcharFunction(DataChunk &args,
+                                          ExpressionState &state,
+                                          Vector &result) {
+  UnifiedVectorFormat vdata;
+  args.data[0].ToUnifiedFormat(args.size(), vdata);
+  auto ldata = UnifiedVectorFormat::GetData<string_t>(vdata);
+
+  auto result_data = FlatVector::GetData<list_entry_t>(result);
+  for (idx_t i = 0; i < args.size(); i++) {
+    result_data[i].offset = ListVector::GetListSize(result);
+    // Unified data is addressed via the selection vector, not the row index.
+    string_t parentStr = ldata[vdata.sel->get_index(i)];
+    int32_t res = args.GetValue(1, i)
+                      .DefaultCastAs(LogicalType::INTEGER)
+                      .GetValue<int32_t>();
+    H3Index parent;
+    H3Error err0 = stringToH3(parentStr.GetString().c_str(), &parent);
+    if (err0) {
+      result.SetValue(i, Value(LogicalType::SQLNULL));
+    } else {
+      int64_t sz;
+      H3Error err1 = cellToChildrenSize(parent, res, &sz);
+      if (err1) {
+        result.SetValue(i, Value(LogicalType::SQLNULL));
+      } else {
+        std::vector<H3Index> out(sz);
+        H3Error err2 = cellToChildren(parent, res, out.data());
+        if (err2) {
+          result.SetValue(i, Value(LogicalType::SQLNULL));
+        } else {
+          int64_t actual = 0;
+          for (auto val : out) {
+            if (val != H3_NULL) {
+              // Value(str) owns a copy of the text, so no strdup (it leaked).
+              auto str = StringUtil::Format("%llx", val);
+              ListVector::PushBack(result, Value(str));
+              actual++;
+            }
+          }
+
+          result_data[i].length = actual;
+        }
+      }
+    }
+  }
+  result.Verify(args.size());
+}
+
+static void CellToCenterChildVarcharFunction(DataChunk &args,
+                                             ExpressionState &state,
+                                             Vector &result) {
+  auto &inputs = args.data[0];
+  auto &inputs2 = args.data[1];
+  BinaryExecutor::ExecuteWithNulls<string_t, int, string_t>(
+      inputs, inputs2, result, args.size(),
+      [&](string_t input, int res, ValidityMask &mask, idx_t idx) {
+        H3Index h;
+        H3Error err0 = stringToH3(input.GetString().c_str(), &h);
+        if (err0) {
+          mask.SetInvalid(idx);
+          return StringVector::EmptyString(result, 0);
+        } else {
+          H3Index parent;
+          H3Error err1 = cellToCenterChild(h, res, &parent);
+          if (err1) {
+            mask.SetInvalid(idx);
+            return StringVector::EmptyString(result, 0);
+          } else {
+            // AddString copies into the result's string heap; strdup leaked.
+            auto str = StringUtil::Format("%llx", parent);
+            return StringVector::AddString(result, str);
+          }
+        }
+      });
+}
+
 static void CellToCenterChildFunction(DataChunk &args, ExpressionState &state,
                                       Vector &result) {
   auto &inputs = args.data[0];
@@ -77,6 +180,32 @@ static void CellToCenterChildFunction(DataChunk &args, ExpressionState &state,
       });
 }
 
+static void CellToChildPosVarcharFunction(DataChunk &args,
+                                          ExpressionState &state,
+                                          Vector &result) {
+  auto &inputs = args.data[0];
+  auto &inputs2 = args.data[1];
+  BinaryExecutor::ExecuteWithNulls<string_t, int, int64_t>(
+      inputs, inputs2, result, args.size(),
+      [&](string_t input, int res, ValidityMask &mask, idx_t idx) {
+        H3Index h;
+        H3Error err0 = stringToH3(input.GetString().c_str(), &h);
+        if (err0) {
+          mask.SetInvalid(idx);
+          return int64_t(0);
+        } else {
+          int64_t child;
+          H3Error err1 = cellToChildPos(h, res, &child);
+          if (err1) {
+            mask.SetInvalid(idx);
+            return int64_t(0);
+          } else {
+            return child;
+          }
+        }
+      });
+}
+
 static void CellToChildPosFunction(DataChunk &args, ExpressionState &state,
                                    Vector &result) {
   auto &inputs = args.data[0];
@@ -95,6 +224,35 @@ static void CellToChildPosFunction(DataChunk &args, ExpressionState &state,
       });
 }
 
+static void ChildPosToCellVarcharFunction(DataChunk &args,
+                                          ExpressionState &state,
+                                          Vector &result) {
+  auto &inputs = args.data[0];
+  auto &inputs2 = args.data[1];
+  auto &inputs3 = args.data[2];
+  TernaryExecutor::ExecuteWithNulls<int64_t, string_t, int, string_t>(
+      inputs, inputs2, inputs3, result, args.size(),
+      [&](int64_t pos, string_t input, int res, ValidityMask &mask, idx_t idx) {
+        H3Index h;
+        H3Error err0 = stringToH3(input.GetString().c_str(), &h);
+        if (err0) {
+          mask.SetInvalid(idx);
+          return StringVector::EmptyString(result, 0);
+        } else {
+          H3Index child;
+          H3Error err = childPosToCell(pos, h, res, &child);
+          if (err) {
+            mask.SetInvalid(idx);
+            return StringVector::EmptyString(result, 0);
+          } else {
+            // AddString copies into the result's string heap; strdup leaked.
+            auto str = StringUtil::Format("%llx", child);
+            return StringVector::AddString(result, str);
+          }
+        }
+      });
+}
+
 static void ChildPosToCellFunction(DataChunk &args, ExpressionState &state,
                                    Vector &result) {
   auto &inputs = args.data[0];
@@ -275,7 +433,9 @@ static void UncompactCellsFunction(DataChunk &args, ExpressionState &state,
 
 CreateScalarFunctionInfo H3Functions::GetCellToParentFunction() {
   ScalarFunctionSet funcs("h3_cell_to_parent");
-  // TODO: VARCHAR variant of this function
+  funcs.AddFunction(ScalarFunction({LogicalType::VARCHAR, LogicalType::INTEGER},
+                                   LogicalType::VARCHAR,
+                                   CellToParentVarcharFunction));
   funcs.AddFunction(ScalarFunction({LogicalType::UBIGINT, LogicalType::INTEGER},
                                    LogicalType::UBIGINT, CellToParentFunction));
   funcs.AddFunction(ScalarFunction({LogicalType::BIGINT, LogicalType::INTEGER},
@@ -285,7 +445,9 @@ CreateScalarFunctionInfo H3Functions::GetCellToParentFunction() {
 
 CreateScalarFunctionInfo H3Functions::GetCellToChildrenFunction() {
   ScalarFunctionSet funcs("h3_cell_to_children");
-  // TODO: VARCHAR variant of this function
+  funcs.AddFunction(ScalarFunction({LogicalType::VARCHAR, LogicalType::INTEGER},
+                                   LogicalType::LIST(LogicalType::VARCHAR),
+                                   CellToChildrenVarcharFunction));
   funcs.AddFunction(ScalarFunction({LogicalType::UBIGINT, LogicalType::INTEGER},
                                    LogicalType::LIST(LogicalType::UBIGINT),
                                    CellToChildrenFunction));
@@ -297,7 +459,9 @@ CreateScalarFunctionInfo H3Functions::GetCellToChildrenFunction() {
 
 CreateScalarFunctionInfo H3Functions::GetCellToCenterChildFunction() {
   ScalarFunctionSet funcs("h3_cell_to_center_child");
-  // TODO: VARCHAR variant of this function
+  funcs.AddFunction(ScalarFunction({LogicalType::VARCHAR, LogicalType::INTEGER},
+                                   LogicalType::VARCHAR,
+                                   CellToCenterChildVarcharFunction));
   funcs.AddFunction(ScalarFunction({LogicalType::UBIGINT, LogicalType::INTEGER},
                                    LogicalType::UBIGINT,
                                    CellToCenterChildFunction));
@@ -309,8 +473,10 @@ CreateScalarFunctionInfo H3Functions::GetCellToCenterChildFunction() {
 
 CreateScalarFunctionInfo H3Functions::GetCellToChildPosFunction() {
   ScalarFunctionSet funcs("h3_cell_to_child_pos");
-  // TODO: VARCHAR variant of this function
   // Note this does not return an index, rather it returns a position ID
+  funcs.AddFunction(ScalarFunction({LogicalType::VARCHAR, LogicalType::INTEGER},
+                                   LogicalType::BIGINT,
+                                   CellToChildPosVarcharFunction));
   funcs.AddFunction(ScalarFunction({LogicalType::UBIGINT, LogicalType::INTEGER},
                                    LogicalType::BIGINT,
                                    CellToChildPosFunction));
@@ -322,7 +488,9 @@ CreateScalarFunctionInfo H3Functions::GetCellToChildPosFunction() {
 
 CreateScalarFunctionInfo H3Functions::GetChildPosToCellFunction() {
   ScalarFunctionSet funcs("h3_child_pos_to_cell");
-  // TODO: VARCHAR variant of this function
+  funcs.AddFunction(ScalarFunction(
+      {LogicalType::BIGINT, LogicalType::VARCHAR, LogicalType::INTEGER},
+      LogicalType::VARCHAR, ChildPosToCellVarcharFunction));
   funcs.AddFunction(ScalarFunction(
       {LogicalType::BIGINT, LogicalType::UBIGINT, LogicalType::INTEGER},
       LogicalType::UBIGINT, ChildPosToCellFunction));
@@ -334,7 +502,9 @@ CreateScalarFunctionInfo H3Functions::GetChildPosToCellFunction() {
 
 CreateScalarFunctionInfo H3Functions::GetCompactCellsFunction() {
   ScalarFunctionSet funcs("h3_compact_cells");
-  // TODO: VARCHAR variant of this function
+  // funcs.AddFunction(ScalarFunction({LogicalType::LIST(LogicalType::VARCHAR)},
+  //                                  LogicalType::LIST(LogicalType::VARCHAR),
+  //                                  CompactCellsVarcharFunction));
   funcs.AddFunction(ScalarFunction({LogicalType::LIST(LogicalType::UBIGINT)},
                                    LogicalType::LIST(LogicalType::UBIGINT),
                                    CompactCellsFunction));
@@ -346,7 +516,10 @@ CreateScalarFunctionInfo H3Functions::GetCompactCellsFunction() {
 
 CreateScalarFunctionInfo H3Functions::GetUncompactCellsFunction() {
   ScalarFunctionSet funcs("h3_uncompact_cells");
-  // TODO: VARCHAR variant of this function
+  // funcs.AddFunction(ScalarFunction(
+  //     {LogicalType::LIST(LogicalType::VARCHAR), LogicalType::INTEGER},
+  //     LogicalType::LIST(LogicalType::VARCHAR),
+  //     UncompactCellsVarcharFunction));
   funcs.AddFunction(ScalarFunction(
       {LogicalType::LIST(LogicalType::UBIGINT), LogicalType::INTEGER},
       LogicalType::LIST(LogicalType::UBIGINT), UncompactCellsFunction));
diff --git a/test/sql/h3/h3_functions_hierarchy.test b/test/sql/h3/h3_functions_hierarchy.test
index ffdbb72..bddb200 100644
--- a/test/sql/h3/h3_functions_hierarchy.test
+++ b/test/sql/h3/h3_functions_hierarchy.test
@@ -10,6 +10,11 @@ SELECT h3_cell_to_parent(cast(586265647244115967 as bigint), 1);
 ----
 581764796395814911
 
+query I
+SELECT h3_cell_to_parent('822d57fffffffff', 1);
+----
+812d7ffffffffff
+
 query I
 SELECT h3_cell_to_parent(cast(586265647244115967 as ubigint), -1);
 ----
@@ -20,6 +25,11 @@ SELECT h3_cell_to_parent(cast(586265647244115967 as bigint), -1);
 ----
 NULL
 
+query I
+SELECT h3_cell_to_parent('822d57fffffffff', -1);
+----
+NULL
+
 query I
 SELECT h3_cell_to_parent(NULL, 0);
 ----
@@ -35,6 +45,11 @@ SELECT h3_cell_to_center_child(cast(586265647244115967 as bigint), 4);
 ----
 595272305332977663
 
+query I
+SELECT h3_cell_to_center_child('822d57fffffffff', 4);
+----
+842d501ffffffff
+
 query I
 SELECT h3_cell_to_center_child(cast(586265647244115967 as ubigint), 0);
 ----
@@ -45,6 +60,11 @@ SELECT h3_cell_to_center_child(cast(586265647244115967 as bigint), 0);
 ----
 NULL
 
+query I
+SELECT h3_cell_to_center_child('822d57fffffffff', 0);
+----
+NULL
+
 query I
 select h3_cell_to_children(586265647244115967::ubigint, 3);
 ----
@@ -55,6 +75,11 @@ select h3_cell_to_children(586265647244115967::bigint, 3);
 ----
 [590768765835149311, 590768834554626047, 590768903274102783, 590768971993579519, 590769040713056255, 590769109432532991, 590769178152009727]
 
+query I
+select h3_cell_to_children('822d57fffffffff', 3);
+----
+[832d50fffffffff, 832d51fffffffff, 832d52fffffffff, 832d53fffffffff, 832d54fffffffff, 832d55fffffffff, 832d56fffffffff]
+
 query I
 select h3_cell_to_children(586265647244115967::ubigint, 30);
 ----
@@ -65,6 +90,11 @@ select h3_cell_to_children(586265647244115967::bigint, 30);
 ----
 NULL
 
+query I
+select h3_cell_to_children('822d57fffffffff', 30);
+----
+NULL
+
 query I
 select h3_cell_to_child_pos(597563343967879167::ubigint, 1);
 ----
@@ -75,6 +105,11 @@ select h3_cell_to_child_pos(597563343967879167::bigint, 1);
 ----
 70
 
+query I
+select h3_cell_to_child_pos('84af8b1ffffffff', 1);
+----
+70
+
 query I
 select h3_cell_to_child_pos(597563343967879167::ubigint, 100);
 ----
@@ -85,6 +120,11 @@ select h3_cell_to_child_pos(597563343967879167::bigint, 100);
 ----
 NULL
 
+query I
+select h3_cell_to_child_pos('84af8b1ffffffff', 100);
+----
+NULL
+
 query I
 select h3_child_pos_to_cell(70::bigint, 584056178628100095::ubigint, 4);
 ----
@@ -95,6 +135,11 @@ select h3_child_pos_to_cell(70::bigint, 584056178628100095::bigint, 4);
 ----
 597563343967879167
 
+query I
+select h3_child_pos_to_cell(70::bigint, '81afbffffffffff', 4);
+----
+84af8b1ffffffff
+
 query I
 select h3_child_pos_to_cell(70::bigint, 584056178628100095::ubigint, -1);
 ----
@@ -105,6 +150,11 @@ select h3_child_pos_to_cell(70::bigint, 584056178628100095::bigint, -1);
 ----
 NULL
 
+query I
+select h3_child_pos_to_cell(70, '81afbffffffffff', -1);
+----
+NULL
+
 query I
 select h3_compact_cells([586265647244115967::ubigint, 586260699441790975::ubigint, 586244756523188223::ubigint, 586245306279002111::ubigint, 586266196999929855::ubigint, 586264547732488191::ubigint, 586267846267371519::ubigint])
 ----