Skip to content

Commit

Permalink
feat(query): ST_TRANSFORM (#15992)
Browse files Browse the repository at this point in the history
* ST_TRANSFORM

Signed-off-by: Fan Yang <yangfanlinux@gmail.com>

* ST_TRANSFORM

Signed-off-by: Fan Yang <yangfanlinux@gmail.com>

* tmp test

* try fix

* try fix

* use proj4rs

* fix

---------

Signed-off-by: Fan Yang <yangfanlinux@gmail.com>
Co-authored-by: b41sh <baishen2009@gmail.com>
  • Loading branch information
kkk25641463 and b41sh authored Aug 9, 2024
1 parent 5240970 commit d24996b
Show file tree
Hide file tree
Showing 8 changed files with 466 additions and 201 deletions.
180 changes: 119 additions & 61 deletions Cargo.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,7 @@ parquet = { version = "52", features = ["async"] }
paste = "1.0.15"
# TODO: let's use native tls instead.
poem = { version = "3.0", features = ["openssl-tls", "multipart", "compression"] }
proj4rs = { version = "0.1.3", features = ["geo-types", "crs-definitions"] }
prometheus-client = "0.22"
prost = { version = "0.12.1" }
prost-build = { version = "0.12.1" }
Expand Down
1 change: 1 addition & 0 deletions src/query/functions/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ ordered-float = { workspace = true, features = [
"serde",
"rand",
] }
proj4rs = { workspace = true }
rand = { workspace = true }
regex = { workspace = true }
roaring = "0.10.1"
Expand Down
329 changes: 228 additions & 101 deletions src/query/functions/src/scalars/geometry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ use databend_common_expression::types::VariantType;
use databend_common_expression::types::F64;
use databend_common_expression::vectorize_with_builder_1_arg;
use databend_common_expression::vectorize_with_builder_2_arg;
use databend_common_expression::vectorize_with_builder_3_arg;
use databend_common_expression::vectorize_with_builder_4_arg;
use databend_common_expression::FunctionDomain;
use databend_common_expression::FunctionRegistry;
Expand All @@ -42,6 +43,9 @@ use geo::EuclideanLength;
use geo::HasDimensions;
use geo::HaversineDistance;
use geo::Point;
use geo::ToDegrees;
use geo::ToRadians;
use geo_types::coord;
use geo_types::Polygon;
use geohash::decode_bbox;
use geohash::encode;
Expand All @@ -62,6 +66,8 @@ use geozero::ToWkt;
use jsonb::parse_value;
use jsonb::to_string;
use num_traits::AsPrimitive;
use proj4rs::transform::transform;
use proj4rs::Proj;

pub fn register(registry: &mut FunctionRegistry) {
// aliases
Expand Down Expand Up @@ -1502,109 +1508,112 @@ pub fn register(registry: &mut FunctionRegistry) {
),
);

// registry.register_passthrough_nullable_2_arg::<GeometryType, Int32Type, GeometryType, _, _>(
// "st_transform",
// |_, _, _| FunctionDomain::MayThrow,
// vectorize_with_builder_2_arg::<GeometryType, Int32Type, GeometryType>(
// |original, srid, builder, ctx| {
// if let Some(validity) = &ctx.validity {
// if !validity.get_bit(builder.len()) {
// builder.commit_row();
// return;
// }
// }
//
// #[allow(unused_assignments)]
// let mut from_srid = 0;
//
// // All representations of the geo types supported by crates under the GeoRust organization, have not implemented srid().
// // Currently, the srid() of all types returns the default value `None`, so we need to parse it manually here.
// match read_ewkb_srid(&mut std::io::Cursor::new(original)) {
// Ok(srid) if srid.is_some() => from_srid = srid.unwrap(),
// _ => {
// ctx.set_error(
// builder.len(),
// ErrorCode::GeometryError(" input geometry must has the correct SRID")
// .to_string(),
// );
// builder.commit_row();
// return;
// }
// }
//
// let result = {
// Ewkb(original).to_geo().map_err(ErrorCode::from).and_then(
// |mut geom: Geometry| {
// Proj::new_known_crs(&make_crs(from_srid), &make_crs(srid), None)
// .map_err(|e| ErrorCode::GeometryError(e.to_string()))
// .and_then(|proj| {
// geom.transform(&proj)
// .map_err(|e| ErrorCode::GeometryError(e.to_string()))
// .and_then(|_| {
// geom.to_ewkb(geom.dims(), Some(srid))
// .map_err(ErrorCode::from)
// })
// })
// },
// )
// };
//
// match result {
// Ok(data) => {
// builder.put_slice(data.as_slice());
// }
// Err(e) => {
// ctx.set_error(builder.len(), e.to_string());
// }
// }
//
// builder.commit_row();
// },
// ),
// );
//
// registry.register_passthrough_nullable_3_arg::<GeometryType, Int32Type, Int32Type, GeometryType, _, _>(
// "st_transform",
// |_, _, _,_| FunctionDomain::MayThrow,
// vectorize_with_builder_3_arg::<GeometryType, Int32Type,Int32Type, GeometryType>(
// |original, from_srid, to_srid, builder, ctx| {
// if let Some(validity) = &ctx.validity {
// if !validity.get_bit(builder.len()) {
// builder.commit_row();
// return;
// }
// }
//
// let result = {
// Proj::new_known_crs(&make_crs(from_srid), &make_crs(to_srid), None)
// .map_err(|e| ErrorCode::GeometryError(e.to_string()))
// .and_then(|proj| {
// let old = Ewkb(original.to_vec());
// Ewkb(old.to_ewkb(old.dims(), Some(from_srid)).unwrap()).to_geo().map_err(ErrorCode::from).and_then(|mut geom| {
// geom.transform(&proj).map_err(|e|ErrorCode::GeometryError(e.to_string())).and_then(|_| {
// geom.to_ewkb(old.dims(), Some(to_srid)).map_err(ErrorCode::from)
// })
// })
// })
// };
// match result {
// Ok(data) => {
// builder.put_slice(data.as_slice());
// }
// Err(e) => {
// ctx.set_error(builder.len(), e.to_string());
// }
// }
//
// builder.commit_row();
// },
// ),
// );
registry.register_passthrough_nullable_2_arg::<GeometryType, Int32Type, GeometryType, _, _>(
"st_transform",
|_, _, _| FunctionDomain::MayThrow,
vectorize_with_builder_2_arg::<GeometryType, Int32Type, GeometryType>(
|original, to_srid, builder, ctx| {
if let Some(validity) = &ctx.validity {
if !validity.get_bit(builder.len()) {
builder.commit_row();
return;
}
}

// All representations of the geo types supported by crates under the GeoRust organization, have not implemented srid().
// Currently, the srid() of all types returns the default value `None`, so we need to parse it manually here.
let from_srid = match Ewkb(original).to_geos().unwrap().srid() {
Some(srid) => srid,
_ => {
ctx.set_error(
builder.len(),
ErrorCode::GeometryError("input geometry must has the correct SRID")
.to_string(),
);
builder.commit_row();
return;
}
};

match st_transform_impl(original, from_srid, to_srid) {
Ok(data) => {
builder.put_slice(data.as_slice());
}
Err(e) => {
ctx.set_error(builder.len(), e.to_string());
}
}

builder.commit_row();
},
),
);

registry.register_passthrough_nullable_3_arg::<GeometryType, Int32Type, Int32Type, GeometryType, _, _>(
"st_transform",
|_, _, _,_| FunctionDomain::MayThrow,
vectorize_with_builder_3_arg::<GeometryType, Int32Type, Int32Type, GeometryType>(
|original, from_srid, to_srid, builder, ctx| {
if let Some(validity) = &ctx.validity {
if !validity.get_bit(builder.len()) {
builder.commit_row();
return;
}
}

match st_transform_impl(original, from_srid, to_srid) {
Ok(data) => {
builder.put_slice(data.as_slice());
}
Err(e) => {
ctx.set_error(builder.len(), e.to_string());
}
}

builder.commit_row();
},
),
);
}

// fn make_crs(srid: i32) -> String {
// format!("EPSG:{}", srid)
// }
fn st_transform_impl(
original: &[u8],
from_srid: i32,
to_srid: i32,
) -> databend_common_exception::Result<Vec<u8>> {
let from_proj = Proj::from_epsg_code(
u16::try_from(from_srid).map_err(|_| ErrorCode::GeometryError("invalid from srid"))?,
)
.map_err(|_| ErrorCode::GeometryError("invalid from srid"))?;
let to_proj = Proj::from_epsg_code(
u16::try_from(to_srid).map_err(|_| ErrorCode::GeometryError("invalid to srid"))?,
)
.map_err(|_| ErrorCode::GeometryError("invalid to srid"))?;

let old = Ewkb(original.to_vec());
Ewkb(old.to_ewkb(old.dims(), Some(from_srid)).unwrap())
.to_geo()
.map_err(ErrorCode::from)
.and_then(|mut geom| {
// EPSG:4326 WGS84 in proj4rs is in radians, not degrees.
if from_srid == 4326 {
geom.to_radians_in_place();
}
transform(&from_proj, &to_proj, &mut geom).map_err(|_| {
ErrorCode::GeometryError(format!(
"transform from {} to {} failed",
from_srid, to_srid
))
})?;
if to_srid == 4326 {
geom.to_degrees_in_place();
}
let round_geom = round_geometry_coordinates(geom);
round_geom
.to_ewkb(round_geom.dims(), Some(to_srid))
.map_err(ErrorCode::from)
})
}

#[inline]
fn get_shared_srid(geometries: &Vec<Geometry>) -> Result<Option<i32>, String> {
Expand Down Expand Up @@ -1895,3 +1904,121 @@ fn count_points(geom: &geo_types::Geometry) -> usize {
geo_types::Geometry::Triangle(_) => 4,
}
}

/// The last three decimal places of the f64 type are inconsistent between aarch64 and x86 platforms,
/// causing unit test results to fail. We will only retain six decimal places.
fn round_geometry_coordinates(geom: geo::Geometry<f64>) -> geo::Geometry<f64> {
fn round_coordinate(coord: f64) -> f64 {
(coord * 1_000_000.0).round() / 1_000_000.0
}

match geom {
geo::Geometry::Point(point) => geo::Geometry::Point(Point::new(
round_coordinate(point.x()),
round_coordinate(point.y()),
)),
geo::Geometry::LineString(linestring) => geo::Geometry::LineString(
linestring
.into_iter()
.map(|coord| coord!(x:round_coordinate(coord.x), y:round_coordinate(coord.y)))
.collect(),
),
geo::Geometry::Polygon(polygon) => {
let outer_ring = polygon.exterior();
let mut rounded_inner_rings = Vec::new();

for inner_ring in polygon.interiors() {
let rounded_coords: Vec<Coord<f64>> = inner_ring
.into_iter()
.map(
|coord| coord!( x: round_coordinate(coord.x), y: round_coordinate(coord.y)),
)
.collect();
rounded_inner_rings.push(LineString(rounded_coords));
}

let rounded_polygon = Polygon::new(
LineString(
outer_ring
.into_iter()
.map(|coord| coord!( x:round_coordinate(coord.x), y:round_coordinate(coord.y)))
.collect(),
),
rounded_inner_rings,
);

geo::Geometry::Polygon(rounded_polygon)
}
geo::Geometry::MultiPoint(multipoint) => geo::Geometry::MultiPoint(
multipoint
.into_iter()
.map(|point| Point::new(round_coordinate(point.x()), round_coordinate(point.y())))
.collect(),
),
geo::Geometry::MultiLineString(multilinestring) => {
let rounded_lines: Vec<LineString<f64>> = multilinestring
.into_iter()
.map(|linestring| {
LineString(
linestring
.into_iter()
.map(|coord| coord!(x: round_coordinate(coord.x), y: round_coordinate(coord.y)))
.collect(),
)
})
.collect();

geo::Geometry::MultiLineString(geo::MultiLineString::new(rounded_lines))
}
geo::Geometry::MultiPolygon(multipolygon) => {
let rounded_polygons: Vec<Polygon<f64>> = multipolygon
.into_iter()
.map(|polygon| {
let outer_ring = polygon.exterior().into_iter()
.map(|coord| coord!( x:round_coordinate(coord.x), y:round_coordinate(coord.y)))
.collect::<Vec<Coord<f64>>>();

let mut rounded_inner_rings = Vec::new();
for inner_ring in polygon.interiors() {
let rounded_coords: Vec<Coord<f64>> = inner_ring
.into_iter()
.map(|coord| coord!( x:round_coordinate(coord.x), y: coord.y))
.collect();
rounded_inner_rings.push(LineString(rounded_coords));
}

Polygon::new(LineString(outer_ring), rounded_inner_rings)
})
.collect();
geo::Geometry::MultiPolygon(geo::MultiPolygon::new(rounded_polygons))
}
geo::Geometry::GeometryCollection(geometrycollection) => geo::Geometry::GeometryCollection(
geometrycollection
.into_iter()
.map(round_geometry_coordinates)
.collect(),
),
geo::Geometry::Line(line) => geo::Geometry::Line(geo::Line::new(
Point::new(
round_coordinate(line.start.x),
round_coordinate(line.start.y),
),
Point::new(round_coordinate(line.end.x), round_coordinate(line.end.y)),
)),
geo::Geometry::Rect(rect) => geo::Geometry::Rect(geo::Rect::new(
Point::new(
round_coordinate(rect.min().x),
round_coordinate(rect.min().y),
),
Point::new(
round_coordinate(rect.max().x),
round_coordinate(rect.max().y),
),
)),
geo::Geometry::Triangle(triangle) => geo::Geometry::Triangle(geo::Triangle::new(
coord!(x: round_coordinate(triangle.0.x), y: round_coordinate(triangle.0.y)),
coord!(x: round_coordinate(triangle.1.x), y: round_coordinate(triangle.1.y)),
coord!(x: round_coordinate(triangle.2.x), y: round_coordinate(triangle.2.y)),
)),
}
}
Loading

0 comments on commit d24996b

Please sign in to comment.