Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(query): ST_TRANSFORM #15992

Merged
merged 22 commits into from
Aug 9, 2024
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
178 changes: 118 additions & 60 deletions Cargo.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,7 @@ parking_lot = "0.12.1"
parquet = { version = "52", features = ["async"] }
paste = "1.0.15"
poem = { version = "3.0", features = ["rustls", "multipart", "compression"] }
proj4rs = { version = "0.1.3", features = ["geo-types", "crs-definitions"] }
prometheus-client = "0.22"
prost = { version = "0.12.1" }
prost-build = { version = "0.12.1" }
Expand Down
1 change: 1 addition & 0 deletions src/query/functions/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ ordered-float = { workspace = true, features = [
"serde",
"rand",
] }
proj4rs = { workspace = true }
rand = { workspace = true }
regex = { workspace = true }
roaring = "0.10.1"
Expand Down
329 changes: 228 additions & 101 deletions src/query/functions/src/scalars/geometry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ use databend_common_expression::types::VariantType;
use databend_common_expression::types::F64;
use databend_common_expression::vectorize_with_builder_1_arg;
use databend_common_expression::vectorize_with_builder_2_arg;
use databend_common_expression::vectorize_with_builder_3_arg;
use databend_common_expression::vectorize_with_builder_4_arg;
use databend_common_expression::FunctionDomain;
use databend_common_expression::FunctionRegistry;
Expand All @@ -42,6 +43,9 @@ use geo::EuclideanLength;
use geo::HasDimensions;
use geo::HaversineDistance;
use geo::Point;
use geo::ToDegrees;
use geo::ToRadians;
use geo_types::coord;
use geo_types::Polygon;
use geohash::decode_bbox;
use geohash::encode;
Expand All @@ -62,6 +66,8 @@ use geozero::ToWkt;
use jsonb::parse_value;
use jsonb::to_string;
use num_traits::AsPrimitive;
use proj4rs::transform::transform;
use proj4rs::Proj;

pub fn register(registry: &mut FunctionRegistry) {
// aliases
Expand Down Expand Up @@ -1502,109 +1508,112 @@ pub fn register(registry: &mut FunctionRegistry) {
),
);

// registry.register_passthrough_nullable_2_arg::<GeometryType, Int32Type, GeometryType, _, _>(
b41sh marked this conversation as resolved.
Show resolved Hide resolved
// "st_transform",
// |_, _, _| FunctionDomain::MayThrow,
// vectorize_with_builder_2_arg::<GeometryType, Int32Type, GeometryType>(
// |original, srid, builder, ctx| {
// if let Some(validity) = &ctx.validity {
// if !validity.get_bit(builder.len()) {
// builder.commit_row();
// return;
// }
// }
//
// #[allow(unused_assignments)]
// let mut from_srid = 0;
//
// // All representations of the geo types supported by crates under the GeoRust organization, have not implemented srid().
// // Currently, the srid() of all types returns the default value `None`, so we need to parse it manually here.
// match read_ewkb_srid(&mut std::io::Cursor::new(original)) {
// Ok(srid) if srid.is_some() => from_srid = srid.unwrap(),
// _ => {
// ctx.set_error(
// builder.len(),
// ErrorCode::GeometryError(" input geometry must has the correct SRID")
// .to_string(),
// );
// builder.commit_row();
// return;
// }
// }
//
// let result = {
// Ewkb(original).to_geo().map_err(ErrorCode::from).and_then(
// |mut geom: Geometry| {
// Proj::new_known_crs(&make_crs(from_srid), &make_crs(srid), None)
// .map_err(|e| ErrorCode::GeometryError(e.to_string()))
// .and_then(|proj| {
// geom.transform(&proj)
// .map_err(|e| ErrorCode::GeometryError(e.to_string()))
// .and_then(|_| {
// geom.to_ewkb(geom.dims(), Some(srid))
// .map_err(ErrorCode::from)
// })
// })
// },
// )
// };
//
// match result {
// Ok(data) => {
// builder.put_slice(data.as_slice());
// }
// Err(e) => {
// ctx.set_error(builder.len(), e.to_string());
// }
// }
//
// builder.commit_row();
// },
// ),
// );
//
// registry.register_passthrough_nullable_3_arg::<GeometryType, Int32Type, Int32Type, GeometryType, _, _>(
// "st_transform",
// |_, _, _,_| FunctionDomain::MayThrow,
// vectorize_with_builder_3_arg::<GeometryType, Int32Type,Int32Type, GeometryType>(
// |original, from_srid, to_srid, builder, ctx| {
// if let Some(validity) = &ctx.validity {
// if !validity.get_bit(builder.len()) {
// builder.commit_row();
// return;
// }
// }
//
// let result = {
// Proj::new_known_crs(&make_crs(from_srid), &make_crs(to_srid), None)
// .map_err(|e| ErrorCode::GeometryError(e.to_string()))
// .and_then(|proj| {
// let old = Ewkb(original.to_vec());
// Ewkb(old.to_ewkb(old.dims(), Some(from_srid)).unwrap()).to_geo().map_err(ErrorCode::from).and_then(|mut geom| {
// geom.transform(&proj).map_err(|e|ErrorCode::GeometryError(e.to_string())).and_then(|_| {
// geom.to_ewkb(old.dims(), Some(to_srid)).map_err(ErrorCode::from)
// })
// })
// })
// };
// match result {
// Ok(data) => {
// builder.put_slice(data.as_slice());
// }
// Err(e) => {
// ctx.set_error(builder.len(), e.to_string());
// }
// }
//
// builder.commit_row();
// },
// ),
// );
registry.register_passthrough_nullable_2_arg::<GeometryType, Int32Type, GeometryType, _, _>(
"st_transform",
|_, _, _| FunctionDomain::MayThrow,
vectorize_with_builder_2_arg::<GeometryType, Int32Type, GeometryType>(
|original, to_srid, builder, ctx| {
if let Some(validity) = &ctx.validity {
if !validity.get_bit(builder.len()) {
builder.commit_row();
return;
}
}

// All representations of the geo types supported by crates under the GeoRust organization, have not implemented srid().
// Currently, the srid() of all types returns the default value `None`, so we need to parse it manually here.
let from_srid = match Ewkb(original).to_geos().unwrap().srid() {
Some(srid) => srid,
_ => {
ctx.set_error(
builder.len(),
ErrorCode::GeometryError("input geometry must has the correct SRID")
.to_string(),
);
builder.commit_row();
return;
}
};

match st_transform_impl(original, from_srid, to_srid) {
Ok(data) => {
builder.put_slice(data.as_slice());
}
Err(e) => {
ctx.set_error(builder.len(), e.to_string());
}
}

builder.commit_row();
},
),
);

registry.register_passthrough_nullable_3_arg::<GeometryType, Int32Type, Int32Type, GeometryType, _, _>(
"st_transform",
|_, _, _,_| FunctionDomain::MayThrow,
vectorize_with_builder_3_arg::<GeometryType, Int32Type, Int32Type, GeometryType>(
|original, from_srid, to_srid, builder, ctx| {
if let Some(validity) = &ctx.validity {
if !validity.get_bit(builder.len()) {
builder.commit_row();
return;
}
}

match st_transform_impl(original, from_srid, to_srid) {
Ok(data) => {
builder.put_slice(data.as_slice());
}
Err(e) => {
ctx.set_error(builder.len(), e.to_string());
}
}

builder.commit_row();
},
),
);
}

// fn make_crs(srid: i32) -> String {
// format!("EPSG:{}", srid)
// }
fn st_transform_impl(
original: &[u8],
from_srid: i32,
to_srid: i32,
) -> databend_common_exception::Result<Vec<u8>> {
let from_proj = Proj::from_epsg_code(
u16::try_from(from_srid).map_err(|_| ErrorCode::GeometryError("invalid from srid"))?,
)
.map_err(|_| ErrorCode::GeometryError("invalid from srid"))?;
let to_proj = Proj::from_epsg_code(
u16::try_from(to_srid).map_err(|_| ErrorCode::GeometryError("invalid to srid"))?,
)
.map_err(|_| ErrorCode::GeometryError("invalid to srid"))?;

let old = Ewkb(original.to_vec());
Ewkb(old.to_ewkb(old.dims(), Some(from_srid)).unwrap())
.to_geo()
.map_err(ErrorCode::from)
.and_then(|mut geom| {
// EPSG:4326 WGS84 in proj4rs is in radians, not degrees.
if from_srid == 4326 {
geom.to_radians_in_place();
}
transform(&from_proj, &to_proj, &mut geom).map_err(|_| {
ErrorCode::GeometryError(format!(
"transform from {} to {} failed",
from_srid, to_srid
))
})?;
if to_srid == 4326 {
geom.to_degrees_in_place();
}
let round_geom = round_geometry_coordinates(geom);
round_geom
.to_ewkb(round_geom.dims(), Some(to_srid))
.map_err(ErrorCode::from)
})
}

#[inline]
fn get_shared_srid(geometries: &Vec<Geometry>) -> Result<Option<i32>, String> {
Expand Down Expand Up @@ -1895,3 +1904,121 @@ fn count_points(geom: &geo_types::Geometry) -> usize {
geo_types::Geometry::Triangle(_) => 4,
}
}

/// The last three decimal places of the f64 type are inconsistent between aarch64 and x86 platforms,
/// causing unit test results to fail. We will only retain six decimal places.
fn round_geometry_coordinates(geom: geo::Geometry<f64>) -> geo::Geometry<f64> {
fn round_coordinate(coord: f64) -> f64 {
(coord * 1_000_000.0).round() / 1_000_000.0
}

match geom {
geo::Geometry::Point(point) => geo::Geometry::Point(Point::new(
round_coordinate(point.x()),
round_coordinate(point.y()),
)),
geo::Geometry::LineString(linestring) => geo::Geometry::LineString(
linestring
.into_iter()
.map(|coord| coord!(x:round_coordinate(coord.x), y:round_coordinate(coord.y)))
.collect(),
),
geo::Geometry::Polygon(polygon) => {
let outer_ring = polygon.exterior();
let mut rounded_inner_rings = Vec::new();

for inner_ring in polygon.interiors() {
let rounded_coords: Vec<Coord<f64>> = inner_ring
.into_iter()
.map(
|coord| coord!( x: round_coordinate(coord.x), y: round_coordinate(coord.y)),
)
.collect();
rounded_inner_rings.push(LineString(rounded_coords));
}

let rounded_polygon = Polygon::new(
LineString(
outer_ring
.into_iter()
.map(|coord| coord!( x:round_coordinate(coord.x), y:round_coordinate(coord.y)))
.collect(),
),
rounded_inner_rings,
);

geo::Geometry::Polygon(rounded_polygon)
}
geo::Geometry::MultiPoint(multipoint) => geo::Geometry::MultiPoint(
multipoint
.into_iter()
.map(|point| Point::new(round_coordinate(point.x()), round_coordinate(point.y())))
.collect(),
),
geo::Geometry::MultiLineString(multilinestring) => {
let rounded_lines: Vec<LineString<f64>> = multilinestring
.into_iter()
.map(|linestring| {
LineString(
linestring
.into_iter()
.map(|coord| coord!(x: round_coordinate(coord.x), y: round_coordinate(coord.y)))
.collect(),
)
})
.collect();

geo::Geometry::MultiLineString(geo::MultiLineString::new(rounded_lines))
}
geo::Geometry::MultiPolygon(multipolygon) => {
let rounded_polygons: Vec<Polygon<f64>> = multipolygon
.into_iter()
.map(|polygon| {
let outer_ring = polygon.exterior().into_iter()
.map(|coord| coord!( x:round_coordinate(coord.x), y:round_coordinate(coord.y)))
.collect::<Vec<Coord<f64>>>();

let mut rounded_inner_rings = Vec::new();
for inner_ring in polygon.interiors() {
let rounded_coords: Vec<Coord<f64>> = inner_ring
.into_iter()
.map(|coord| coord!( x:round_coordinate(coord.x), y: coord.y))
.collect();
rounded_inner_rings.push(LineString(rounded_coords));
}

Polygon::new(LineString(outer_ring), rounded_inner_rings)
})
.collect();
geo::Geometry::MultiPolygon(geo::MultiPolygon::new(rounded_polygons))
}
geo::Geometry::GeometryCollection(geometrycollection) => geo::Geometry::GeometryCollection(
geometrycollection
.into_iter()
.map(round_geometry_coordinates)
.collect(),
),
geo::Geometry::Line(line) => geo::Geometry::Line(geo::Line::new(
Point::new(
round_coordinate(line.start.x),
round_coordinate(line.start.y),
),
Point::new(round_coordinate(line.end.x), round_coordinate(line.end.y)),
)),
geo::Geometry::Rect(rect) => geo::Geometry::Rect(geo::Rect::new(
Point::new(
round_coordinate(rect.min().x),
round_coordinate(rect.min().y),
),
Point::new(
round_coordinate(rect.max().x),
round_coordinate(rect.max().y),
),
)),
geo::Geometry::Triangle(triangle) => geo::Geometry::Triangle(geo::Triangle::new(
coord!(x: round_coordinate(triangle.0.x), y: round_coordinate(triangle.0.y)),
coord!(x: round_coordinate(triangle.1.x), y: round_coordinate(triangle.1.y)),
coord!(x: round_coordinate(triangle.2.x), y: round_coordinate(triangle.2.y)),
)),
}
}
Loading
Loading