Skip to content

Commit

Permalink
feat: add json_path_match udf (#4864)
Browse files Browse the repository at this point in the history
* add json_path_match udf

* sql tests for json_path_match

* fix clippy & comment

* fix null value behavior

* added null tests

* adjust function's behavior on nulls

* update test cases

* fix null check of json
  • Loading branch information
Kev1n8 authored Oct 25, 2024
1 parent e328c70 commit 4e9c251
Show file tree
Hide file tree
Showing 4 changed files with 277 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/common/function/src/scalars/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use std::sync::Arc;
mod json_get;
mod json_is;
mod json_path_exists;
mod json_path_match;
mod json_to_string;
mod parse_json;

Expand Down Expand Up @@ -49,5 +50,6 @@ impl JsonFunction {
registry.register(Arc::new(JsonIsObject));

registry.register(Arc::new(json_path_exists::JsonPathExistsFunction));
registry.register(Arc::new(json_path_match::JsonPathMatchFunction));
}
}
202 changes: 202 additions & 0 deletions src/common/function/src/scalars/json/json_path_match.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::fmt::{self, Display};

use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
use common_query::prelude::Signature;
use datafusion::logical_expr::Volatility;
use datatypes::data_type::ConcreteDataType;
use datatypes::prelude::VectorRef;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::vectors::{BooleanVectorBuilder, MutableVector};
use snafu::ensure;

use crate::function::{Function, FunctionContext};

/// Scalar function `json_path_match`: checks whether the given JSON data matches the
/// predicate of the given JSON path.
#[derive(Clone, Debug, Default)]
pub struct JsonPathMatchFunction;

/// Name returned by `Function::name` and reported in unsupported-input errors.
const NAME: &str = "json_path_match";

impl Function for JsonPathMatchFunction {
    /// Returns the registered name of the function, `json_path_match`.
    fn name(&self) -> &str {
        NAME
    }

    /// The result type is always boolean; the input types are not inspected.
    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        Ok(ConcreteDataType::boolean_datatype())
    }

    /// Accepts exactly two arguments: a JSON value followed by a string holding the
    /// JSON path predicate.
    fn signature(&self) -> Signature {
        let arg_types = vec![
            ConcreteDataType::json_datatype(),
            ConcreteDataType::string_datatype(),
        ];
        Signature::exact(arg_types, Volatility::Immutable)
    }

    /// Evaluates the JSON path predicate in `columns[1]` against the JSON value in
    /// `columns[0]`, row by row, producing a boolean vector.
    ///
    /// A row yields NULL (rather than an error) when:
    /// - the JSON value or the path cannot be read as binary / string, or is NULL,
    /// - the JSON value is a JSON `null`,
    /// - the path fails to parse as a JSON path,
    /// - `jsonb::path_match` itself reports an error.
    ///
    /// # Errors
    ///
    /// - `InvalidFuncArgs` if the number of columns is not exactly two.
    /// - `UnsupportedInputDataType` if a JSON row's value type is not binary.
    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
        ensure!(
            columns.len() == 2,
            InvalidFuncArgsSnafu {
                err_msg: format!(
                    "The length of the args is not correct, expect exactly two, have: {}",
                    columns.len()
                ),
            }
        );
        let (json_column, path_column) = (&columns[0], &columns[1]);

        // The JSON column drives the row count; the path column is indexed in lockstep.
        let row_count = json_column.len();
        let mut builder = BooleanVectorBuilder::with_capacity(row_count);

        for row in 0..row_count {
            let json_value = json_column.get_ref(row);
            let path_value = path_column.get_ref(row);

            // JSON data type uses binary vector; any other value type is rejected.
            // NOTE(review): the check is made on each row's value rather than on the
            // column type — confirm how a NULL row reports its type here, otherwise a
            // NULL JSON input would be rejected as unsupported instead of yielding NULL.
            if !matches!(json_value.data_type(), ConcreteDataType::Binary(_)) {
                return UnsupportedInputDataTypeSnafu {
                    function: NAME,
                    datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
                }
                .fail();
            }

            let matched = match (json_value.as_binary(), path_value.as_string()) {
                // Both inputs present and the JSON document is not a JSON `null`.
                (Ok(Some(jsonb_bytes)), Ok(Some(predicate))) if !jsonb::is_null(jsonb_bytes) => {
                    // Parse and evaluation failures both collapse to NULL.
                    jsonb::jsonpath::parse_json_path(predicate.as_bytes())
                        .ok()
                        .and_then(|parsed| jsonb::path_match(jsonb_bytes, parsed).ok())
                }
                _ => None,
            };
            builder.push(matched);
        }

        Ok(builder.to_vector())
    }
}

impl Display for JsonPathMatchFunction {
    /// Writes the upper-case function name, `JSON_PATH_MATCH`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str("JSON_PATH_MATCH")
    }
}

#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use common_query::prelude::TypeSignature;
    use datatypes::vectors::{BinaryVector, StringVector};

    use super::*;

    /// Checks the function's metadata (name, return type, signature) and then
    /// evaluates a table of JSON / path pairs, including NULL-producing cases.
    #[test]
    fn test_json_path_match_function() {
        let func = JsonPathMatchFunction;

        assert_eq!("json_path_match", func.name());
        let return_type = func
            .return_type(&[ConcreteDataType::json_datatype()])
            .unwrap();
        assert_eq!(ConcreteDataType::boolean_datatype(), return_type);

        assert!(matches!(func.signature(),
                         Signature {
                             type_signature: TypeSignature::Exact(valid_types),
                             volatility: Volatility::Immutable
                         } if valid_types == vec![ConcreteDataType::json_datatype(), ConcreteDataType::string_datatype()],
        ));

        // Each case: (JSON document, JSON path predicate, expected result).
        // `None` means the function is expected to return NULL for that row.
        let cases: [(&str, &str, Option<bool>); 7] = [
            (
                r#"{"a": {"b": 2}, "b": 2, "c": 3}"#,
                "$.a.b == 2",
                Some(true),
            ),
            (
                r#"{"a": 1, "b": [1,2,3]}"#,
                "$.b[1 to last] >= 2",
                Some(true),
            ),
            (r#"{"a": 1 ,"b": [1,2,3]}"#, "$.c > 0", Some(false)),
            (r#"[1,2,3]"#, "$[0 to last] > 0", Some(true)),
            // A path that is not a valid predicate yields NULL.
            (r#"{"a":1,"b":[1,2,3]}"#, r#"null"#, None),
            // A JSON `null` document yields NULL regardless of the path.
            (r#"null"#, "$.c > 0", None),
            (r#"null"#, r#"null"#, None),
        ];

        let jsonbs = cases
            .iter()
            .map(|(json, _, _)| Some(jsonb::parse_value(json.as_bytes()).unwrap().to_vec()))
            .collect::<Vec<_>>();
        let paths = cases
            .iter()
            .map(|(_, path, _)| Some(path.to_string()))
            .collect::<Vec<_>>();

        let args: Vec<VectorRef> = vec![
            Arc::new(BinaryVector::from(jsonbs)),
            Arc::new(StringVector::from(paths)),
        ];
        let vector = func.eval(FunctionContext::default(), &args).unwrap();

        assert_eq!(7, vector.len());
        for (row, (_, _, expected)) in cases.iter().enumerate() {
            let actual = vector.get_ref(row);

            if let Some(expected_value) = expected {
                assert!(!actual.is_null());
                let actual_value = actual.as_boolean().unwrap().unwrap();
                assert_eq!(*expected_value, actual_value);
            } else {
                assert!(actual.is_null());
            }
        }
    }
}
57 changes: 57 additions & 0 deletions tests/cases/standalone/common/function/json/json.result
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,60 @@ SELECT json_path_exists(parse_json('[1, 2]'), '[2]');
| false |
+----------------------------------------------------------+

SELECT json_path_exists(parse_json('[1, 2]'), 'null');

+-----------------------------------------------------------+
| json_path_exists(parse_json(Utf8("[1, 2]")),Utf8("null")) |
+-----------------------------------------------------------+
| false |
+-----------------------------------------------------------+

SELECT json_path_exists(parse_json('null'), '$.a');

+--------------------------------------------------------+
| json_path_exists(parse_json(Utf8("null")),Utf8("$.a")) |
+--------------------------------------------------------+
| false |
+--------------------------------------------------------+

--- json_path_match ---
SELECT json_path_match(parse_json('{"a": 1, "b": 2}'), '$.a == 1');

+------------------------------------------------------------------------+
| json_path_match(parse_json(Utf8("{"a": 1, "b": 2}")),Utf8("$.a == 1")) |
+------------------------------------------------------------------------+
| true |
+------------------------------------------------------------------------+

SELECT json_path_match(parse_json('{"a":1,"b":[1,2,3]}'), '$.b[0] > 1');

+-----------------------------------------------------------------------------+
| json_path_match(parse_json(Utf8("{"a":1,"b":[1,2,3]}")),Utf8("$.b[0] > 1")) |
+-----------------------------------------------------------------------------+
| false |
+-----------------------------------------------------------------------------+

SELECT json_path_match(parse_json('{"a":1,"b":[1,2,3]}'), '$.b[1 to last] >= 2');

+--------------------------------------------------------------------------------------+
| json_path_match(parse_json(Utf8("{"a":1,"b":[1,2,3]}")),Utf8("$.b[1 to last] >= 2")) |
+--------------------------------------------------------------------------------------+
| true |
+--------------------------------------------------------------------------------------+

SELECT json_path_match(parse_json('{"a":1,"b":[1,2,3]}'), 'null');

+-----------------------------------------------------------------------+
| json_path_match(parse_json(Utf8("{"a":1,"b":[1,2,3]}")),Utf8("null")) |
+-----------------------------------------------------------------------+
| |
+-----------------------------------------------------------------------+

SELECT json_path_match(parse_json('null'), '$.a == 1');

+------------------------------------------------------------+
| json_path_match(parse_json(Utf8("null")),Utf8("$.a == 1")) |
+------------------------------------------------------------+
| |
+------------------------------------------------------------+

16 changes: 16 additions & 0 deletions tests/cases/standalone/common/function/json/json.sql
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,19 @@ SELECT json_path_exists(parse_json('{"a": 1, "b": 2}'), '$.c');
SELECT json_path_exists(parse_json('[1, 2]'), '[0]');

SELECT json_path_exists(parse_json('[1, 2]'), '[2]');

SELECT json_path_exists(parse_json('[1, 2]'), 'null');

SELECT json_path_exists(parse_json('null'), '$.a');

--- json_path_match ---

SELECT json_path_match(parse_json('{"a": 1, "b": 2}'), '$.a == 1');

SELECT json_path_match(parse_json('{"a":1,"b":[1,2,3]}'), '$.b[0] > 1');

SELECT json_path_match(parse_json('{"a":1,"b":[1,2,3]}'), '$.b[1 to last] >= 2');

SELECT json_path_match(parse_json('{"a":1,"b":[1,2,3]}'), 'null');

SELECT json_path_match(parse_json('null'), '$.a == 1');

0 comments on commit 4e9c251

Please sign in to comment.