Skip to content

Commit

Permalink
feat(fuzz): add validator for inserted rows (#3932)
Browse files Browse the repository at this point in the history
* feat(fuzz): add validator for inserted rows

* fix: compatibility with mysql types

* feat(fuzz): add datetime and date type in mysql for row validator
  • Loading branch information
CookiePieWw authored May 15, 2024
1 parent 09129a9 commit cfae276
Show file tree
Hide file tree
Showing 11 changed files with 373 additions and 14 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions tests-fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ unstable = ["nix"]
[dependencies]
arbitrary = { version = "1.3.0", features = ["derive"] }
async-trait = { workspace = true }
chrono = { workspace = true }
common-error = { workspace = true }
common-macro = { workspace = true }
common-query = { workspace = true }
Expand Down
6 changes: 5 additions & 1 deletion tests-fuzz/src/generator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,12 @@ pub mod select_expr;
use std::fmt;

use datatypes::data_type::ConcreteDataType;
use datatypes::value::Value;
use rand::Rng;

use crate::error::Error;
use crate::ir::create_expr::ColumnOption;
use crate::ir::{AlterTableExpr, CreateTableExpr};
use crate::ir::{AlterTableExpr, CreateTableExpr, Ident};

pub type CreateTableExprGenerator<R> =
Box<dyn Generator<CreateTableExpr, R, Error = Error> + Sync + Send>;
Expand All @@ -36,6 +37,9 @@ pub type ColumnOptionGenerator<R> = Box<dyn Fn(&mut R, &ConcreteDataType) -> Vec

pub type ConcreteDataTypeGenerator<R> = Box<dyn Random<ConcreteDataType, R>>;

/// Boxed closure that produces a random [`Value`] for a given [`ConcreteDataType`].
///
/// Arguments, in order: the random number generator, the data type of the value to
/// produce, and an optional source of random [`Ident`]s. For example,
/// `generate_random_value_for_mysql` in `ir.rs` uses that source for string columns.
pub type ValueGenerator<R> =
Box<dyn Fn(&mut R, &ConcreteDataType, Option<&dyn Random<Ident, R>>) -> Value>;

pub trait Generator<T, R: Rng> {
type Error: Sync + Send + fmt::Debug;

Expand Down
13 changes: 6 additions & 7 deletions tests-fuzz/src/generator/insert_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use rand::Rng;
use crate::context::TableContextRef;
use crate::error::{Error, Result};
use crate::fake::WordGenerator;
use crate::generator::{Generator, Random};
use crate::generator::{Generator, Random, ValueGenerator};
use crate::ir::insert_expr::{InsertIntoExpr, RowValue};
use crate::ir::{generate_random_value, Ident};

Expand All @@ -37,6 +37,8 @@ pub struct InsertExprGenerator<R: Rng + 'static> {
rows: usize,
#[builder(default = "Box::new(WordGenerator)")]
word_generator: Box<dyn Random<Ident, R>>,
#[builder(default = "Box::new(generate_random_value)")]
value_generator: ValueGenerator<R>,
#[builder(default)]
_phantom: PhantomData<R>,
}
Expand Down Expand Up @@ -81,7 +83,7 @@ impl<R: Rng + 'static> Generator<InsertIntoExpr, R> for InsertExprGenerator<R> {
continue;
}

row.push(RowValue::Value(generate_random_value(
row.push(RowValue::Value((self.value_generator)(
rng,
&column.column_type,
Some(self.word_generator.as_ref()),
Expand All @@ -93,11 +95,8 @@ impl<R: Rng + 'static> Generator<InsertIntoExpr, R> for InsertExprGenerator<R> {

Ok(InsertIntoExpr {
table_name: self.table_ctx.name.to_string(),
columns: if self.omit_column_list {
vec![]
} else {
values_columns
},
omit_column_list: self.omit_column_list,
columns: values_columns,
values_list,
})
}
Expand Down
76 changes: 76 additions & 0 deletions tests-fuzz/src/ir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,21 @@ lazy_static! {
];
pub static ref STRING_DATA_TYPES: Vec<ConcreteDataType> =
vec![ConcreteDataType::string_datatype()];
pub static ref MYSQL_TS_DATA_TYPES: Vec<ConcreteDataType> = vec![
// MySQL only permits fractional seconds with up to microseconds (6 digits) precision.
ConcreteDataType::timestamp_microsecond_datatype(),
ConcreteDataType::timestamp_millisecond_datatype(),
ConcreteDataType::timestamp_second_datatype(),
];
}

impl_random!(ConcreteDataType, ColumnTypeGenerator, DATA_TYPES);
impl_random!(ConcreteDataType, TsColumnTypeGenerator, TS_DATA_TYPES);
impl_random!(
ConcreteDataType,
MySQLTsColumnTypeGenerator,
MYSQL_TS_DATA_TYPES
);
impl_random!(
ConcreteDataType,
PartibleColumnTypeGenerator,
Expand All @@ -82,6 +93,7 @@ impl_random!(

pub struct ColumnTypeGenerator;
pub struct TsColumnTypeGenerator;
/// Column type generator registered through `impl_random!` against
/// `MYSQL_TS_DATA_TYPES` (second, millisecond and microsecond timestamps).
pub struct MySQLTsColumnTypeGenerator;
pub struct PartibleColumnTypeGenerator;
pub struct StringColumnTypeGenerator;

Expand Down Expand Up @@ -110,6 +122,31 @@ pub fn generate_random_value<R: Rng>(
}
}

/// Generates a random [Value] of the given `datatype` for MySQL.
///
/// * `rng` - source of randomness.
/// * `datatype` - the column type a value is wanted for.
/// * `random_str` - optional generator of random [Ident]s; when present its output is
///   used for string columns, otherwise a single random `char` is used.
///
/// Timestamps are produced by `generate_random_timestamp_for_mysql`, dates and datetimes
/// by `generate_random_date` / `generate_random_datetime`.
///
/// # Panics
///
/// Panics with `unimplemented!` for any data type that has no arm below.
pub fn generate_random_value_for_mysql<R: Rng>(
    rng: &mut R,
    datatype: &ConcreteDataType,
    random_str: Option<&dyn Random<Ident, R>>,
) -> Value {
    match datatype {
        ConcreteDataType::Boolean(_) => Value::from(rng.gen::<bool>()),
        ConcreteDataType::Int16(_) => Value::from(rng.gen::<i16>()),
        ConcreteDataType::Int32(_) => Value::from(rng.gen::<i32>()),
        ConcreteDataType::Int64(_) => Value::from(rng.gen::<i64>()),
        ConcreteDataType::Float32(_) => Value::from(rng.gen::<f32>()),
        ConcreteDataType::Float64(_) => Value::from(rng.gen::<f64>()),
        ConcreteDataType::String(_) => {
            // Prefer the caller-supplied word source; fall back to one random char.
            if let Some(word_source) = random_str {
                Value::from(word_source.gen(rng).value)
            } else {
                Value::from(rng.gen::<char>().to_string())
            }
        }
        ConcreteDataType::Date(_) => generate_random_date(rng),
        ConcreteDataType::DateTime(_) => generate_random_datetime(rng),
        ConcreteDataType::Timestamp(ts_type) => {
            generate_random_timestamp_for_mysql(rng, *ts_type)
        }

        _ => unimplemented!("unsupported type: {datatype}"),
    }
}

fn generate_random_timestamp<R: Rng>(rng: &mut R, ts_type: TimestampType) -> Value {
let v = match ts_type {
TimestampType::Second(_) => {
Expand Down Expand Up @@ -140,6 +177,37 @@ fn generate_random_timestamp<R: Rng>(rng: &mut R, ts_type: TimestampType) -> Val
Value::from(v)
}

/// Generates a random timestamp [Value] in the unit given by `ts_type`, within the range
/// MySQL accepts.
///
/// MySQL supports timestamp from '1970-01-01 00:00:01.000000' to
/// '2038-01-19 03:14:07.499999'; each arm samples an inclusive range expressing those
/// bounds in its own unit.
fn generate_random_timestamp_for_mysql<R: Rng>(rng: &mut R, ts_type: TimestampType) -> Value {
    let timestamp = match ts_type {
        TimestampType::Second(_) => Timestamp::new_second(rng.gen_range(1..=2_147_483_647)),
        TimestampType::Millisecond(_) => {
            Timestamp::new_millisecond(rng.gen_range(1000..=2_147_483_647_499))
        }
        TimestampType::Microsecond(_) => {
            Timestamp::new_microsecond(rng.gen_range(1_000_000..=2_147_483_647_499_999))
        }
        TimestampType::Nanosecond(_) => {
            Timestamp::new_nanosecond(rng.gen_range(1_000_000_000..=2_147_483_647_499_999_000))
        }
    };
    Value::from(timestamp)
}

fn generate_random_datetime<R: Rng>(rng: &mut R) -> Value {
let min = i64::from(Timestamp::MIN_MILLISECOND);
let max = i64::from(Timestamp::MAX_MILLISECOND);
Expand Down Expand Up @@ -258,6 +326,14 @@ impl Column {
)
})
}

/// Returns the column's default value, or `None` when none of its options is a
/// [`ColumnOption::DefaultValue`].
pub fn default_value(&self) -> Option<&Value> {
    for option in self.options.iter() {
        // The first default-value option wins.
        if let ColumnOption::DefaultValue(value) = option {
            return Some(value);
        }
    }
    None
}
}

/// Returns droppable columns, i.e., non-primary-key, non-timestamp columns.
Expand Down
48 changes: 47 additions & 1 deletion tests-fuzz/src/ir/insert_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,25 +12,36 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::fmt::Display;
use std::fmt::{Debug, Display};

use datatypes::value::Value;

use crate::ir::Column;

/// Intermediate representation of an `INSERT INTO` statement.
pub struct InsertIntoExpr {
/// Name of the table the rows are inserted into.
pub table_name: String,
/// When `true`, the MySQL translator renders no column list for the statement.
pub omit_column_list: bool,
/// Columns the values belong to; the generator fills this in regardless of
/// `omit_column_list`.
pub columns: Vec<Column>,
/// The rows to insert, one [`RowValues`] per row.
pub values_list: Vec<RowValues>,
}

/// The values of a single inserted row.
pub type RowValues = Vec<RowValue>;

/// A single cell of an inserted row.
#[derive(PartialEq, PartialOrd, Clone)]
pub enum RowValue {
/// A concrete value.
Value(Value),
/// The `DEFAULT` keyword, i.e. no explicit value is given for the column.
Default,
}

impl RowValue {
    /// Compares two concrete row values using the ordering of the wrapped [`Value`]s.
    ///
    /// Returns whatever `Value::partial_cmp` yields for the two inner values.
    ///
    /// # Panics
    ///
    /// Panics if either side is not [`RowValue::Value`].
    pub fn cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        let (RowValue::Value(lhs), RowValue::Value(rhs)) = (self, other) else {
            panic!("Invalid comparison: {:?} and {:?}", self, other)
        };
        lhs.partial_cmp(rhs)
    }
}

impl Display for RowValue {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Expand All @@ -46,3 +57,38 @@ impl Display for RowValue {
}
}
}

impl Debug for RowValue {
    /// Formats the value as a SQL-like literal: `DEFAULT`, `NULL`, a single-quoted
    /// literal for string, timestamp, datetime and date values, and the value's plain
    /// `Display` output otherwise.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let value = match self {
            RowValue::Default => return f.write_str("DEFAULT"),
            RowValue::Value(value) => value,
        };

        match value {
            Value::Null => f.write_str("NULL"),
            // Textual and temporal values are wrapped in single quotes.
            Value::String(_) | Value::Timestamp(_) | Value::DateTime(_) | Value::Date(_) => {
                write!(f, "'{}'", value)
            }
            _ => write!(f, "{}", value),
        }
    }
}

#[cfg(test)]
mod tests {
    use std::cmp::Ordering;

    use common_time::Timestamp;
    use datatypes::value::Value;

    use crate::ir::insert_expr::RowValue;

    /// A timestamp far in the past must compare as less than one far in the future.
    #[test]
    fn test_value_cmp() {
        let earlier = RowValue::Value(Value::Timestamp(
            Timestamp::from_str_utc("-39988-01-31 01:21:12.848697+0000").unwrap(),
        ));
        let later = RowValue::Value(Value::Timestamp(
            Timestamp::from_str_utc("+12970-09-22 08:40:58.392839+0000").unwrap(),
        ));
        assert_eq!(earlier.cmp(&later), Some(Ordering::Less));
    }
}
2 changes: 1 addition & 1 deletion tests-fuzz/src/translator/mysql/insert_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ impl DslTranslator<InsertIntoExpr, String> for InsertIntoExprTranslator {

impl InsertIntoExprTranslator {
fn format_columns(input: &InsertIntoExpr) -> String {
if input.columns.is_empty() {
if input.omit_column_list {
"".to_string()
} else {
let list = input
Expand Down
1 change: 1 addition & 0 deletions tests-fuzz/src/validator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@
// limitations under the License.

pub mod column;
pub mod row;
Loading

0 comments on commit cfae276

Please sign in to comment.