Skip to content

Commit

Permalink
Fix comparison kernel benchmarks (#6147)
Browse files Browse the repository at this point in the history
* fix comparison kernel benchmarks

* add comment as suggested by @alamb
  • Loading branch information
samuelcolvin authored Jul 29, 2024
1 parent 80ed712 commit 11f2bb8
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 13 deletions.
20 changes: 10 additions & 10 deletions arrow/benches/comparison_kernels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -215,11 +215,11 @@ fn add_benchmark(c: &mut Criterion) {
});

c.bench_function("like_utf8 scalar ends with", |b| {
b.iter(|| bench_like_utf8_scalar(&arr_string, "xxxx%"))
b.iter(|| bench_like_utf8_scalar(&arr_string, "%xxxx"))
});

c.bench_function("like_utf8 scalar starts with", |b| {
b.iter(|| bench_like_utf8_scalar(&arr_string, "%xxxx"))
b.iter(|| bench_like_utf8_scalar(&arr_string, "xxxx%"))
});

c.bench_function("like_utf8 scalar complex", |b| {
Expand All @@ -237,11 +237,11 @@ fn add_benchmark(c: &mut Criterion) {
});

c.bench_function("like_utf8view scalar ends with", |b| {
b.iter(|| bench_like_utf8view_scalar(&string_view_left, "xxxx%"))
b.iter(|| bench_like_utf8view_scalar(&string_view_left, "%xxxx"))
});

c.bench_function("like_utf8view scalar starts with", |b| {
b.iter(|| bench_like_utf8view_scalar(&string_view_left, "%xxxx"))
b.iter(|| bench_like_utf8view_scalar(&string_view_left, "xxxx%"))
});

c.bench_function("like_utf8view scalar complex", |b| {
Expand All @@ -259,11 +259,11 @@ fn add_benchmark(c: &mut Criterion) {
});

c.bench_function("nlike_utf8 scalar ends with", |b| {
b.iter(|| bench_nlike_utf8_scalar(&arr_string, "xxxx%"))
b.iter(|| bench_nlike_utf8_scalar(&arr_string, "%xxxx"))
});

c.bench_function("nlike_utf8 scalar starts with", |b| {
b.iter(|| bench_nlike_utf8_scalar(&arr_string, "%xxxx"))
b.iter(|| bench_nlike_utf8_scalar(&arr_string, "xxxx%"))
});

c.bench_function("nlike_utf8 scalar complex", |b| {
Expand All @@ -281,11 +281,11 @@ fn add_benchmark(c: &mut Criterion) {
});

c.bench_function("ilike_utf8 scalar ends with", |b| {
b.iter(|| bench_ilike_utf8_scalar(&arr_string, "xXXx%"))
b.iter(|| bench_ilike_utf8_scalar(&arr_string, "%xXXx"))
});

c.bench_function("ilike_utf8 scalar starts with", |b| {
b.iter(|| bench_ilike_utf8_scalar(&arr_string, "%XXXx"))
b.iter(|| bench_ilike_utf8_scalar(&arr_string, "XXXx%"))
});

c.bench_function("ilike_utf8 scalar complex", |b| {
Expand All @@ -303,11 +303,11 @@ fn add_benchmark(c: &mut Criterion) {
});

c.bench_function("nilike_utf8 scalar ends with", |b| {
b.iter(|| bench_nilike_utf8_scalar(&arr_string, "xXXx%"))
b.iter(|| bench_nilike_utf8_scalar(&arr_string, "%xXXx"))
});

c.bench_function("nilike_utf8 scalar starts with", |b| {
b.iter(|| bench_nilike_utf8_scalar(&arr_string, "%XXXx"))
b.iter(|| bench_nilike_utf8_scalar(&arr_string, "XXXx%"))
});

c.bench_function("nilike_utf8 scalar complex", |b| {
Expand Down
29 changes: 26 additions & 3 deletions arrow/src/util/bench_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ pub fn create_month_day_nano_array_with_seed(
.collect()
}

/// Creates an random (but fixed-seeded) array of a given size and null density
/// Creates a random (but fixed-seeded) array of a given size and null density
pub fn create_boolean_array(size: usize, null_density: f32, true_density: f32) -> BooleanArray
where
Standard: Distribution<bool>,
Expand All @@ -108,12 +108,35 @@ where
.collect()
}

/// Creates an random (but fixed-seeded) array of a given size and null density
/// Creates a random (but fixed-seeded) string array of a given size and null density. Each string has a random
/// length of 0 to 399 alphanumeric characters. The range `0..400` is chosen to cover a wide range of common string
/// lengths, which have a dramatic impact on the performance of some queries, e.g. LIKE/ILIKE/regex.
pub fn create_string_array<Offset: OffsetSizeTrait>(
size: usize,
null_density: f32,
) -> GenericStringArray<Offset> {
create_string_array_with_len(size, null_density, 4)
create_string_array_with_max_len(size, null_density, 400)
}

/// Creates a random (but fixed-seeded) string array with `size` elements.
///
/// An element is null when a freshly drawn `f32` is below `null_density`;
/// otherwise it is a string of alphanumeric characters whose length is drawn
/// from `0..max_str_len`, so `max_str_len` itself is never reached.
///
/// A `max_str_len` of 0 produces empty strings instead of panicking on an
/// empty sampling range.
fn create_string_array_with_max_len<Offset: OffsetSizeTrait>(
    size: usize,
    null_density: f32,
    max_str_len: usize,
) -> GenericStringArray<Offset> {
    let rng = &mut seedable_rng();
    (0..size)
        .map(|_| {
            if rng.gen::<f32>() < null_density {
                None
            } else {
                // `gen_range` panics on an empty range, so only sample a length
                // when there is at least one valid value to choose from.
                let str_len = if max_str_len == 0 {
                    0
                } else {
                    rng.gen_range(0..max_str_len)
                };
                let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
                // Failure here would mean the sampled bytes were not valid UTF-8,
                // which is a bug in the generator rather than a recoverable error.
                let value = String::from_utf8(value)
                    .expect("alphanumeric samples should form valid UTF-8");
                Some(value)
            }
        })
        .collect()
}

/// Creates a random (but fixed-seeded) array of a given size, null density and length
Expand Down

0 comments on commit 11f2bb8

Please sign in to comment.