Skip to content

Commit

Permalink
Reduce bounds check in RowIter, add unsafe Rows::row_unchecked (#6142)
Browse files Browse the repository at this point in the history

* update

* update comment

* update row-iter bench

* make clippy happy
  • Loading branch information
XiangpengHao authored Aug 6, 2024
1 parent 2a4f269 commit 63a6209
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 5 deletions.
23 changes: 18 additions & 5 deletions arrow-row/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -835,10 +835,20 @@ impl Rows {

/// Returns the row at index `row`
///
/// # Panics
///
/// Panics if `row` is not smaller than `self.offsets.len() - 1`, i.e. if
/// `self.offsets` has no entry at position `row + 1`.
pub fn row(&self, row: usize) -> Row<'_> {
    // Compare as `row < len - 1` instead of `row + 1 < len`: with overflow
    // checks disabled `usize::MAX + 1` wraps to 0 and would slip past the
    // assertion. `saturating_sub` keeps an empty `offsets` from underflowing,
    // in which case the assertion fails for every `row`.
    assert!(row < self.offsets.len().saturating_sub(1));
    // SAFETY: the assertion above guarantees that both `row` and `row + 1`
    // are valid positions in `self.offsets`.
    unsafe { self.row_unchecked(row) }
}

/// Returns the row at `index` without bounds checking
///
/// # Safety
/// Caller must ensure that `index + 1` is less than the number of offsets,
/// i.e. that `index` is less than the number of rows (#offsets - 1).
/// Both `offsets[index]` and `offsets[index + 1]` are read without checks.
pub unsafe fn row_unchecked(&self, index: usize) -> Row<'_> {
    // SAFETY: the caller guarantees that `index` and `index + 1` are valid
    // positions in `self.offsets`.
    let end = unsafe { self.offsets.get_unchecked(index + 1) };
    let start = unsafe { self.offsets.get_unchecked(index) };
    // SAFETY: NOTE(review) this relies on every adjacent pair of offsets
    // forming an ordered, in-bounds range of `self.buffer`; that invariant is
    // established where `Rows` is populated, which is not visible here.
    let data = unsafe { self.buffer.get_unchecked(*start..*end) };
    Row {
        data,
        config: &self.config,
    }
}
Expand Down Expand Up @@ -898,7 +908,9 @@ impl<'a> Iterator for RowsIter<'a> {
if self.end == self.start {
return None;
}
let row = self.rows.row(self.start);

// SAFETY: We have checked that `start` is less than `end`
let row = unsafe { self.rows.row_unchecked(self.start) };
self.start += 1;
Some(row)
}
Expand All @@ -920,7 +932,8 @@ impl<'a> DoubleEndedIterator for RowsIter<'a> {
if self.end == self.start {
return None;
}
let row = self.rows.row(self.end);
// Safety: We have checked that `start` is less than `end`
let row = unsafe { self.rows.row_unchecked(self.end) };
self.end -= 1;
Some(row)
}
Expand Down
18 changes: 18 additions & 0 deletions arrow/benches/row_format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,22 @@ fn do_bench(c: &mut Criterion, name: &str, cols: Vec<ArrayRef>) {
});
}

/// Registers the "iterate rows" benchmark.
///
/// Converts a single generated string-view column into the row format once,
/// up front, and then measures only a full pass over `rows.iter()`. Each row's
/// byte slice goes through `black_box` so the loop cannot be optimised away.
fn bench_iter(c: &mut Criterion) {
    // NOTE(review): arguments are presumably (len, null density, string
    // length, mixed) — confirm against `create_string_view_array_with_len`.
    let array = create_string_view_array_with_len(40960, 0., 100, false);
    let fields = vec![SortField::new(array.data_type().clone())];
    let converter = RowConverter::new(fields).unwrap();
    let columns = [Arc::new(array) as ArrayRef];
    let rows = converter.convert_columns(&columns).unwrap();

    c.bench_function("iterate rows", |bencher| {
        bencher.iter(|| {
            rows.iter().for_each(|row| {
                std::hint::black_box(row.as_ref());
            });
        })
    });
}

fn row_bench(c: &mut Criterion) {
let cols = vec![Arc::new(create_primitive_array::<UInt64Type>(4096, 0.)) as ArrayRef];
do_bench(c, "4096 u64(0)", cols);
Expand Down Expand Up @@ -145,6 +161,8 @@ fn row_bench(c: &mut Criterion) {
Arc::new(create_primitive_array::<Int64Type>(4096, 0.)) as ArrayRef,
];
do_bench(c, "4096 4096 string_dictionary(20, 0.5), string_dictionary(30, 0), string_dictionary(100, 0), i64(0)", cols);

bench_iter(c);
}

criterion_group!(benches, row_bench);
Expand Down

0 comments on commit 63a6209

Please sign in to comment.