Skip to content

Commit

Permalink
[smallvec] Optionally use SmallVec for some internal scratch vectors
Browse files Browse the repository at this point in the history
We have some temporary vectors used during processing that will
usually only hold a few entries. By using SmallVec here, we can
avoid extra heap allocations in common cases.

(In Gecko's usage, I'm seeing around a 4% improvement on the
bidi-resolution subtest of perf_reftest_singletons with this change.)
  • Loading branch information
jfkthame committed Feb 21, 2024
1 parent b3fc605 commit c35de55
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 2 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ name = "unicode_bidi"
flame = { version = "0.2", optional = true }
flamer = { version = "0.4", optional = true }
serde = { version = ">=0.8, <2.0", default-features = false, optional = true, features = ["derive"] }
smallvec = { version = ">=1.13", optional = true, features = ["union"] }

[dev-dependencies]
serde_test = ">=0.8, <2.0"
Expand Down
18 changes: 16 additions & 2 deletions src/implicit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@

use alloc::vec::Vec;
use core::cmp::max;
#[cfg(feature = "smallvec")]
use smallvec::SmallVec;

use super::char_data::BidiClass::{self, *};
use super::level::Level;
Expand Down Expand Up @@ -39,7 +41,13 @@ pub fn resolve_weak<'a, T: TextSource<'a> + ?Sized>(
// The previous class for the purposes of rule W1, not tracking changes from any other rules.
let mut prev_class_before_w1 = sequence.sos;
let mut last_strong_is_al = false;
#[cfg(feature = "smallvec")]
let mut et_run_indices = SmallVec::<[usize; 8]>::new(); // for W5
#[cfg(not(feature = "smallvec"))]
let mut et_run_indices = Vec::new(); // for W5
#[cfg(feature = "smallvec")]
let mut bn_run_indices = SmallVec::<[usize; 8]>::new(); // for W5 + <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
#[cfg(not(feature = "smallvec"))]
let mut bn_run_indices = Vec::new(); // for W5 + <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>

for (run_index, level_run) in sequence.runs.iter().enumerate() {
Expand Down Expand Up @@ -177,7 +185,7 @@ pub fn resolve_weak<'a, T: TextSource<'a> + ?Sized>(
_ => {
// <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
// If there was a BN run before this, that's now a part of this ET run.
et_run_indices.extend(&bn_run_indices);
et_run_indices.extend(bn_run_indices.clone());

// In case this is followed by an EN.
et_run_indices.push(i);
Expand Down Expand Up @@ -411,6 +419,9 @@ pub fn resolve_neutral<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
let mut prev_class = sequence.sos;
while let Some(mut i) = indices.next() {
// Process sequences of NI characters.
#[cfg(feature = "smallvec")]
let mut ni_run = SmallVec::<[usize; 8]>::new();
#[cfg(not(feature = "smallvec"))]
let mut ni_run = Vec::new();
// The BN is for <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
if is_NI(processing_classes[i]) || processing_classes[i] == BN {
Expand Down Expand Up @@ -484,7 +495,10 @@ fn identify_bracket_pairs<'a, T: TextSource<'a> + ?Sized, D: BidiDataSource>(
original_classes: &[BidiClass],
) -> Vec<BracketPair> {
let mut ret = vec![];
let mut stack = vec![];
#[cfg(feature = "smallvec")]
let mut stack = SmallVec::<[(char, usize, usize); 8]>::new();
#[cfg(not(feature = "smallvec"))]
let mut stack = Vec::new();

for (run_index, level_run) in run_sequence.runs.iter().enumerate() {
for (i, ch) in text.subrange(level_run.clone()).char_indices() {
Expand Down
7 changes: 7 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@
extern crate std;
#[macro_use]
extern crate alloc;
#[cfg(feature = "smallvec")]
extern crate smallvec;

pub mod data_source;
pub mod deprecated;
Expand Down Expand Up @@ -99,6 +101,8 @@ use core::cmp;
use core::iter::repeat;
use core::ops::Range;
use core::str::CharIndices;
#[cfg(feature = "smallvec")]
use smallvec::SmallVec;

use crate::format_chars as chars;
use crate::BidiClass::*;
Expand Down Expand Up @@ -300,6 +304,9 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
let mut original_classes = Vec::with_capacity(text.len());

// The stack contains the starting code unit index for each nested isolate we're inside.
#[cfg(feature = "smallvec")]
let mut isolate_stack = SmallVec::<[usize; 8]>::new();
#[cfg(not(feature = "smallvec"))]
let mut isolate_stack = Vec::new();

debug_assert!(
Expand Down

0 comments on commit c35de55

Please sign in to comment.