Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Incorporated faster line count #11

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ name = "integration"
path = "tests/tests.rs"

[dependencies]
bytecount = "0.1"
deque = "0.3"
docopt = "0.6"
env_logger = "0.3"
Expand Down
84 changes: 3 additions & 81 deletions src/search_stream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ printing matches. In particular, it searches the file in a streaming fashion
using `read` calls and a (roughly) fixed size buffer.
*/

extern crate bytecount;

use std::cmp;
use std::error::Error as StdError;
use std::fmt;
Expand Down Expand Up @@ -543,89 +545,9 @@ pub fn is_binary(buf: &[u8]) -> bool {
}

/// Count the number of lines in the given buffer.
#[inline(never)]

#[inline(never)]
pub fn count_lines(buf: &[u8], eol: u8) -> u64 {
// This was adapted from code in the memchr crate. The specific benefit
// here is that we can avoid a branch in the inner loop because all we're
// doing is counting.

// The technique to count EOL bytes was adapted from:
// http://bits.stephan-brumme.com/null.html
const LO_U64: u64 = 0x0101010101010101;
const HI_U64: u64 = 0x8080808080808080;

// use truncation
const LO_USIZE: usize = LO_U64 as usize;
const HI_USIZE: usize = HI_U64 as usize;

#[cfg(target_pointer_width = "32")]
const USIZE_BYTES: usize = 4;
#[cfg(target_pointer_width = "64")]
const USIZE_BYTES: usize = 8;

fn count_eol(eol: usize) -> u64 {
// Ideally, this would compile down to a POPCNT instruction, but
// it looks like you need to set RUSTFLAGS="-C target-cpu=native"
// (or target-feature=+popcnt) to get that to work. Bummer.
(eol.wrapping_sub(LO_USIZE) & !eol & HI_USIZE).count_ones() as u64
}

#[cfg(target_pointer_width = "32")]
fn repeat_byte(b: u8) -> usize {
let mut rep = (b as usize) << 8 | b as usize;
rep = rep << 16 | rep;
rep
}

#[cfg(target_pointer_width = "64")]
fn repeat_byte(b: u8) -> usize {
let mut rep = (b as usize) << 8 | b as usize;
rep = rep << 16 | rep;
rep = rep << 32 | rep;
rep
}

fn count_lines_slow(mut buf: &[u8], eol: u8) -> u64 {
let mut count = 0;
while let Some(pos) = memchr(eol, buf) {
count += 1;
buf = &buf[pos + 1..];
}
count
}

let len = buf.len();
let ptr = buf.as_ptr();
let mut count = 0;

// Search up to an aligned boundary...
let align = (ptr as usize) & (USIZE_BYTES - 1);
let mut i = 0;
if align > 0 {
i = cmp::min(USIZE_BYTES - align, len);
count += count_lines_slow(&buf[..i], eol);
}

// ... and search the rest.
let repeated_eol = repeat_byte(eol);

if len >= 2 * USIZE_BYTES {
while i <= len - (2 * USIZE_BYTES) {
unsafe {
let u = *(ptr.offset(i as isize) as *const usize);
let v = *(ptr.offset((i + USIZE_BYTES) as isize)
as *const usize);

count += count_eol(u ^ repeated_eol);
count += count_eol(v ^ repeated_eol);
}
i += USIZE_BYTES * 2;
}
}
count += count_lines_slow(&buf[i..], eol);
count
bytecount::count(buf, eol) as u64
}

/// Replaces a with b in buf.
Expand Down