Skip to content

Commit

Permalink
gamma reader w/out buffer refill
Browse files Browse the repository at this point in the history
  • Loading branch information
tomfran committed Dec 4, 2023
1 parent a19cf3d commit 49d6f0a
Show file tree
Hide file tree
Showing 4 changed files with 138 additions and 14 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[package]
name = "search-rs"
version = "0.1.0"
edition = "2021"
edition = "2021"
1 change: 1 addition & 0 deletions src/io/mod.rs
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
pub mod reader;
pub mod writer;
83 changes: 83 additions & 0 deletions src/io/reader.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
use std::{
fs::File,
io::{BufReader, Read},
};

/// Width in bits of the `u128` bit buffer held by [`Reader`].
#[allow(dead_code)]
const BUFFER_SIZE: u32 = 128;

/// Bit-level reader for gamma-coded integers stored in a file.
///
/// The first 16 bytes of the file are loaded as one big-endian `u128` and
/// codes are consumed from its least-significant end. The buffer is never
/// refilled, so only codes that fit in those first 128 bits can be read.
#[allow(dead_code)]
pub struct Reader {
    /// Buffered handle on the input file.
    file: BufReader<File>,
    /// Bits not yet consumed; the next bit to read is bit 0.
    buffer: u128,
    /// Scratch space for the raw bytes of one buffer load.
    byte_buffer: [u8; 16],
    /// Number of bits consumed from `buffer` so far.
    read: u32,
}

#[allow(dead_code)]
impl Reader {
    /// Opens `filename` and loads its first 16 bytes into the bit buffer.
    ///
    /// # Panics
    ///
    /// Panics if the file cannot be opened or holds fewer than 16 bytes.
    pub fn new(filename: &str) -> Reader {
        let mut r = Reader {
            file: BufReader::new(File::open(filename).expect("can not open input file")),
            buffer: 0,
            byte_buffer: [0; 16],
            read: 0,
        };
        r.update_buffer();
        r
    }

    /// Reads the next gamma code and returns the decoded value.
    ///
    /// The code is a unary length prefix followed by that many payload bits;
    /// the stored number is one greater than the returned value.
    ///
    /// # Panics
    ///
    /// Panics if the buffer holds no complete code that fits in a `u32`.
    pub fn read_gamma(&mut self) -> u32 {
        let len = self.read_unary() - 1;
        // `1 << len` below is a `u32` shift, so longer codes cannot be valid.
        assert!(
            len < u32::BITS,
            "gamma code does not fit in a u32: corrupt or exhausted bit buffer"
        );
        (self.read_len(len) | (1 << len)) - 1
    }

    /// Consumes a run of zero bits plus the one bit that ends it and returns
    /// the number of bits consumed.
    ///
    /// # Panics
    ///
    /// Panics if no set bit is left in the buffer.
    fn read_unary(&mut self) -> u32 {
        let zeros = self.buffer.trailing_zeros();
        assert!(
            zeros < BUFFER_SIZE,
            "no unary terminator left in the bit buffer"
        );

        let consumed = zeros + 1;
        // `consumed` equals the buffer width when the terminator is the top
        // bit; a plain `>>=` would overflow there, so shift checked.
        self.buffer = self.buffer.checked_shr(consumed).unwrap_or(0);
        self.read += consumed;

        consumed
    }

    /// Consumes the lowest `len` bits of the buffer and returns them.
    fn read_len(&mut self, len: u32) -> u32 {
        let mask = (1u128 << len) - 1;

        let res = self.buffer & mask;
        self.buffer >>= len;
        self.read += len;

        res as u32
    }

    /// Loads the next 16 bytes of the file into the bit buffer.
    ///
    /// # Panics
    ///
    /// Panics if 16 bytes cannot be read from the file.
    fn update_buffer(&mut self) {
        self.file
            .read_exact(&mut self.byte_buffer)
            .expect("error while filling byte buffer");

        self.buffer = u128::from_be_bytes(self.byte_buffer);
    }
}

#[cfg(test)]
mod test {

    use super::*;
    use crate::io::writer::Writer;

    /// Writes a few gamma codes with `Writer`, then checks that `Reader`
    /// returns the same values in the same order.
    #[test]
    fn test_read_gamma() {
        let path = "data/test/writer.bin";
        let values = 1u32..5;

        let mut writer = Writer::new(path);
        values.clone().for_each(|value| writer.write_gamma(value));
        writer.flush();

        let mut reader = Reader::new(path);
        for expected in values {
            assert_eq!(expected, reader.read_gamma());
        }
    }
}
66 changes: 53 additions & 13 deletions src/io/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,43 @@ impl Writer {
}
}

pub fn write_int(&mut self, n: u32) {
let free = BUFFER_SIZE - self.written;

pub fn write_gamma(&mut self, n: u32) {
let (gamma, len) = Writer::int_to_gamma(n + 1);
self.buffer |= (gamma as u128) << self.written;
self.write_internal(gamma, len);
}

/// Builds the gamma code of `n`, returning `(code, length_in_bits)`.
///
/// With `msb` the index of the highest set bit of `n`, the low `msb + 1`
/// bits of the code are `msb` zeros followed by a one, and the bits above
/// them are `n` with its highest set bit cleared.
fn int_to_gamma(n: u32) -> (u128, u32) {
    let top_bit = 31 - n.leading_zeros();
    let marker = 1u32 << top_bit;

    // Everything below the top bit of `n`, moved above the unary prefix.
    let payload = u128::from(n ^ marker) << (top_bit + 1);
    let code = payload | u128::from(marker);

    (code, 2 * top_bit + 1)
}

/// Appends `n` to the output as a variable-byte code.
///
/// The value handed to the encoder is `n + 1`.
/// NOTE(review): `n + 1` overflows for `u32::MAX` — confirm callers never pass it.
pub fn write_vbyte(&mut self, n: u32) {
    let shifted = n + 1;
    let (code, bits) = Writer::int_to_vbyte(shifted);
    self.write_internal(code, bits);
}

/// Encodes `n` as a variable-byte code, returning `(code, length_in_bits)`.
///
/// `n` is split into 7-bit groups, least-significant group first, one group
/// per byte of the code. The top bit of the last byte used is set to mark
/// the end of the code. Zero is encoded as the single byte `0x80`.
fn int_to_vbyte(n: u32) -> (u128, u32) {
    const GROUP_MASK: u32 = (1 << 7) - 1;

    // A `u32` can need five 7-bit groups (40 bits of code), so the code is
    // accumulated at the width of the return type instead of in a `u32`.
    let mut vbyte: u128 = 0;
    let mut rest = n;
    let mut byte_num: u32 = 0;

    // Always emit one byte first so that zero still gets a terminator bit.
    loop {
        vbyte |= u128::from(rest & GROUP_MASK) << (8 * byte_num);
        rest >>= 7;
        byte_num += 1;
        if rest == 0 {
            break;
        }
    }
    vbyte |= 1u128 << (8 * byte_num - 1);

    (vbyte, 8 * byte_num)
}

fn write_internal(&mut self, payload: u128, len: u32) {
let free = BUFFER_SIZE - self.written;
self.buffer |= payload << self.written;

if free > len {
self.written += len;
Expand All @@ -35,12 +67,12 @@ impl Writer {

self.update_buffer();
if len > free {
self.buffer |= (gamma as u128) >> (len - free);
self.buffer |= payload >> (len - free);
self.written += len - free;
}
}

pub fn update_buffer(&mut self) {
fn update_buffer(&mut self) {
self.file
.write_all(&self.buffer.to_be_bytes())
.expect("error while writing buffer to BufWriter");
Expand All @@ -50,17 +82,14 @@ impl Writer {
}

/// Writes out a partially filled bit buffer, if any, then flushes the
/// underlying file writer.
///
/// # Panics
///
/// Panics if flushing the file writer fails.
pub fn flush(&mut self) {
    let has_pending_bits = self.written != 0;
    if has_pending_bits {
        self.update_buffer();
    }

    let flushed = self.file.flush();
    flushed.expect("error while flushing BufWriter buffer");
}

/// Builds the gamma code of `n`, returning `(code, length_in_bits)`.
///
/// With `msb` the index of the highest set bit of `n`, the low `msb + 1`
/// bits of the code are `msb` zeros followed by a one, and the bits above
/// them are `n` with its highest set bit cleared.
pub fn int_to_gamma(n: u32) -> (u64, u32) {
    let top_bit = 31 - n.leading_zeros();
    let marker = 1u32 << top_bit;

    // Everything below the top bit of `n`, moved above the unary prefix.
    let payload = u64::from(n ^ marker) << (top_bit + 1);

    (payload | u64::from(marker), 2 * top_bit + 1)
}
}

#[cfg(test)]
Expand All @@ -78,4 +107,15 @@ mod test {
assert_eq!(format!("{g:b}"), "11100");
assert_eq!(l, 5);
}

/// Checks the bit pattern and bit length of the variable-byte code for a
/// two-byte and a one-byte value.
#[test]
fn test_vbyte_coding() {
    let cases = [(1024, "1000100000000000", 16), (1, "10000001", 8)];

    for (input, expected_bits, expected_len) in cases {
        let (vb, l) = Writer::int_to_vbyte(input);
        assert_eq!(format!("{vb:b}"), expected_bits);
        assert_eq!(l, expected_len);
    }
}
}

0 comments on commit 49d6f0a

Please sign in to comment.