Skip to content

Commit

Permalink
Rollup merge of rust-lang#30565 - michaelwoerister:opaque_encoder, r=…
Browse files Browse the repository at this point in the history
…brson

This PR changes the `emit_opaque` and `read_opaque` methods in the RBML library to use a space-efficient binary encoder that does not emit any tags and uses the LEB128 variable-length integer format for all numbers it emits.

The space savings are nice, albeit a bit underwhelming, especially for dynamic libraries, where metadata is already compressed.

| RLIBs        |  NEW   |   OLD     |
|--------------|--------|-----------|
|libstd        | 8.8 MB |  10.5 MB  |
|libcore       |15.6 MB |   19.7 MB |
|libcollections| 3.7 MB |    4.8 MB |
|librustc      |34.0 MB |   37.8 MB |
|libsyntax     |28.3 MB |   32.1 MB |

| SOs           |     NEW   |    OLD |
|---------------|-----------|--------|
| libstd        |  4.8 MB   | 5.1 MB |
| librustc      |  8.6 MB   | 9.2 MB |
| libsyntax     |  7.8 MB   | 8.4 MB |

At least this should make up for the size increase caused recently by also storing MIR in crate metadata.

Can this be a breaking change for anyone?
cc @rust-lang/compiler
  • Loading branch information
nagisa committed Dec 31, 2015
2 parents ad3371a + fa2a741 commit 23d24ff
Show file tree
Hide file tree
Showing 12 changed files with 1,308 additions and 369 deletions.
162 changes: 162 additions & 0 deletions src/librbml/leb128.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

/// Stores `byte` at index `*position` of `vec` and advances `*position` by one.
///
/// When `*position` equals the current length the byte is appended; otherwise
/// the existing element at that index is overwritten.
///
/// # Panics
///
/// Panics if `*position` is greater than `vec.len()`, because the overwrite
/// path indexes the vector directly.
#[inline]
pub fn write_to_vec(vec: &mut Vec<u8>, position: &mut usize, byte: u8) {
    let index = *position;

    if index != vec.len() {
        // Overwrite in place (indexing panics when `index` is past the end).
        vec[index] = byte;
    } else {
        // Writing exactly at the end grows the vector by one element.
        vec.push(byte);
    }

    *position = index + 1;
}

/// Encodes `value` as unsigned LEB128 into `out`, starting at index
/// `start_position`, and returns the number of bytes written.
///
/// Each output byte carries seven bits of the value, least significant group
/// first; the high bit (0x80) is set on every byte except the last one.
/// Bytes are stored through `write_to_vec`, so existing content at the target
/// indices is overwritten and the vector grows when the end is reached.
pub fn write_unsigned_leb128(out: &mut Vec<u8>, start_position: usize, mut value: u64) -> usize {
    let mut cursor = start_position;

    loop {
        let low_bits = (value & 0x7F) as u8;
        value >>= 7;
        let is_last = value == 0;

        // The continuation flag tells the reader that another group follows.
        let encoded = if is_last { low_bits } else { low_bits | 0x80 };
        write_to_vec(out, &mut cursor, encoded);

        if is_last {
            break;
        }
    }

    cursor - start_position
}

/// Decodes one unsigned LEB128 value from `data`, starting at index
/// `start_position`.
///
/// Returns the decoded value together with the number of bytes consumed.
/// Decoding stops at the first byte whose high bit (0x80) is clear.
///
/// # Panics
///
/// Panics if the encoding runs past the end of `data`.
pub fn read_unsigned_leb128(data: &[u8], start_position: usize) -> (u64, usize) {
    let mut value = 0;
    let mut bit_offset = 0;
    let mut cursor = start_position;

    loop {
        let current = data[cursor];
        cursor += 1;

        // Each byte contributes its low seven bits, least significant group first.
        let payload = (current & 0x7F) as u64;
        value |= payload << bit_offset;

        // A clear high bit marks the final byte of the encoding.
        if (current & 0x80) == 0 {
            return (value, cursor - start_position);
        }

        bit_offset += 7;
    }
}


/// Encodes `value` as signed LEB128 into `out`, starting at index
/// `start_position`, and returns the number of bytes written.
///
/// Seven bits are emitted per byte, least significant group first. Encoding
/// ends once the remaining value is pure sign extension (0 or -1) and bit 6
/// of the byte just produced already matches that sign, so a reader can
/// restore the sign from the final byte alone.
pub fn write_signed_leb128(out: &mut Vec<u8>, start_position: usize, mut value: i64) -> usize {
    let mut cursor = start_position;

    loop {
        let mut byte = (value as u8) & 0x7f;
        // Arithmetic shift: negative values converge to -1, others to 0.
        value >>= 7;

        let sign_bit_set = (byte & 0x40) != 0;
        let finished = match value {
            0 => !sign_bit_set,
            -1 => sign_bit_set,
            _ => false,
        };

        if !finished {
            // Mark this byte to show that more bytes will follow.
            byte |= 0x80;
        }

        write_to_vec(out, &mut cursor, byte);

        if finished {
            return cursor - start_position;
        }
    }
}

/// Decodes one signed LEB128 value from `data`, starting at index
/// `start_position`.
///
/// Returns the decoded value together with the number of bytes consumed.
/// Decoding stops at the first byte whose high bit (0x80) is clear; bit 6
/// (0x40) of that final byte is the sign, which is extended into all higher
/// bits of the result when fewer than 64 bits were read.
///
/// # Panics
///
/// Panics if the encoding runs past the end of `data`.
pub fn read_signed_leb128(data: &[u8], start_position: usize) -> (i64, usize) {
    let mut result: i64 = 0;
    let mut shift = 0;
    let mut position = start_position;
    let mut byte;

    loop {
        byte = data[position];
        position += 1;
        // Each byte contributes its low seven bits, least significant group first.
        result |= ((byte & 0x7F) as i64) << shift;
        shift += 7;

        if (byte & 0x80) == 0 {
            break;
        }
    }

    if (shift < 64) && ((byte & 0x40) != 0) {
        // Sign extend: set every bit from `shift` upwards. The mask is built
        // by shifting an all-ones value rather than negating `1 << shift`,
        // because for `shift == 63` (a nine-byte negative encoding) that
        // negation would be `-i64::MIN`, which overflows.
        result |= !0i64 << shift;
    }

    (result, position - start_position)
}

// Round-trips the values `3 << x` for x in 0..62 through the unsigned
// encoder and decoder, checking the reported byte counts along the way.
#[test]
fn test_unsigned_leb128() {
    let mut stream = Vec::with_capacity(10000);
    let samples: Vec<u64> = (0..62).map(|shift| 3u64 << shift).collect();

    // Encode every sample back to back; each write must report exactly the
    // number of bytes by which the stream grew.
    for &sample in &samples {
        let start = stream.len();
        let written = write_unsigned_leb128(&mut stream, start, sample);
        assert_eq!(stream.len(), start + written);
    }

    // Decode them again in the same order.
    let mut cursor = 0;
    for &expected in &samples {
        let (actual, consumed) = read_unsigned_leb128(&stream, cursor);
        assert_eq!(expected, actual);
        cursor += consumed;
    }

    // The decoder must have consumed the whole stream.
    assert_eq!(stream.len(), cursor);
}

// Round-trips multiples of 123457 (for factors in -500..500) through the
// signed encoder and decoder, checking the reported byte counts along the way.
#[test]
fn test_signed_leb128() {
    let values: Vec<i64> = (-500i64..500).map(|factor| factor * 123457i64).collect();

    let mut stream = Vec::new();

    // Encode every value back to back; each write must report exactly the
    // number of bytes by which the stream grew.
    for &value in &values {
        let start = stream.len();
        let written = write_signed_leb128(&mut stream, start, value);
        assert_eq!(stream.len(), start + written);
    }

    // Decode them again in the same order.
    let mut cursor = 0;
    for &expected in &values {
        let (decoded, consumed) = read_signed_leb128(&stream, cursor);
        cursor += consumed;
        assert_eq!(expected, decoded);
    }

    // The decoder must have consumed the whole stream.
    assert_eq!(cursor, stream.len());
}
33 changes: 23 additions & 10 deletions src/librbml/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,10 +128,17 @@
#![cfg_attr(test, feature(test))]

extern crate serialize;

#[cfg(test)]
extern crate serialize as rustc_serialize; // Used by RustcEncodable

#[macro_use] extern crate log;

#[cfg(test)] extern crate test;

pub mod opaque;
pub mod leb128;

pub use self::EbmlEncoderTag::*;
pub use self::Error::*;

Expand Down Expand Up @@ -241,6 +248,7 @@ pub mod reader {

use serialize;

use super::opaque;
use super::{ ApplicationError, EsVec, EsMap, EsEnum, EsSub8, EsSub32,
EsVecElt, EsMapKey, EsU64, EsU32, EsU16, EsU8, EsI64,
EsI32, EsI16, EsI8, EsBool, EsF64, EsF32, EsChar, EsStr, EsMapVal,
Expand Down Expand Up @@ -621,18 +629,16 @@ pub mod reader {
}

pub fn read_opaque<R, F>(&mut self, op: F) -> DecodeResult<R> where
F: FnOnce(&mut Decoder, Doc) -> DecodeResult<R>,
F: FnOnce(&mut opaque::Decoder, Doc) -> DecodeResult<R>,
{
let doc = try!(self.next_doc(EsOpaque));

let (old_parent, old_pos) = (self.parent, self.pos);
self.parent = doc;
self.pos = doc.start;

let result = try!(op(self, doc));
let result = {
let mut opaque_decoder = opaque::Decoder::new(doc.data,
doc.start);
try!(op(&mut opaque_decoder, doc))
};

self.parent = old_parent;
self.pos = old_pos;
Ok(result)
}

Expand Down Expand Up @@ -877,6 +883,7 @@ pub mod writer {
use std::io::prelude::*;
use std::io::{self, SeekFrom, Cursor};

use super::opaque;
use super::{ EsVec, EsMap, EsEnum, EsSub8, EsSub32, EsVecElt, EsMapKey,
EsU64, EsU32, EsU16, EsU8, EsI64, EsI32, EsI16, EsI8,
EsBool, EsF64, EsF32, EsChar, EsStr, EsMapVal,
Expand Down Expand Up @@ -1120,10 +1127,16 @@ pub mod writer {
}

pub fn emit_opaque<F>(&mut self, f: F) -> EncodeResult where
F: FnOnce(&mut Encoder) -> EncodeResult,
F: FnOnce(&mut opaque::Encoder) -> EncodeResult,
{
try!(self.start_tag(EsOpaque as usize));
try!(f(self));

{
let mut opaque_encoder = opaque::Encoder::new(self.writer);
try!(f(&mut opaque_encoder));
}

self.mark_stable_position();
self.end_tag()
}
}
Expand Down
Loading

0 comments on commit 23d24ff

Please sign in to comment.