Skip to content

Commit

Permalink
compressed storage for rustdoc and source files.
Browse files Browse the repository at this point in the history
  • Loading branch information
syphar authored and jyn514 committed Sep 12, 2021
1 parent b70039c commit dd650cb
Show file tree
Hide file tree
Showing 24 changed files with 1,031 additions and 218 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,8 @@ jobs:
for f in ./test-binaries/*; do
echo "running $f"
chmod +x $f # GH action artifacts don't handle permissions
$f --ignored || exit 1
# run build-tests. Limited to one thread since we don't support parallel builds.
$f --ignored --test-threads=1 || exit 1
done
- name: Clean up the database
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@ target
.vagrant
.rustwide
.rustwide-docker
.archive_cache
.workspace
70 changes: 66 additions & 4 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ font-awesome-as-a-crate = { path = "crates/font-awesome-as-a-crate" }
dashmap = "3.11.10"
string_cache = "0.8.0"
postgres-types = { version = "0.2", features = ["derive"] }
zip = "0.5.11"
bzip2 = "0.4.2"
serde_cbor = "0.11.1"
getrandom = "0.2.1"

# Async
Expand Down Expand Up @@ -104,6 +107,7 @@ criterion = "0.3"
kuchiki = "0.8"
rand = "0.8"
mockito = "0.29"
test-case = "1.2.0"

[build-dependencies]
time = "0.1"
Expand Down
28 changes: 22 additions & 6 deletions benches/compression.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
use docs_rs::storage::{compress, decompress, CompressionAlgorithm};

const ALGORITHM: CompressionAlgorithm = CompressionAlgorithm::Zstd;

pub fn regex_capture_matches(c: &mut Criterion) {
// this isn't a great benchmark because it only tests on one file
// ideally we would build a whole crate and compress each file, taking the average
Expand All @@ -11,11 +9,29 @@ pub fn regex_capture_matches(c: &mut Criterion) {

c.benchmark_group("regex html")
.throughput(Throughput::Bytes(html_slice.len() as u64))
.bench_function("compress", |b| {
b.iter(|| compress(black_box(html_slice), ALGORITHM));
.bench_function("compress zstd", |b| {
b.iter(|| compress(black_box(html_slice), CompressionAlgorithm::Zstd));
})
.bench_function("decompress zstd", |b| {
b.iter(|| {
decompress(
black_box(html_slice),
CompressionAlgorithm::Zstd,
5 * 1024 * 1024,
)
});
})
.bench_function("compress bzip2", |b| {
b.iter(|| compress(black_box(html_slice), CompressionAlgorithm::Bzip2));
})
.bench_function("decompress", |b| {
b.iter(|| decompress(black_box(html_slice), ALGORITHM, 5 * 1024 * 1024));
.bench_function("decompress bzip2", |b| {
b.iter(|| {
decompress(
black_box(html_slice),
CompressionAlgorithm::Bzip2,
5 * 1024 * 1024,
)
});
});
}

Expand Down
7 changes: 4 additions & 3 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,15 +53,16 @@ services:
entrypoint: >
/bin/sh -c "
mkdir -p /data/rust-docs-rs;
minio server /data;
minio server /data --console-address ":9001";
"
ports:
- "9000:9000"
- "9001:9001"
volumes:
- minio-data:/data
environment:
MINIO_ACCESS_KEY: cratesfyi
MINIO_SECRET_KEY: secret_key
MINIO_ROOT_USER: cratesfyi
MINIO_ROOT_PASSWORD: secret_key
healthcheck:
test:
[
Expand Down
2 changes: 1 addition & 1 deletion src/bin/cratesfyi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -574,7 +574,7 @@ impl Context for BinContext {
fn storage(self) -> Storage = Storage::new(
self.pool()?,
self.metrics()?,
&*self.config()?,
self.config()?,
)?;
fn config(self) -> Config = Config::from_env()?;
fn metrics(self) -> Metrics = Metrics::new()?;
Expand Down
9 changes: 9 additions & 0 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ pub struct Config {
// For unit-tests the number has to be higher.
pub(crate) random_crate_search_view_size: u32,

// where do we want to store the locally cached index files
// for the remote archives?
pub(crate) local_archive_cache_path: PathBuf,

// Content Security Policy
pub(crate) csp_report_only: bool,

Expand Down Expand Up @@ -127,6 +131,11 @@ impl Config {

csp_report_only: env("DOCSRS_CSP_REPORT_ONLY", false)?,

local_archive_cache_path: env(
"DOCSRS_ARCHIVE_INDEX_CACHE_PATH",
PathBuf::from(".archive_cache"),
)?,

rustwide_workspace: env("DOCSRS_RUSTWIDE_WORKSPACE", PathBuf::from(".workspace"))?,
inside_docker: env("DOCSRS_DOCKER", false)?,
docker_image: maybe_env("DOCSRS_LOCAL_DOCKER_IMAGE")?
Expand Down
9 changes: 6 additions & 3 deletions src/db/add_package.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ pub(crate) fn add_package_into_database(
has_examples: bool,
compression_algorithms: std::collections::HashSet<CompressionAlgorithm>,
repository_id: Option<i32>,
archive_storage: bool,
) -> Result<i32> {
debug!("Adding package into database");
let crate_id = initialize_package_in_database(conn, metadata_pkg)?;
Expand All @@ -56,12 +57,12 @@ pub(crate) fn add_package_into_database(
keywords, have_examples, downloads, files,
doc_targets, is_library, doc_rustc_version,
documentation_url, default_target, features,
repository_id
repository_id, archive_storage
)
VALUES (
$1, $2, $3, $4, $5, $6, $7, $8, $9,
$10, $11, $12, $13, $14, $15, $16, $17, $18,
$19, $20, $21, $22, $23, $24, $25, $26
$19, $20, $21, $22, $23, $24, $25, $26, $27
)
ON CONFLICT (crate_id, version) DO UPDATE
SET release_time = $3,
Expand All @@ -87,7 +88,8 @@ pub(crate) fn add_package_into_database(
documentation_url = $23,
default_target = $24,
features = $25,
repository_id = $26
repository_id = $26,
archive_storage = $27
RETURNING id",
&[
&crate_id,
Expand Down Expand Up @@ -116,6 +118,7 @@ pub(crate) fn add_package_into_database(
&default_target,
&features,
&repository_id,
&archive_storage,
],
)?;

Expand Down
14 changes: 13 additions & 1 deletion src/db/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
//! However, postgres is still available for testing and backwards compatibility.

use crate::error::Result;
use crate::storage::{CompressionAlgorithms, Storage};
use crate::storage::{CompressionAlgorithm, CompressionAlgorithms, Storage};

use serde_json::Value;
use std::path::{Path, PathBuf};
Expand All @@ -34,6 +34,18 @@ pub fn add_path_into_database<P: AsRef<Path>>(
))
}

/// Stores the contents of `path` via `Storage::store_all_in_archive` under the
/// archive named `archive_path`, then converts the resulting file list into a
/// JSON value. Returns that JSON together with the compression algorithm
/// reported by the storage layer.
pub fn add_path_into_remote_archive<P: AsRef<Path>>(
    storage: &Storage,
    archive_path: &str,
    path: P,
) -> Result<(Value, CompressionAlgorithm)> {
    let (file_list, algorithm) = storage.store_all_in_archive(archive_path, path.as_ref())?;
    let file_list_json = file_list_to_json(file_list.into_iter().collect());
    Ok((file_list_json, algorithm))
}

fn file_list_to_json(file_list: Vec<(PathBuf, String)>) -> Value {
Value::Array(
file_list
Expand Down
5 changes: 5 additions & 0 deletions src/db/migrate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -749,6 +749,11 @@ pub fn migrate(version: Option<Version>, conn: &mut Client) -> crate::error::Res
"ALTER TABLE builds RENAME COLUMN cratesfyi_version TO docsrs_version",
"ALTER TABLE builds RENAME COLUMN docsrs_version TO cratesfyi_version",
),
migration!(
context, 30, "add archive-storage marker for releases",
"ALTER TABLE releases ADD COLUMN archive_storage BOOL NOT NULL DEFAULT FALSE;",
"ALTER TABLE releases DROP COLUMN archive_storage;",
),
];

for migration in migrations {
Expand Down
2 changes: 1 addition & 1 deletion src/db/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ pub(crate) use self::add_package::{
add_build_into_database, add_doc_coverage, add_package_into_database,
};
pub use self::delete::{delete_crate, delete_version};
pub use self::file::add_path_into_database;
pub use self::file::{add_path_into_database, add_path_into_remote_archive};
pub use self::migrate::migrate;
pub use self::pool::{Pool, PoolClient, PoolError};

Expand Down
Loading

0 comments on commit dd650cb

Please sign in to comment.