Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

compressed storage for rustdoc- and source-files #1342

Merged
merged 1 commit into from
Sep 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,8 @@ jobs:
for f in ./test-binaries/*; do
echo "running $f"
chmod +x $f # GH action artifacts don't handle permissions
$f --ignored || exit 1
# run build-tests. Limited to one thread since we don't support parallel builds.
$f --ignored --test-threads=1 || exit 1
done

- name: Clean up the database
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@ target
.vagrant
.rustwide
.rustwide-docker
.archive_cache
.workspace
70 changes: 66 additions & 4 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ font-awesome-as-a-crate = { path = "crates/font-awesome-as-a-crate" }
dashmap = "3.11.10"
string_cache = "0.8.0"
postgres-types = { version = "0.2", features = ["derive"] }
zip = "0.5.11"
bzip2 = "0.4.2"
serde_cbor = "0.11.1"
getrandom = "0.2.1"

# Async
Expand Down Expand Up @@ -104,6 +107,7 @@ criterion = "0.3"
kuchiki = "0.8"
rand = "0.8"
mockito = "0.29"
test-case = "1.2.0"

[build-dependencies]
time = "0.1"
Expand Down
28 changes: 22 additions & 6 deletions benches/compression.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
use docs_rs::storage::{compress, decompress, CompressionAlgorithm};

const ALGORITHM: CompressionAlgorithm = CompressionAlgorithm::Zstd;

pub fn regex_capture_matches(c: &mut Criterion) {
// this isn't a great benchmark because it only tests on one file
// ideally we would build a whole crate and compress each file, taking the average
Expand All @@ -11,11 +9,29 @@ pub fn regex_capture_matches(c: &mut Criterion) {

c.benchmark_group("regex html")
.throughput(Throughput::Bytes(html_slice.len() as u64))
.bench_function("compress", |b| {
b.iter(|| compress(black_box(html_slice), ALGORITHM));
.bench_function("compress zstd", |b| {
b.iter(|| compress(black_box(html_slice), CompressionAlgorithm::Zstd));
})
.bench_function("decompress zstd", |b| {
b.iter(|| {
decompress(
black_box(html_slice),
CompressionAlgorithm::Zstd,
5 * 1024 * 1024,
)
});
})
.bench_function("compress bzip2", |b| {
b.iter(|| compress(black_box(html_slice), CompressionAlgorithm::Bzip2));
})
.bench_function("decompress", |b| {
b.iter(|| decompress(black_box(html_slice), ALGORITHM, 5 * 1024 * 1024));
.bench_function("decompress bzip2", |b| {
b.iter(|| {
decompress(
black_box(html_slice),
CompressionAlgorithm::Bzip2,
5 * 1024 * 1024,
)
});
});
}

Expand Down
7 changes: 4 additions & 3 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,15 +53,16 @@ services:
entrypoint: >
/bin/sh -c "
mkdir -p /data/rust-docs-rs;
minio server /data;
minio server /data --console-address ":9001";
"
ports:
- "9000:9000"
- "9001:9001"
volumes:
- minio-data:/data
environment:
MINIO_ACCESS_KEY: cratesfyi
MINIO_SECRET_KEY: secret_key
MINIO_ROOT_USER: cratesfyi
MINIO_ROOT_PASSWORD: secret_key
healthcheck:
test:
[
Expand Down
2 changes: 1 addition & 1 deletion src/bin/cratesfyi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -574,7 +574,7 @@ impl Context for BinContext {
fn storage(self) -> Storage = Storage::new(
self.pool()?,
self.metrics()?,
&*self.config()?,
self.config()?,
)?;
fn config(self) -> Config = Config::from_env()?;
fn metrics(self) -> Metrics = Metrics::new()?;
Expand Down
9 changes: 9 additions & 0 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ pub struct Config {
// For unit-tests the number has to be higher.
pub(crate) random_crate_search_view_size: u32,

// where do we want to store the locally cached index files
// for the remote archives?
pub(crate) local_archive_cache_path: PathBuf,

// Content Security Policy
pub(crate) csp_report_only: bool,

Expand Down Expand Up @@ -127,6 +131,11 @@ impl Config {

csp_report_only: env("DOCSRS_CSP_REPORT_ONLY", false)?,

local_archive_cache_path: env(
"DOCSRS_ARCHIVE_INDEX_CACHE_PATH",
PathBuf::from(".archive_cache"),
)?,

rustwide_workspace: env("DOCSRS_RUSTWIDE_WORKSPACE", PathBuf::from(".workspace"))?,
inside_docker: env("DOCSRS_DOCKER", false)?,
docker_image: maybe_env("DOCSRS_LOCAL_DOCKER_IMAGE")?
Expand Down
9 changes: 6 additions & 3 deletions src/db/add_package.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ pub(crate) fn add_package_into_database(
has_examples: bool,
compression_algorithms: std::collections::HashSet<CompressionAlgorithm>,
repository_id: Option<i32>,
archive_storage: bool,
) -> Result<i32> {
debug!("Adding package into database");
let crate_id = initialize_package_in_database(conn, metadata_pkg)?;
Expand All @@ -56,12 +57,12 @@ pub(crate) fn add_package_into_database(
keywords, have_examples, downloads, files,
doc_targets, is_library, doc_rustc_version,
documentation_url, default_target, features,
repository_id
repository_id, archive_storage
)
VALUES (
$1, $2, $3, $4, $5, $6, $7, $8, $9,
$10, $11, $12, $13, $14, $15, $16, $17, $18,
$19, $20, $21, $22, $23, $24, $25, $26
$19, $20, $21, $22, $23, $24, $25, $26, $27
)
ON CONFLICT (crate_id, version) DO UPDATE
SET release_time = $3,
Expand All @@ -87,7 +88,8 @@ pub(crate) fn add_package_into_database(
documentation_url = $23,
default_target = $24,
features = $25,
repository_id = $26
repository_id = $26,
archive_storage = $27
RETURNING id",
&[
&crate_id,
Expand Down Expand Up @@ -116,6 +118,7 @@ pub(crate) fn add_package_into_database(
&default_target,
&features,
&repository_id,
&archive_storage,
],
)?;

Expand Down
14 changes: 13 additions & 1 deletion src/db/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
//! However, postgres is still available for testing and backwards compatibility.

use crate::error::Result;
use crate::storage::{CompressionAlgorithms, Storage};
use crate::storage::{CompressionAlgorithm, CompressionAlgorithms, Storage};

use serde_json::Value;
use std::path::{Path, PathBuf};
Expand All @@ -34,6 +34,18 @@ pub fn add_path_into_database<P: AsRef<Path>>(
))
}

/// Store the contents of `path` through `storage.store_all_in_archive` and
/// describe the result as JSON.
///
/// `archive_path` and `path` are forwarded unchanged to the storage call.
/// The file list it returns is collected into a `Vec` and converted with
/// `file_list_to_json`.
///
/// # Returns
///
/// A tuple of the JSON file list and the `CompressionAlgorithm` reported by
/// the storage call.
///
/// # Errors
///
/// Any error returned by `storage.store_all_in_archive` is propagated.
pub fn add_path_into_remote_archive<P: AsRef<Path>>(
    storage: &Storage,
    archive_path: &str,
    path: P,
) -> Result<(Value, CompressionAlgorithm)> {
    let source_dir = path.as_ref();
    let (stored_files, algorithm) = storage.store_all_in_archive(archive_path, source_dir)?;

    // `file_list_to_json` expects an owned `Vec`, so materialize the list first.
    let entries: Vec<_> = stored_files.into_iter().collect();
    let listing = file_list_to_json(entries);

    Ok((listing, algorithm))
}

fn file_list_to_json(file_list: Vec<(PathBuf, String)>) -> Value {
Value::Array(
file_list
Expand Down
5 changes: 5 additions & 0 deletions src/db/migrate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -749,6 +749,11 @@ pub fn migrate(version: Option<Version>, conn: &mut Client) -> crate::error::Res
"ALTER TABLE builds RENAME COLUMN cratesfyi_version TO docsrs_version",
"ALTER TABLE builds RENAME COLUMN docsrs_version TO cratesfyi_version",
),
migration!(
context, 30, "add archive-storage marker for releases",
"ALTER TABLE releases ADD COLUMN archive_storage BOOL NOT NULL DEFAULT FALSE;",
"ALTER TABLE releases DROP COLUMN archive_storage;",
),
];

for migration in migrations {
Expand Down
2 changes: 1 addition & 1 deletion src/db/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ pub(crate) use self::add_package::{
add_build_into_database, add_doc_coverage, add_package_into_database,
};
pub use self::delete::{delete_crate, delete_version};
pub use self::file::add_path_into_database;
pub use self::file::{add_path_into_database, add_path_into_remote_archive};
pub use self::migrate::migrate;
pub use self::pool::{Pool, PoolClient, PoolError};

Expand Down
Loading