Skip to content

Commit

Permalink
add temporary command to fix broken archive indexes
Browse files Browse the repository at this point in the history
  • Loading branch information
syphar committed Jul 12, 2024
1 parent 7ad8c62 commit 2abdeaa
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 8 deletions.
85 changes: 82 additions & 3 deletions src/bin/cratesfyi.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
use std::env;
use std::fmt::Write;
use std::net::SocketAddr;
use std::path::PathBuf;
use std::str::FromStr;
use std::sync::Arc;
use std::{env, fs};

use anyhow::{anyhow, Context as _, Error, Result};
use axum::async_trait;
use clap::{Parser, Subcommand, ValueEnum};
use docs_rs::cdn::CdnBackend;
use docs_rs::db::{self, add_path_into_database, Overrides, Pool, PoolClient};
use docs_rs::repositories::RepositoryStatsUpdater;
use docs_rs::storage::{rustdoc_archive_path, source_archive_path, PathNotFoundError};
use docs_rs::utils::{
get_config, get_crate_pattern_and_priority, list_crate_priorities, queue_builder,
remove_crate_priority, set_config, set_crate_priority, ConfigName,
remove_crate_priority, set_config, set_crate_priority, spawn_blocking, ConfigName,
};
use docs_rs::{
start_background_metrics_webserver, start_web_server, AsyncStorage, BuildQueue, Config,
Expand All @@ -23,6 +24,7 @@ use docs_rs::{
use futures_util::StreamExt;
use humantime::Duration;
use once_cell::sync::OnceCell;
use rusqlite::{Connection, OpenFlags};
use sentry::TransactionContext;
use tokio::runtime::{Builder, Runtime};
use tracing_log::LogTracer;
Expand Down Expand Up @@ -509,6 +511,9 @@ enum DatabaseSubcommand {
/// temporary command to update the `crates.latest_version_id` field
UpdateLatestVersionId,

/// temporary command to rebuild a subset of the archive indexes
FixBrokenArchiveIndexes,

/// Updates Github/Gitlab stats for crates.
UpdateRepositoryFields,

Expand Down Expand Up @@ -567,6 +572,80 @@ impl DatabaseSubcommand {
.context("Failed to run database migrations")?
}

Self::FixBrokenArchiveIndexes => {
    // Temporary maintenance command: iterates over every release, downloads the
    // rustdoc and source archive indexes, counts the rows in each index's `files`
    // table, and queues a rebuild for releases whose index is "big".

    // An index with at least this many file entries triggers a rebuild.
    const BIG_INDEX_FILE_COUNT: usize = 65000;

    let pool = ctx.pool()?;
    let build_queue = ctx.build_queue()?;
    ctx.runtime()?
        .block_on(async {
            let storage = ctx.async_storage().await?;
            let mut conn = pool.get_async().await?;
            let mut result_stream = sqlx::query!(
                "
SELECT c.name, r.version, r.release_time
FROM crates c, releases r
WHERE c.id = r.crate_id
ORDER BY r.id
"
            )
            .fetch(&mut *conn);

            while let Some(row) = result_stream.next().await {
                let row = row?;

                println!(
                    "checking index for {} {} ({:?})",
                    row.name, row.version, row.release_time
                );

                for path in &[
                    rustdoc_archive_path(&row.name, &row.version),
                    source_archive_path(&row.name, &row.version),
                ] {
                    // `42` is passed as `latest_build_id`.
                    // NOTE(review): looks like an arbitrary placeholder — confirm it
                    // cannot collide with an index cached for a real build.
                    let local_archive_index_filename =
                        match storage.download_archive_index(path, 42).await {
                            Ok(path) => path,
                            // A missing archive is not an error here; skip it.
                            Err(err)
                                if err.downcast_ref::<PathNotFoundError>().is_some() =>
                            {
                                continue
                            }
                            Err(err) => return Err(err),
                        };

                    // Count the indexed files. The connection is owned by the closure
                    // and therefore closed before the local file is removed below.
                    let count_result = Connection::open_with_flags(
                        &local_archive_index_filename,
                        OpenFlags::SQLITE_OPEN_READ_ONLY | OpenFlags::SQLITE_OPEN_NO_MUTEX,
                    )
                    .and_then(|connection| {
                        connection.query_row("SELECT count(*) FROM files", [], |row| {
                            row.get::<_, usize>(0)
                        })
                    });

                    // Always attempt to delete the downloaded index, even when reading
                    // it failed, so error paths do not leave files behind. The read
                    // error (if any) takes precedence over a removal error.
                    let removed = fs::remove_file(&local_archive_index_filename);
                    let count = count_result?;
                    removed?;

                    if count >= BIG_INDEX_FILE_COUNT {
                        println!("...big index, queueing rebuild");
                        spawn_blocking({
                            let build_queue = build_queue.clone();
                            let name = row.name.clone();
                            let version = row.version.clone();
                            move || build_queue.add_crate(&name, &version, 5, None)
                        })
                        .await?;

                        // The release is queued already; checking its other archive
                        // would only download another index and queue it again.
                        break;
                    }
                }
            }

            Ok::<(), anyhow::Error>(())
        })
        .context("Failed to queue rebuilds for big documentation sizes")?
}

Self::UpdateLatestVersionId => {
let pool = ctx.pool()?;
ctx.runtime()?
Expand All @@ -581,7 +660,7 @@ impl DatabaseSubcommand {
while let Some(row) = result_stream.next().await {
let row = row?;

println!("handling crate {}", row.name);
println!("handling crate {} ", row.name);

db::update_latest_version_id(&mut update_conn, row.id).await?;
}
Expand Down
8 changes: 4 additions & 4 deletions src/storage/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ type FileRange = RangeInclusive<u64>;

#[derive(Debug, thiserror::Error)]
#[error("path not found")]
pub(crate) struct PathNotFoundError;
pub struct PathNotFoundError;

#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub(crate) struct Blob {
Expand Down Expand Up @@ -304,7 +304,7 @@ impl AsyncStorage {
}

#[instrument]
pub(super) async fn download_archive_index(
pub async fn download_archive_index(
&self,
archive_path: &str,
latest_build_id: i32,
Expand Down Expand Up @@ -823,11 +823,11 @@ fn detect_mime(file_path: impl AsRef<Path>) -> &'static str {
}
}

pub(crate) fn rustdoc_archive_path(name: &str, version: &str) -> String {
pub fn rustdoc_archive_path(name: &str, version: &str) -> String {
format!("rustdoc/{name}/{version}.zip")
}

pub(crate) fn source_archive_path(name: &str, version: &str) -> String {
pub fn source_archive_path(name: &str, version: &str) -> String {
format!("sources/{name}/{version}.zip")
}

Expand Down
2 changes: 1 addition & 1 deletion src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ where
/// })
/// .await?
/// ```
pub(crate) async fn spawn_blocking<F, R>(f: F) -> Result<R>
pub async fn spawn_blocking<F, R>(f: F) -> Result<R>
where
F: FnOnce() -> Result<R> + Send + 'static,
R: Send + 'static,
Expand Down

0 comments on commit 2abdeaa

Please sign in to comment.