From 2abdeaa00bb0e68ec0f6a3d4efb7b41e7085b6b5 Mon Sep 17 00:00:00 2001
From: Denis Cornehl
Date: Fri, 12 Jul 2024 18:51:24 +0200
Subject: [PATCH] add temporary command to fix broken archive indexes

---
 src/bin/cratesfyi.rs | 85 ++++++++++++++++++++++++++++++++++++++++++--
 src/storage/mod.rs   |  8 ++---
 src/utils/mod.rs     |  2 +-
 3 files changed, 87 insertions(+), 8 deletions(-)

diff --git a/src/bin/cratesfyi.rs b/src/bin/cratesfyi.rs
index 0d8b46ca6..bdc4048e1 100644
--- a/src/bin/cratesfyi.rs
+++ b/src/bin/cratesfyi.rs
@@ -1,9 +1,9 @@
-use std::env;
 use std::fmt::Write;
 use std::net::SocketAddr;
 use std::path::PathBuf;
 use std::str::FromStr;
 use std::sync::Arc;
+use std::{env, fs};
 
 use anyhow::{anyhow, Context as _, Error, Result};
 use axum::async_trait;
@@ -11,9 +11,10 @@ use clap::{Parser, Subcommand, ValueEnum};
 use docs_rs::cdn::CdnBackend;
 use docs_rs::db::{self, add_path_into_database, Overrides, Pool, PoolClient};
 use docs_rs::repositories::RepositoryStatsUpdater;
+use docs_rs::storage::{rustdoc_archive_path, source_archive_path, PathNotFoundError};
 use docs_rs::utils::{
     get_config, get_crate_pattern_and_priority, list_crate_priorities, queue_builder,
-    remove_crate_priority, set_config, set_crate_priority, ConfigName,
+    remove_crate_priority, set_config, set_crate_priority, spawn_blocking, ConfigName,
 };
 use docs_rs::{
     start_background_metrics_webserver, start_web_server, AsyncStorage, BuildQueue, Config,
@@ -23,6 +24,7 @@ use docs_rs::{
 use futures_util::StreamExt;
 use humantime::Duration;
 use once_cell::sync::OnceCell;
+use rusqlite::{Connection, OpenFlags};
 use sentry::TransactionContext;
 use tokio::runtime::{Builder, Runtime};
 use tracing_log::LogTracer;
@@ -509,6 +511,9 @@ enum DatabaseSubcommand {
     /// temporary commant to update the `crates.latest_version_id` field
     UpdateLatestVersionId,
 
+    /// temporary command to rebuild a subset of the archive indexes
+    FixBrokenArchiveIndexes,
+
     /// Updates Github/Gitlab stats for crates.
     UpdateRepositoryFields,
 
@@ -567,6 +572,80 @@ impl DatabaseSubcommand {
                     .context("Failed to run database migrations")?
             }
 
+            Self::FixBrokenArchiveIndexes => {
+                let pool = ctx.pool()?;
+                let build_queue = ctx.build_queue()?;
+                ctx.runtime()?
+                    .block_on(async {
+                        let storage = ctx.async_storage().await?;
+                        let mut conn = pool.get_async().await?;
+                        let mut result_stream = sqlx::query!(
+                            "
+                            SELECT c.name, r.version, r.release_time
+                            FROM crates c, releases r
+                            WHERE c.id = r.crate_id
+                            ORDER BY r.id
+                        "
+                        )
+                        .fetch(&mut *conn);
+
+                        while let Some(row) = result_stream.next().await {
+                            let row = row?;
+
+                            println!(
+                                "checking index for {} {} ({:?})",
+                                row.name, row.version, row.release_time
+                            );
+
+                            for path in &[
+                                rustdoc_archive_path(&row.name, &row.version),
+                                source_archive_path(&row.name, &row.version),
+                            ] {
+                                let local_archive_index_filename = match storage
+                                    .download_archive_index(path, 42)
+                                    .await
+                                {
+                                    Ok(path) => path,
+                                    Err(err)
+                                        if err.downcast_ref::<PathNotFoundError>().is_some() =>
+                                    {
+                                        continue
+                                    }
+                                    Err(err) => return Err(err),
+                                };
+
+                                let count = {
+                                    let connection = Connection::open_with_flags(
+                                        &local_archive_index_filename,
+                                        OpenFlags::SQLITE_OPEN_READ_ONLY
+                                            | OpenFlags::SQLITE_OPEN_NO_MUTEX,
+                                    )?;
+                                    let mut stmt =
+                                        connection.prepare("SELECT count(*) FROM files")?;
+
+                                    stmt.query_row([], |row| Ok(row.get::<_, usize>(0)))??
+                                };
+
+                                fs::remove_file(&local_archive_index_filename)?;
+
+                                if count >= 65000 {
+                                    println!("...big index, queueing rebuild");
+                                    spawn_blocking({
+                                        let build_queue = build_queue.clone();
+                                        let name = row.name.clone();
+                                        let version = row.version.clone();
+                                        move || build_queue.add_crate(&name, &version, 5, None)
+                                    })
+                                    .await?;
+                                }
+                            }
+                        }
+
+                        Ok::<(), anyhow::Error>(())
+                    })
+                    .context("Failed to queue rebuilds for big documentation sizes")?
+            }
+
             Self::UpdateLatestVersionId => {
                 let pool = ctx.pool()?;
                 ctx.runtime()?
@@ -581,7 +660,7 @@ impl DatabaseSubcommand {
                         while let Some(row) = result_stream.next().await {
                             let row = row?;
 
-                            println!("handling crate {}", row.name);
+                            println!("handling crate {} ", row.name);
 
                             db::update_latest_version_id(&mut update_conn, row.id).await?;
                         }
 
diff --git a/src/storage/mod.rs b/src/storage/mod.rs
index a79cba14b..b6a770feb 100644
--- a/src/storage/mod.rs
+++ b/src/storage/mod.rs
@@ -28,7 +28,7 @@ type FileRange = RangeInclusive<u64>;
 
 #[derive(Debug, thiserror::Error)]
 #[error("path not found")]
-pub(crate) struct PathNotFoundError;
+pub struct PathNotFoundError;
 
 #[derive(Clone, Debug, PartialEq, Eq, Hash)]
 pub(crate) struct Blob {
@@ -304,7 +304,7 @@ impl AsyncStorage {
     }
 
     #[instrument]
-    pub(super) async fn download_archive_index(
+    pub async fn download_archive_index(
         &self,
         archive_path: &str,
         latest_build_id: i32,
@@ -823,11 +823,11 @@ fn detect_mime(file_path: impl AsRef<Path>) -> &'static str {
     }
 }
 
-pub(crate) fn rustdoc_archive_path(name: &str, version: &str) -> String {
+pub fn rustdoc_archive_path(name: &str, version: &str) -> String {
     format!("rustdoc/{name}/{version}.zip")
 }
 
-pub(crate) fn source_archive_path(name: &str, version: &str) -> String {
+pub fn source_archive_path(name: &str, version: &str) -> String {
     format!("sources/{name}/{version}.zip")
 }
 
diff --git a/src/utils/mod.rs b/src/utils/mod.rs
index b6fc8c926..921ef00f0 100644
--- a/src/utils/mod.rs
+++ b/src/utils/mod.rs
@@ -110,7 +110,7 @@ where
 /// })
 /// .await?
 /// ```
-pub(crate) async fn spawn_blocking<F, R>(f: F) -> Result<R>
+pub async fn spawn_blocking<F, R>(f: F) -> Result<R>
 where
     F: FnOnce() -> Result<R> + Send + 'static,
     R: Send + 'static,