Skip to content

Commit

Permalink
Move the typosquat cache into Environment.
Browse files Browse the repository at this point in the history
  • Loading branch information
LawnGnome committed Oct 13, 2023
1 parent 3f1d514 commit 1c7fb6a
Show file tree
Hide file tree
Showing 8 changed files with 46 additions and 46 deletions.
10 changes: 10 additions & 0 deletions src/background_jobs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use crate::swirl::errors::EnqueueError;
use crate::swirl::PerformError;
use crate::worker::cloudfront::CloudFront;
use crate::worker::fastly::Fastly;
use crate::worker::typosquat::Cache as TyposquatCache;
use crate::{worker, Emails};
use crates_io_index::Repository;

Expand Down Expand Up @@ -348,6 +349,7 @@ pub struct Environment {
emails: Emails,
fastly: Option<Fastly>,
pub storage: AssertUnwindSafe<Arc<Storage>>,
typosquat_cache: TyposquatCache,
}

impl Environment {
Expand All @@ -358,6 +360,7 @@ impl Environment {
emails: Emails,
fastly: Option<Fastly>,
storage: Arc<Storage>,
typosquat_cache: TyposquatCache,
) -> Self {
Self::new_shared(
Arc::new(Mutex::new(index)),
Expand All @@ -366,6 +369,7 @@ impl Environment {
emails,
fastly,
storage,
typosquat_cache,
)
}

Expand All @@ -376,6 +380,7 @@ impl Environment {
emails: Emails,
fastly: Option<Fastly>,
storage: Arc<Storage>,
typosquat_cache: TyposquatCache,
) -> Self {
Self {
index,
Expand All @@ -384,6 +389,7 @@ impl Environment {
emails,
fastly,
storage: AssertUnwindSafe(storage),
typosquat_cache,
}
}

Expand All @@ -410,4 +416,8 @@ impl Environment {
/// Returns a borrowed view of the optional `Fastly` value stored in this environment, or
/// `None` when the `fastly` field holds nothing.
pub(crate) fn fastly(&self) -> Option<&Fastly> {
self.fastly.as_ref()
}

/// Returns a shared reference to the typosquat cache stored in this environment.
pub(crate) fn typosquat_cache(&self) -> &TyposquatCache {
&self.typosquat_cache
}
}
14 changes: 11 additions & 3 deletions src/bin/background-worker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ use std::thread::sleep;
use std::time::{Duration, Instant};

use crates_io::swirl;
use crates_io::worker::fastly::Fastly;
use crates_io::worker::{fastly::Fastly, typosquat::Cache as TyposquatCache};

fn main() {
let _sentry = crates_io::sentry::init();
Expand Down Expand Up @@ -77,14 +77,22 @@ fn main() {
let emails = Emails::from_environment(&config);
let fastly = Fastly::from_environment();
let storage = Arc::new(Storage::from_config(&config.storage));
let typosquat_cache = TyposquatCache::default();

let client = Client::builder()
.timeout(Duration::from_secs(45))
.build()
.expect("Couldn't build client");

let environment =
Environment::new_shared(repository, client, cloudfront, emails, fastly, storage);
let environment = Environment::new_shared(
repository,
client,
cloudfront,
emails,
fastly,
storage,
typosquat_cache,
);

let environment = Arc::new(Some(environment));

Expand Down
2 changes: 2 additions & 0 deletions src/tests/util/test_app.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use super::{MockAnonymousUser, MockCookieUser, MockTokenUser};
use crate::util::{chaosproxy::ChaosProxy, fresh_schema::FreshSchema};
use crates_io::config::{self, BalanceCapacityConfig, Base, DatabasePools, DbPoolConfig};
use crates_io::storage::StorageConfig;
use crates_io::worker::typosquat::Cache as TyposquatCache;
use crates_io::{background_jobs::Environment, env, App, Emails, Env};
use crates_io_index::testing::UpstreamIndex;
use crates_io_index::{Credentials, Repository as WorkerRepository, RepositoryConfig};
Expand Down Expand Up @@ -260,6 +261,7 @@ impl TestAppBuilder {
Emails::new_in_memory(),
None,
app.storage.clone(),
TyposquatCache::default(),
);

Some(Runner::test_runner(
Expand Down
2 changes: 1 addition & 1 deletion src/worker/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ pub mod dump_db;
pub mod fastly;
mod git;
mod readmes;
mod typosquat;
pub mod typosquat;
mod update_downloads;

pub(crate) use daily_db_maintenance::perform_daily_db_maintenance;
Expand Down
14 changes: 9 additions & 5 deletions src/worker/typosquat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ mod cache;
mod config;
mod types;

pub use cache::Cache;

#[cfg(test)]
mod test_util;

Expand All @@ -18,18 +20,19 @@ pub fn check_typosquat(
conn: &mut PgConnection,
name: &str,
) -> Result<(), PerformError> {
check_typosquat_inner(env.emails(), conn, name)
check_typosquat_inner(env.emails(), env.typosquat_cache(), conn, name)
}

fn check_typosquat_inner(
emails: &Emails,
cache: &Cache,
conn: &mut PgConnection,
name: &str,
) -> Result<(), PerformError> {
info!("Checking new crate for potential typosquatting");

let krate: Box<dyn Package> = Box::new(OwnedCrate::from_name(conn, name)?);
let squats = cache::get_harness(conn)?.check_package(name, krate)?;
let squats = cache.get(conn)?.check_package(name, krate)?;
if !squats.is_empty() {
// Well, well, well. For now, the only action we'll take is to e-mail people who hopefully
// care to check into things more closely.
Expand Down Expand Up @@ -66,7 +69,8 @@ mod tests {
// Note that the cache no longer has a TTL: once the harness has been built here, later
// lookups on the same `Cache` should be served from this snapshot of the top crates rather
// than rebuilding it from the database.
let _harness = super::cache::get_harness(faker.borrow_conn())?;
let cache = Cache::default();
let _harness = cache.get(faker.borrow_conn())?;

// Now we'll create new crates: one problematic, one not so.
let other_user = faker.user("b")?;
Expand All @@ -87,11 +91,11 @@ mod tests {
let mut conn = faker.into_conn();

// Run the check with a crate that shouldn't cause problems.
check_typosquat_inner(&emails, &mut conn, &angel.name)?;
check_typosquat_inner(&emails, &cache, &mut conn, &angel.name)?;
assert!(emails.mails_in_memory().unwrap().is_empty());

// Now run the check with a less innocent crate.
check_typosquat_inner(&emails, &mut conn, &demon.name)?;
check_typosquat_inner(&emails, &cache, &mut conn, &demon.name)?;
assert!(!emails.mails_in_memory().unwrap().is_empty());

Ok(())
Expand Down
39 changes: 10 additions & 29 deletions src/worker/typosquat/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,23 +13,10 @@ use crate::swirl::PerformError;

use super::{config, types::TopCrates};

/// Gets the typomania harness for the cached top crates, regenerating it if it is out of date.
pub(super) fn get_harness(
conn: &mut PgConnection,
) -> Result<Arc<Harness<TopCrates>>, PerformError> {
HARNESS_CACHE.get(conn)
}

static HARNESS_CACHE: Cache = Cache::new();

struct Cache(Mutex<Inner>);
pub struct Cache(Mutex<Inner>);

impl Cache {
const fn new() -> Self {
Self(Mutex::new(Inner::new()))
}

fn get(&self, conn: &mut PgConnection) -> Result<Arc<Harness<TopCrates>>, PerformError> {
pub fn get(&self, conn: &mut PgConnection) -> Result<Arc<Harness<TopCrates>>, PerformError> {
let mut inner = self.0.lock().unwrap();

Ok(if let Some(harness) = inner.get() {
Expand All @@ -43,6 +30,12 @@ impl Cache {
}
}

impl Default for Cache {
/// Builds a `Cache` by wrapping a freshly constructed `Inner` in a `Mutex`.
fn default() -> Self {
// `Inner::new()` starts out with no harness stored.
Self(Mutex::new(Inner::new()))
}
}

#[instrument(skip_all)]
fn new_harness(conn: &mut PgConnection) -> Result<Harness<TopCrates>, PerformError> {
debug!("Rebuilding top crate cache");
Expand Down Expand Up @@ -71,30 +64,18 @@ fn new_harness(conn: &mut PgConnection) -> Result<Harness<TopCrates>, PerformErr

struct Inner {
harness: Option<Arc<Harness<TopCrates>>>,
last_update: Option<Instant>,
}

impl Inner {
const fn new() -> Self {
Self {
harness: None,
last_update: None,
}
Self { harness: None }
}

fn get(&self) -> Option<Arc<Harness<TopCrates>>> {
if let Some(harness) = &self.harness {
if let Some(when) = self.last_update {
if when >= Instant::now() - config::CACHE_TTL {
return Some(harness.clone());
}
}
}
None
self.harness.clone()
}

fn update(&mut self, harness: Arc<Harness<TopCrates>>) {
self.harness = Some(harness);
self.last_update = Some(Instant::now());
}
}
5 changes: 0 additions & 5 deletions src/worker/typosquat/config.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
//! Things that should really be read from a configuration file, but are just hardcoded while we
//! experiment with this.
use std::time::Duration;

/// How long to cache the top crates for.
pub(super) static CACHE_TTL: Duration = Duration::from_secs(12 * 60 * 60);

/// Valid characters in crate names.
pub(super) static CRATE_NAME_ALPHABET: &str =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890-_";
Expand Down
6 changes: 3 additions & 3 deletions src/worker/typosquat/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@ use crate::{
/// A corpus of the current top crates on crates.io, as determined by their download counts, along
/// with their ownership information so we can quickly check if a new crate shares owners with a
/// top crate.
pub(super) struct TopCrates {
pub struct TopCrates {
crates: HashMap<String, OwnedCrate>,
}

impl TopCrates {
/// Retrieves the `num` top crates from the database.
pub(super) fn new(conn: &mut PgConnection, num: i64) -> Result<Self, PerformError> {
pub fn new(conn: &mut PgConnection, num: i64) -> Result<Self, PerformError> {
// We have to build up a data structure that contains the top crates, their owners in some
// form that is easily compared, and that can be indexed by the crate name.
//
Expand Down Expand Up @@ -87,7 +87,7 @@ impl TopCrates {
}

/// Iterates over the names of the top crates.
pub(super) fn iter_names(&self) -> impl Iterator<Item = &str> {
pub fn iter_names(&self) -> impl Iterator<Item = &str> {
self.crates.keys().map(String::as_str)
}
}
Expand Down

0 comments on commit 1c7fb6a

Please sign in to comment.