-
Notifications
You must be signed in to change notification settings - Fork 626
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
worker: add a job to check for potential typosquatting
This only fires when new crates are published: updates to existing crates will not cause this job to run. On a technical level, the major impact here is that the background worker will keep an in-memory cache of the top 3000 crates and their owners. I don't expect the impact of this to be significant in practice. As this is an experiment at present, configuration is hardcoded into the new worker job module. If this becomes a longer-term thing, this would be split out into our normal configuration system for easier management.
- Loading branch information
Showing
11 changed files
with
689 additions
and
0 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
use diesel::PgConnection; | ||
use typomania::Package; | ||
|
||
use crate::{background_jobs::Environment, swirl::PerformError, Emails}; | ||
|
||
use self::types::OwnedCrate; | ||
|
||
mod cache; | ||
mod config; | ||
mod types; | ||
|
||
#[cfg(test)] | ||
mod test_util; | ||
|
||
#[instrument(skip_all, fields(krate.name = ?name))] | ||
pub fn check_typosquat( | ||
env: &Environment, | ||
conn: &mut PgConnection, | ||
name: &str, | ||
) -> Result<(), PerformError> { | ||
check_typosquat_inner(env.emails(), conn, name) | ||
} | ||
|
||
fn check_typosquat_inner( | ||
emails: &Emails, | ||
conn: &mut PgConnection, | ||
name: &str, | ||
) -> Result<(), PerformError> { | ||
info!("Checking new crate for potential typosquatting"); | ||
|
||
let krate: Box<dyn Package> = Box::new(OwnedCrate::from_name(conn, name)?); | ||
let squats = cache::get_harness(conn)?.check_package(name, krate)?; | ||
if !squats.is_empty() { | ||
// Well, well, well. For now, the only action we'll take is to e-mail people who hopefully | ||
// care to check into things more closely. | ||
info!(?squats, "Found potential typosquatting"); | ||
|
||
for email in config::NOTIFY_EMAILS.iter() { | ||
if let Err(e) = emails.send_possible_typosquat_notification(email, name, &squats) { | ||
error!(?e, ?email, "sending possible typosquat notification"); | ||
} | ||
} | ||
} | ||
|
||
Ok(()) | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    use crate::test_util::pg_connection;

    use super::test_util::Faker;
    use super::*;

    /// Exercises `check_typosquat_inner` against a database connection: a crate whose name is
    /// unrelated to the cached one must not trigger any mail, while a near-identical name must.
    #[test]
    fn integration() -> Result<(), PerformError> {
        let emails = Emails::new_in_memory();
        let mut faker = Faker::new(pg_connection());

        // The existing crate (and its owner) that later crates are compared against.
        let owner = faker.user("a")?;
        faker.crate_and_version("my-crate", "It's awesome", &owner, 100)?;

        // Build the cache now, so that it contains only the crate created above.
        //
        // In theory this is flaky if the test outlives the cache TTL, but with the TTL
        // currently at 12 hours that would point to much bigger problems.
        let _harness = super::cache::get_harness(faker.borrow_conn())?;

        // Two further crates from a different user: one harmless, one suspicious.
        let newcomer = faker.user("b")?;
        let (benign, _version) = faker.crate_and_version(
            "innocent-crate",
            "I'm just a simple, innocent crate",
            &newcomer,
            0,
        )?;
        let (suspect, _version) = faker.crate_and_version(
            "mycrate",
            "I'm even more innocent, obviously",
            &newcomer,
            0,
        )?;

        // Fixture set-up is complete; take the connection back.
        let mut conn = faker.into_conn();

        // The harmless crate must not produce any notification.
        check_typosquat_inner(&emails, &mut conn, &benign.name)?;
        assert!(emails.mails_in_memory().unwrap().is_empty());

        // The look-alike crate must produce at least one.
        check_typosquat_inner(&emails, &mut conn, &suspect.name)?;
        assert!(!emails.mails_in_memory().unwrap().is_empty());

        Ok(())
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
use std::{ | ||
sync::{Arc, Mutex}, | ||
time::Instant, | ||
}; | ||
|
||
use diesel::PgConnection; | ||
use typomania::{ | ||
checks::{Bitflips, Omitted, SwappedWords, Typos}, | ||
Harness, | ||
}; | ||
|
||
use crate::swirl::PerformError; | ||
|
||
use super::{config, types::TopCrates}; | ||
|
||
/// Gets the typomania harness for the cached top crates, regenerating it if it is out of date. | ||
pub(super) fn get_harness( | ||
conn: &mut PgConnection, | ||
) -> Result<Arc<Harness<TopCrates>>, PerformError> { | ||
HARNESS_CACHE.get(conn) | ||
} | ||
|
||
static HARNESS_CACHE: Cache = Cache::new(); | ||
|
||
struct Cache(Mutex<Inner>); | ||
|
||
impl Cache { | ||
const fn new() -> Self { | ||
Self(Mutex::new(Inner::new())) | ||
} | ||
|
||
fn get(&self, conn: &mut PgConnection) -> Result<Arc<Harness<TopCrates>>, PerformError> { | ||
let mut inner = self.0.lock().unwrap(); | ||
|
||
Ok(if let Some(harness) = inner.get() { | ||
harness | ||
} else { | ||
let harness = Arc::new(new_harness(conn)?); | ||
|
||
inner.update(harness.clone()); | ||
harness | ||
}) | ||
} | ||
} | ||
|
||
#[instrument(skip_all)] | ||
fn new_harness(conn: &mut PgConnection) -> Result<Harness<TopCrates>, PerformError> { | ||
debug!("Rebuilding top crate cache"); | ||
let start = Instant::now(); | ||
|
||
let top_crates = TopCrates::new(conn, config::TOP_CRATES)?; | ||
|
||
// This is essentially the standard set of checks that was implemented by typogard-crates. | ||
let harness = Harness::builder() | ||
.with_check(Bitflips::new( | ||
config::CRATE_NAME_ALPHABET, | ||
top_crates.iter_names(), | ||
)) | ||
.with_check(Omitted::new(config::CRATE_NAME_ALPHABET)) | ||
.with_check(SwappedWords::new("-_")) | ||
.with_check(Typos::new(config::TYPOS.iter().map(|(c, typos)| { | ||
(*c, typos.iter().map(|ss| ss.to_string()).collect()) | ||
}))) | ||
.build(top_crates); | ||
|
||
let elapsed = Instant::now() - start; | ||
debug!(?elapsed, "Top crate cache rebuilt"); | ||
|
||
Ok(harness) | ||
} | ||
|
||
struct Inner { | ||
harness: Option<Arc<Harness<TopCrates>>>, | ||
last_update: Option<Instant>, | ||
} | ||
|
||
impl Inner { | ||
const fn new() -> Self { | ||
Self { | ||
harness: None, | ||
last_update: None, | ||
} | ||
} | ||
|
||
fn get(&self) -> Option<Arc<Harness<TopCrates>>> { | ||
if let Some(harness) = &self.harness { | ||
if let Some(when) = self.last_update { | ||
if when >= Instant::now() - config::CACHE_TTL { | ||
return Some(harness.clone()); | ||
} | ||
} | ||
} | ||
None | ||
} | ||
|
||
fn update(&mut self, harness: Arc<Harness<TopCrates>>) { | ||
self.harness = Some(harness); | ||
self.last_update = Some(Instant::now()); | ||
} | ||
} |
Oops, something went wrong.