From f79b97f3a9f31e83535f14ed9e3fd7c02a180061 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Wed, 21 Apr 2021 20:12:12 +0200 Subject: [PATCH] Resolves #530 - makes less database calls in add_package logic --- src/db/add_package.rs | 164 ++++++++++++++++++++++-------------------- 1 file changed, 85 insertions(+), 79 deletions(-) diff --git a/src/db/add_package.rs b/src/db/add_package.rs index ee35b91b2..bf78824ee 100644 --- a/src/db/add_package.rs +++ b/src/db/add_package.rs @@ -1,4 +1,5 @@ use std::{ + collections::{HashMap, HashSet}, fs, io::{BufRead, BufReader}, path::Path, @@ -316,26 +317,42 @@ fn add_keywords_into_database( pkg: &MetadataPackage, release_id: i32, ) -> Result<()> { - for keyword in &pkg.keywords { - let slug = slugify(&keyword); - let keyword_id: i32 = { - let rows = conn.query("SELECT id FROM keywords WHERE slug = $1", &[&slug])?; - if !rows.is_empty() { - rows[0].get(0) - } else { - conn.query( - "INSERT INTO keywords (name, slug) VALUES ($1, $2) RETURNING id", - &[&keyword, &slug], - )?[0] - .get(0) - } - }; + let wanted_keywords: HashMap = pkg + .keywords + .iter() + .map(|kw| (slugify(&kw), kw.clone())) + .collect(); + + let existing_keyword_slugs: HashSet = conn + .query( + "SELECT slug FROM keywords WHERE slug = ANY($1)", + &[&wanted_keywords.keys().collect::>()], + )? + .iter() + .map(|row| row.get(0)) + .collect(); + + // we create new keywords one-by-one, since most of the time we already have them, + // and because support for multi-record inserts is a mess without adding a new + // library + let insert_keyword_query = conn.prepare("INSERT INTO keywords (name, slug) VALUES ($1, $2)")?; + for (slug, name) in wanted_keywords + .iter() + .filter(|(k, _)| !(existing_keyword_slugs.contains(*k))) + { + conn.execute(&insert_keyword_query, &[&name, &slug])?; + } + + let affected_rows = conn.execute( + "INSERT INTO keyword_rels (rid, kid) + SELECT $1 as rid, id as kid + FROM keywords + WHERE slug = ANY($2)", + &[&release_id, &wanted_keywords.keys().collect::>()], + )?; - // add releationship - let _ = conn.query( - "INSERT INTO keyword_rels (rid, kid) VALUES ($1, $2)", - &[&release_id, &keyword_id], - ); + if affected_rows != pkg.keywords.len() as u64 { + failure::bail!("not all keywords added to database"); } Ok(()) @@ -347,7 +364,9 @@ pub fn update_crate_data_in_database( registry_data: &CrateData, ) -> Result<()> { info!("Updating crate data for {}", name); - let crate_id = conn.query("SELECT id FROM crates WHERE crates.name = $1", &[&name])?[0].get(0); + let crate_id = conn + .query_one("SELECT id FROM crates WHERE crates.name = $1", &[&name])? + .get(0); update_owners_in_database(conn, ®istry_data.owners, crate_id)?; @@ -360,64 +379,51 @@ fn update_owners_in_database( owners: &[CrateOwner], crate_id: i32, ) -> Result<()> { - let rows = conn.query( - " - SELECT login - FROM owners - INNER JOIN owner_rels - ON owner_rels.oid = owners.id - WHERE owner_rels.cid = $1 - ", - &[&crate_id], + // Update any existing owner data since it is mutable and could have changed since last + // time we pulled it + let owner_upsert = conn.prepare( + "INSERT INTO owners (login, avatar, name, email) + VALUES ($1, $2, $3, $4) + ON CONFLICT (login) DO UPDATE + SET + avatar = EXCLUDED.avatar, + name = EXCLUDED.name, + email = EXCLUDED.email", )?; - let existing_owners = rows.into_iter().map(|row| -> String { row.get(0) }); - for owner in owners { - debug!("Updating owner data for {}: {:?}", owner.login, owner); - - // Update any existing owner data since it is mutable and could have changed since last - // time we pulled it - let owner_id: i32 = { - conn.query( - " - INSERT INTO owners (login, avatar, name, email) - VALUES ($1, $2, $3, $4) - ON CONFLICT (login) DO UPDATE - SET - avatar = $2, - name = $3, - email = $4 - RETURNING id - ", - &[&owner.login, &owner.avatar, &owner.name, &owner.email], - )?[0] - .get(0) - }; - - // add relationship - conn.query( - "INSERT INTO owner_rels (cid, oid) VALUES ($1, $2) ON CONFLICT DO NOTHING", - &[&crate_id, &owner_id], + conn.execute( + &owner_upsert, + &[&owner.login, &owner.avatar, &owner.name, &owner.email], )?; } - let to_remove = - existing_owners.filter(|login| !owners.iter().any(|owner| &owner.login == login)); - - for login in to_remove { - debug!("Removing owner relationship {}", login); - // remove relationship - conn.query( - " - DELETE FROM owner_rels - USING owners - WHERE owner_rels.cid = $1 - AND owner_rels.oid = owners.id - AND owners.login = $2 - ", - &[&crate_id, &login], - )?; - } + let updated_oids: Vec = conn + .query( + "INSERT INTO owner_rels (cid, oid) + SELECT $1,id + FROM owners + WHERE login = ANY($2) + ON CONFLICT (cid,oid) + DO UPDATE -- we need this so the existing/updated records end + -- up being in the returned OIDs + SET oid=excluded.oid + RETURNING oid", + &[ + &crate_id, + &owners.iter().map(|o| o.login.clone()).collect::>(), + ], + )? + .iter() + .map(|row| row.get(0)) + .collect(); + + conn.execute( + "DELETE FROM owner_rels + WHERE + cid = $1 AND + NOT (oid = ANY($2))", + &[&crate_id, &updated_oids], + )?; Ok(()) } @@ -427,13 +433,13 @@ fn add_compression_into_database(conn: &mut Client, algorithms: I, release_id where I: Iterator, { - let sql = " - INSERT INTO compression_rels (release, algorithm) - VALUES ($1, $2) - ON CONFLICT DO NOTHING;"; - let prepared = conn.prepare(sql)?; + let prepared = conn.prepare( + "INSERT INTO compression_rels (release, algorithm) + VALUES ($1, $2) + ON CONFLICT DO NOTHING;", + )?; for alg in algorithms { - conn.query(&prepared, &[&release_id, &(alg as i32)])?; + conn.execute(&prepared, &[&release_id, &(alg as i32)])?; } Ok(()) }