From 070411c067577d85437291eb6f72b601309df99f Mon Sep 17 00:00:00 2001 From: Lukas Piatkowski Date: Wed, 29 Jul 2020 05:56:09 -0700 Subject: [PATCH 1/3] mononoke/integration test: make backsyncer_cmd public Summary: This command is used in some integration tests, make it public. Differential Revision: D22792846 fbshipit-source-id: 48c1d5574fbc988439ab76f6d6a0889bd8436f23 --- .../commit_rewriting/backsyncer/Cargo.toml | 23 +- .../commit_rewriting/backsyncer/src/main.rs | 440 ++++++++++++++++++ .../tests/integration/certs/gencerts.sh | 5 +- eden/mononoke/tests/integration/manifest_deps | 1 + .../tests/integration/run_tests_getdeps.py | 3 +- .../integration/test-backsyncer-merges.t | 4 +- 6 files changed, 467 insertions(+), 9 deletions(-) create mode 100644 eden/mononoke/commit_rewriting/backsyncer/src/main.rs diff --git a/eden/mononoke/commit_rewriting/backsyncer/Cargo.toml b/eden/mononoke/commit_rewriting/backsyncer/Cargo.toml index e099956a3cfc4..a763271498fd8 100644 --- a/eden/mononoke/commit_rewriting/backsyncer/Cargo.toml +++ b/eden/mononoke/commit_rewriting/backsyncer/Cargo.toml @@ -4,31 +4,48 @@ edition = "2018" version = "0.1.0" authors = ['Facebook'] license = "GPLv2+" -include = ["src/**/*.rs"] +include = ["src/lib.rs", "src/main.rs", "src/tests.rs"] + +[lib] +path = "src/lib.rs" + +[[bin]] +name = "backsyncer_cmd" +path = "src/main.rs" [dependencies] blobrepo = { path = "../../blobrepo" } blobrepo_factory = { path = "../../blobrepo/factory" } +blobrepo_hg = { path = "../../blobrepo/blobrepo_hg" } blobstore_factory = { path = "../../blobstore/factory" } bookmarks = { path = "../../bookmarks" } +cmdlib = { path = "../../cmdlib" } +cmdlib_x_repo = { path = "../../cmdlib/x_repo" } context = { path = "../../server/context" } cross_repo_sync = { path = "../cross_repo_sync" } +live_commit_sync_config = { path = "../live_commit_sync_config" } +mercurial_types = { path = "../../mercurial/types" } metaconfig_types = { path = "../../metaconfig/types" } mononoke_types = { path = "../../mononoke_types" } mutable_counters = { path = "../../mutable_counters" } +scuba_ext = { path = "../../common/scuba_ext" } sql_construct = { path = "../../common/sql_construct" } sql_ext = { path = "../../common/rust/sql_ext" } synced_commit_mapping = { path = "../synced_commit_mapping" } +cached_config = { git = "https://github.com/facebookexperimental/rust-shed.git", branch = "master" } cloned = { git = "https://github.com/facebookexperimental/rust-shed.git", branch = "master" } +fbinit = { git = "https://github.com/facebookexperimental/rust-shed.git", branch = "master" } sql = { git = "https://github.com/facebookexperimental/rust-shed.git", branch = "master" } +stats = { git = "https://github.com/facebookexperimental/rust-shed.git", branch = "master" } anyhow = "1.0" +clap = "2.33" futures = { version = "0.3.5", features = ["async-await", "compat"] } futures-old = { package = "futures", version = "0.1" } slog = { version = "2.5", features = ["max_level_debug"] } thiserror = "1.0" +tokio = { version = "=0.2.13", features = ["full"] } [dev-dependencies] -blobrepo_hg = { path = "../../blobrepo/blobrepo_hg" } blobrepo_override = { path = "../../blobrepo/override" } blobstore = { path = "../../blobstore" } bookmark_renaming = { path = "../bookmark_renaming" } @@ -36,12 +53,10 @@ dbbookmarks = { path = "../../bookmarks/dbbookmarks" } filestore = { path = "../../filestore" } fixtures = { path = "../../tests/fixtures" } manifest = { path = "../../manifest" } -mercurial_types = { path = "../../mercurial/types" } 
movers = { path = "../movers" } revset = { path = "../../revset" } skiplist = { path = "../../reachabilityindex/skiplist" } tests_utils = { path = "../../tests/utils" } -fbinit = { git = "https://github.com/facebookexperimental/rust-shed.git", branch = "master" } futures_ext = { git = "https://github.com/facebookexperimental/rust-shed.git", branch = "master" } maplit = "1.0" pretty_assertions = "0.6" diff --git a/eden/mononoke/commit_rewriting/backsyncer/src/main.rs b/eden/mononoke/commit_rewriting/backsyncer/src/main.rs new file mode 100644 index 0000000000000..e74d4c9661eb4 --- /dev/null +++ b/eden/mononoke/commit_rewriting/backsyncer/src/main.rs @@ -0,0 +1,440 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This software may be used and distributed according to the terms of the + * GNU General Public License version 2. + */ + +#![deny(warnings)] + +use anyhow::{bail, format_err, Error}; +use backsyncer::{ + backsync_latest, format_counter, open_backsyncer_dbs, BacksyncLimit, TargetRepoDbs, +}; +use blobrepo_hg::BlobRepoHg; +use bookmarks::Freshness; +use cached_config::ConfigStore; +use clap::{Arg, SubCommand}; +use cloned::cloned; +use cmdlib::{args, monitoring}; +use cmdlib_x_repo::{create_commit_syncer_args_from_matches, create_commit_syncer_from_matches}; +use context::{CoreContext, SessionContainer}; +use cross_repo_sync::{CommitSyncOutcome, CommitSyncer, CommitSyncerArgs}; +use fbinit::FacebookInit; +use futures::{ + compat::Future01CompatExt, + future::FutureExt, + stream::{self, StreamExt, TryStreamExt}, + try_join, +}; +use futures_old::{stream::Stream as Stream_old, Future}; +use live_commit_sync_config::{CfgrLiveCommitSyncConfig, LiveCommitSyncConfig}; +use mercurial_types::HgChangesetId; +use mononoke_types::ChangesetId; +use mutable_counters::MutableCounters; +use scuba_ext::ScubaSampleBuilder; +use slog::{debug, info}; +use stats::prelude::*; +use std::fs::File; +use std::io::{BufRead, BufReader}; +use std::str::FromStr; +use std::time::Duration; +use synced_commit_mapping::{SqlSyncedCommitMapping, SyncedCommitMapping}; + +const ARG_MODE_BACKSYNC_FOREVER: &str = "backsync-forever"; +const ARG_MODE_BACKSYNC_ALL: &str = "backsync-all"; +const ARG_MODE_BACKSYNC_COMMITS: &str = "backsync-commits"; +const ARG_BATCH_SIZE: &str = "batch-size"; +const ARG_INPUT_FILE: &str = "INPUT_FILE"; +const SCUBA_TABLE: &'static str = "mononoke_xrepo_backsync"; + +define_stats! 
{ + prefix = "mononoke.backsyncer"; + remaining_entries: dynamic_singleton_counter( + "{}.{}.remaining_entries", + (source_repo_name: String, target_repo_name: String) + ), + delay_secs: dynamic_singleton_counter( + "{}.{}.delay_secs", + (source_repo_name: String, target_repo_name: String) + ), +} + +fn extract_cs_id_from_sync_outcome( + source_cs_id: ChangesetId, + maybe_sync_outcome: Option, +) -> Result, Error> { + use CommitSyncOutcome::*; + + match maybe_sync_outcome { + Some(Preserved) => Ok(Some(source_cs_id)), + Some(RewrittenAs(cs_id, _)) => Ok(Some(cs_id)), + Some(NotSyncCandidate) => Ok(None), + Some(EquivalentWorkingCopyAncestor(cs_id)) => Ok(Some(cs_id)), + None => Err(format_err!( + "sync outcome is not available for {}", + source_cs_id + )), + } +} + +async fn derive_target_hg_changesets( + ctx: &CoreContext, + maybe_target_cs_id: Option, + commit_syncer: &CommitSyncer, +) -> Result<(), Error> { + match maybe_target_cs_id { + Some(target_cs_id) => { + commit_syncer + .get_target_repo() + .get_hg_from_bonsai_changeset(ctx.clone(), target_cs_id) + .map(move |hg_cs_id| { + info!( + ctx.logger(), + "Hg cs id {} derived for {}", hg_cs_id, target_cs_id + ); + }) + .compat() + .await + } + None => Ok(()), + } +} + +pub async fn backsync_forever( + ctx: CoreContext, + config_store: ConfigStore, + commit_syncer_args: CommitSyncerArgs, + target_repo_dbs: TargetRepoDbs, + source_repo_name: String, + target_repo_name: String, +) -> Result<(), Error> +where + M: SyncedCommitMapping + Clone + 'static, +{ + let target_repo_id = commit_syncer_args.get_target_repo_id(); + let live_commit_sync_config = CfgrLiveCommitSyncConfig::new(ctx.logger(), &config_store)?; + + loop { + // We only care about public pushes because draft pushes are not in the bookmark + // update log at all. + let enabled = live_commit_sync_config.push_redirector_enabled_for_public(target_repo_id); + + if enabled { + let delay = calculate_delay(&ctx, &commit_syncer_args, &target_repo_dbs).await?; + log_delay(&ctx, &delay, &source_repo_name, &target_repo_name); + if delay.remaining_entries == 0 { + debug!(ctx.logger(), "no entries remained"); + tokio::time::delay_for(Duration::new(1, 0)).await; + } else { + debug!(ctx.logger(), "backsyncing..."); + let commit_sync_config = + live_commit_sync_config.get_current_commit_sync_config(&ctx, target_repo_id)?; + + let commit_syncer = commit_syncer_args + .clone() + .try_into_commit_syncer(&commit_sync_config)?; + + backsync_latest( + ctx.clone(), + commit_syncer, + target_repo_dbs.clone(), + BacksyncLimit::NoLimit, + ) + .await? + } + } else { + debug!(ctx.logger(), "push redirector is disabled"); + let delay = Delay::no_delay(); + log_delay(&ctx, &delay, &source_repo_name, &target_repo_name); + tokio::time::delay_for(Duration::new(1, 0)).await; + } + } +} + +struct Delay { + delay_secs: i64, + remaining_entries: u64, +} + +impl Delay { + fn no_delay() -> Self { + Self { + delay_secs: 0, + remaining_entries: 0, + } + } +} + +// Returns logs delay and returns the number of remaining bookmark update log entries +async fn calculate_delay( + ctx: &CoreContext, + commit_syncer_args: &CommitSyncerArgs, + target_repo_dbs: &TargetRepoDbs, +) -> Result +where + M: SyncedCommitMapping + Clone + 'static, +{ + let TargetRepoDbs { ref counters, .. 
} = target_repo_dbs; + let target_repo_id = commit_syncer_args.get_target_repo().get_repoid(); + let source_repo_id = commit_syncer_args.get_source_repo().get_repoid(); + + let counter_name = format_counter(&source_repo_id); + let maybe_counter = counters + .get_counter(ctx.clone(), target_repo_id, &counter_name) + .compat() + .await?; + let counter = maybe_counter.ok_or(format_err!("{} counter not found", counter_name))?; + let source_repo = commit_syncer_args.get_source_repo(); + let next_entry = source_repo + .read_next_bookmark_log_entries(ctx.clone(), counter as u64, 1, Freshness::MostRecent) + .collect() + .compat(); + let remaining_entries = source_repo + .count_further_bookmark_log_entries(ctx.clone(), counter as u64, None) + .compat(); + + let (next_entry, remaining_entries) = try_join!(next_entry, remaining_entries)?; + let delay_secs = next_entry + .get(0) + .map(|entry| entry.timestamp.since_seconds()) + .unwrap_or(0); + + Ok(Delay { + delay_secs, + remaining_entries, + }) +} + +fn log_delay( + ctx: &CoreContext, + delay: &Delay, + source_repo_name: &String, + target_repo_name: &String, +) { + STATS::remaining_entries.set_value( + ctx.fb, + delay.remaining_entries as i64, + (source_repo_name.clone(), target_repo_name.clone()), + ); + STATS::delay_secs.set_value( + ctx.fb, + delay.delay_secs, + (source_repo_name.clone(), target_repo_name.clone()), + ); +} + +#[fbinit::main] +fn main(fb: FacebookInit) -> Result<(), Error> { + let app_name = "backsyncer cmd-line tool"; + let app = args::MononokeApp::new(app_name) + .with_fb303_args() + .with_test_args() + .with_source_and_target_repos() + .build(); + let backsync_forever_subcommand = + SubCommand::with_name(ARG_MODE_BACKSYNC_FOREVER).about("Backsyncs all new bookmark moves"); + + let sync_loop = SubCommand::with_name(ARG_MODE_BACKSYNC_COMMITS) + .about("Syncs all commits from the file") + .arg( + Arg::with_name(ARG_INPUT_FILE) + .takes_value(true) + .required(true) + .help("list of hg commits to backsync"), + ) + .arg( + Arg::with_name(ARG_BATCH_SIZE) + .long(ARG_BATCH_SIZE) + .takes_value(true) + .required(false) + .help("how many commits to backsync at once"), + ); + + let backsync_all_subcommand = + SubCommand::with_name(ARG_MODE_BACKSYNC_ALL).about("Backsyncs all new bookmark moves once"); + let app = app + .subcommand(backsync_all_subcommand) + .subcommand(backsync_forever_subcommand) + .subcommand(sync_loop); + let matches = app.get_matches(); + + let (_, logger, mut runtime) = args::init_mononoke(fb, &matches, None)?; + + let source_repo_id = args::get_source_repo_id(fb, &matches)?; + let target_repo_id = args::get_target_repo_id(fb, &matches)?; + + let (source_repo_name, _) = args::get_config_by_repoid(fb, &matches, source_repo_id)?; + let (target_repo_name, target_repo_config) = + args::get_config_by_repoid(fb, &matches, target_repo_id)?; + + let commit_syncer_args = runtime.block_on_std(create_commit_syncer_args_from_matches( + fb, &logger, &matches, + ))?; + let mysql_options = args::parse_mysql_options(&matches); + let readonly_storage = args::parse_readonly_storage(&matches); + + let session_container = SessionContainer::new_with_defaults(fb); + + info!( + logger, + "syncing from repoid {:?} into repoid {:?}", source_repo_id, target_repo_id, + ); + + match matches.subcommand() { + (ARG_MODE_BACKSYNC_ALL, _) => { + // NOTE: this does not use `CfgrLiveCommitSyncConfig`, as I want to allow + // for an opportunity to call this binary in non-forever mode with + // local fs-based configs + let commit_syncer = + 
runtime.block_on_std(create_commit_syncer_from_matches(fb, &logger, &matches))?; + + let scuba_sample = ScubaSampleBuilder::with_discard(); + let ctx = session_container.new_context(logger.clone(), scuba_sample); + let db_config = target_repo_config.storage_config.metadata; + let target_repo_dbs = runtime.block_on_std( + open_backsyncer_dbs( + ctx.clone(), + commit_syncer.get_target_repo().clone(), + db_config, + mysql_options, + readonly_storage, + ) + .boxed(), + )?; + + runtime.block_on_std( + backsync_latest(ctx, commit_syncer, target_repo_dbs, BacksyncLimit::NoLimit) + .boxed(), + )?; + } + (ARG_MODE_BACKSYNC_FOREVER, _) => { + let db_config = target_repo_config.storage_config.metadata; + let ctx = + session_container.new_context(logger.clone(), ScubaSampleBuilder::with_discard()); + let target_repo_dbs = runtime.block_on_std( + open_backsyncer_dbs( + ctx, + commit_syncer_args.get_target_repo().clone(), + db_config, + mysql_options, + readonly_storage, + ) + .boxed(), + )?; + + let config_store = args::maybe_init_config_store(fb, &logger, &matches) + .ok_or_else(|| format_err!("Failed initializing ConfigStore"))?; + + let mut scuba_sample = ScubaSampleBuilder::new(fb, SCUBA_TABLE); + scuba_sample.add("source_repo", source_repo_id.id()); + scuba_sample.add("source_repo_name", source_repo_name.clone()); + scuba_sample.add("target_repo", target_repo_id.id()); + scuba_sample.add("target_repo_name", target_repo_name.clone()); + scuba_sample.add_common_server_data(); + + let ctx = session_container.new_context(logger.clone(), scuba_sample); + let f = backsync_forever( + ctx, + config_store, + commit_syncer_args, + target_repo_dbs, + source_repo_name, + target_repo_name, + ) + .boxed(); + + monitoring::start_fb303_and_stats_agg( + fb, + &mut runtime, + app_name, + &logger, + &matches, + monitoring::AliveService, + )?; + runtime.block_on_std(f)?; + } + (ARG_MODE_BACKSYNC_COMMITS, Some(sub_m)) => { + // NOTE: this does not use `CfgrLiveCommitSyncConfig`, as I want to allow + // for an opportunity to call this binary in non-forever mode with + // local fs-based configs + let commit_syncer = + runtime.block_on_std(create_commit_syncer_from_matches(fb, &logger, &matches))?; + + let ctx = session_container.new_context(logger, ScubaSampleBuilder::with_discard()); + let inputfile = sub_m + .value_of(ARG_INPUT_FILE) + .expect("input file is not set"); + let inputfile = File::open(inputfile)?; + let file = BufReader::new(&inputfile); + let batch_size = args::get_usize(&matches, ARG_BATCH_SIZE, 100); + + let source_repo = commit_syncer.get_source_repo().clone(); + + let mut hg_cs_ids = vec![]; + for line in file.lines() { + hg_cs_ids.push(HgChangesetId::from_str(&line?)?); + } + let total_to_backsync = hg_cs_ids.len(); + info!(ctx.logger(), "backsyncing {} commits", total_to_backsync); + + let ctx = &ctx; + let commit_syncer = &commit_syncer; + + let f = stream::iter(hg_cs_ids.clone()) + .chunks(batch_size) + .map(Result::<_, Error>::Ok) + .and_then({ + cloned!(ctx); + move |chunk| { + source_repo + .get_hg_bonsai_mapping(ctx.clone(), chunk) + .compat() + } + }) + .try_fold(0, move |backsynced_so_far, hg_bonsai_mapping| { + hg_bonsai_mapping + .into_iter() + .map({ + move |(_, bonsai)| async move { + commit_syncer.sync_commit(&ctx, bonsai.clone()).await?; + + let maybe_sync_outcome = commit_syncer + .get_commit_sync_outcome(ctx.clone(), bonsai) + .await?; + + info!( + ctx.logger(), + "{} backsynced as {:?}", bonsai, maybe_sync_outcome + ); + + let maybe_target_cs_id = + 
extract_cs_id_from_sync_outcome(bonsai, maybe_sync_outcome)?; + + derive_target_hg_changesets(ctx, maybe_target_cs_id, commit_syncer) + .await + } + }) + .collect::>() + .try_fold(backsynced_so_far, { + move |backsynced_so_far, _| async move { + info!( + ctx.logger(), + "backsynced so far {} out of {}", + backsynced_so_far + 1, + total_to_backsync + ); + Ok::<_, Error>(backsynced_so_far + 1) + } + }) + }); + + runtime.block_on_std(f)?; + } + _ => { + bail!("unknown subcommand"); + } + } + + Ok(()) +} diff --git a/eden/mononoke/tests/integration/certs/gencerts.sh b/eden/mononoke/tests/integration/certs/gencerts.sh index ec5cd18aea90e..b9d0798d8f484 100755 --- a/eden/mononoke/tests/integration/certs/gencerts.sh +++ b/eden/mononoke/tests/integration/certs/gencerts.sh @@ -1,5 +1,8 @@ #!/usr/bin/env bash -# (c) Facebook, Inc. and its affiliates. Confidential and proprietary. +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2. set -x diff --git a/eden/mononoke/tests/integration/manifest_deps b/eden/mononoke/tests/integration/manifest_deps index ef4ffba23bde7..bd0b726b7d944 100644 --- a/eden/mononoke/tests/integration/manifest_deps +++ b/eden/mononoke/tests/integration/manifest_deps @@ -4,6 +4,7 @@ # The values here should correspond to both the name of binary as builded by # getdeps.py's Mononoke project and the buck target. Those names must be unique. MONONOKE_BINS = { + "BACKSYNCER": "backsyncer_cmd", "EDENAPI_SERVER": "edenapi_server", "LFS_SERVER": "lfs_server", "MONONOKE_ADMIN": "admin", diff --git a/eden/mononoke/tests/integration/run_tests_getdeps.py b/eden/mononoke/tests/integration/run_tests_getdeps.py index 9333bd324ac9f..3a2d7f169d603 100755 --- a/eden/mononoke/tests/integration/run_tests_getdeps.py +++ b/eden/mononoke/tests/integration/run_tests_getdeps.py @@ -72,7 +72,6 @@ else: excluded_tests = { "test-backsync-forever.t", # Unknown issue - "test-backsyncer-merges.t", # Missing BACKSYNCER "test-blobimport-lfs.t", # Timed out "test-blobimport.t", # Case insensitivity of paths in MacOS "test-blobstore_healer.t", # PANIC not implemented in sql_ext @@ -124,7 +123,7 @@ "test-push-protocol-lfs.t", # Timed out "test-push-redirector-pushrebase-hooks.t", # Hooks are not in OSS yet "test-push-redirector-pushrebase-onesided.t", # Missing MONONOKE_X_REPO_SYNC - "test-push-redirector-sync-job.t", # Missing BACKSYNCER + "test-push-redirector-sync-job.t", # Missing MONONOKE_HG_SYNC "test-pushrebase-block-casefolding.t", # Most likely MacOS path case insensitivity "test-pushrebase-discovery.t", # Hooks are not in OSS yet "test-remotefilelog-lfs.t", # Timed out diff --git a/eden/mononoke/tests/integration/test-backsyncer-merges.t b/eden/mononoke/tests/integration/test-backsyncer-merges.t index d58e9abf8aa71..1eed963c98c7e 100644 --- a/eden/mononoke/tests/integration/test-backsyncer-merges.t +++ b/eden/mononoke/tests/integration/test-backsyncer-merges.t @@ -83,7 +83,7 @@ although the second one became non-merge commit Make sure we have directory from the first move, but not from the second - $ ls - filetoremove + $ LC_ALL=C ls file.txt + filetoremove newrepo From 2b4300ea35aa83d5955a3d19a23c3621c0c6daab Mon Sep 17 00:00:00 2001 From: Lukas Piatkowski Date: Wed, 29 Jul 2020 05:56:09 -0700 Subject: [PATCH 2/3] mononoke/hg_sync_job: make mononoke_hg_sync_job public Summary: mononoke_hg_sync_job is used in integration tests, make it public Differential Revision: D22795881 
fbshipit-source-id: 2eedf59843aed89a6b3f4fd44c21fcdf06d76916 --- eden/mononoke/Cargo.toml | 4 +- eden/mononoke/common/rust/sql_ext/src/oss.rs | 13 +- eden/mononoke/mononoke_hg_sync_job/Cargo.toml | 78 + .../helper_lib}/lib.rs | 0 .../helper_lib}/pushrebase_replay.bundle | Bin .../mononoke_hg_sync_job/schemas/hgsql.sql | 13 + .../src/bundle_generator.rs | 373 +++++ .../src/bundle_preparer.rs | 312 ++++ .../mononoke_hg_sync_job/src/errors.rs | 40 + .../src/globalrev_syncer.rs | 262 ++++ .../mononoke_hg_sync_job/src/hgrepo.rs | 609 ++++++++ .../mononoke_hg_sync_job/src/lfs_verifier.rs | 148 ++ .../src/listserverbookmarks.py | 29 + .../mononoke/mononoke_hg_sync_job/src/main.rs | 1259 +++++++++++++++++ .../src/sendunbundlereplay.py | 199 +++ .../Cargo.toml | 24 - eden/mononoke/tests/integration/manifest_deps | 1 + .../tests/integration/run_tests_getdeps.py | 11 +- 18 files changed, 3337 insertions(+), 38 deletions(-) create mode 100644 eden/mononoke/mononoke_hg_sync_job/Cargo.toml rename eden/mononoke/{mononoke_hg_sync_job_helper_lib/src => mononoke_hg_sync_job/helper_lib}/lib.rs (100%) rename eden/mononoke/{mononoke_hg_sync_job_helper_lib/src => mononoke_hg_sync_job/helper_lib}/pushrebase_replay.bundle (100%) create mode 100644 eden/mononoke/mononoke_hg_sync_job/schemas/hgsql.sql create mode 100644 eden/mononoke/mononoke_hg_sync_job/src/bundle_generator.rs create mode 100644 eden/mononoke/mononoke_hg_sync_job/src/bundle_preparer.rs create mode 100644 eden/mononoke/mononoke_hg_sync_job/src/errors.rs create mode 100644 eden/mononoke/mononoke_hg_sync_job/src/globalrev_syncer.rs create mode 100644 eden/mononoke/mononoke_hg_sync_job/src/hgrepo.rs create mode 100644 eden/mononoke/mononoke_hg_sync_job/src/lfs_verifier.rs create mode 100644 eden/mononoke/mononoke_hg_sync_job/src/listserverbookmarks.py create mode 100644 eden/mononoke/mononoke_hg_sync_job/src/main.rs create mode 100644 eden/mononoke/mononoke_hg_sync_job/src/sendunbundlereplay.py delete mode 100644 eden/mononoke/mononoke_hg_sync_job_helper_lib/Cargo.toml diff --git a/eden/mononoke/Cargo.toml b/eden/mononoke/Cargo.toml index 6aa480b8fe19c..1e0bbcffc5d9c 100644 --- a/eden/mononoke/Cargo.toml +++ b/eden/mononoke/Cargo.toml @@ -118,7 +118,7 @@ mercurial_derived_data = { path = "derived_data/mercurial_derived_data" } mercurial_revlog = { path = "mercurial/revlog" } mercurial_types = { path = "mercurial/types" } metaconfig_types = { path = "metaconfig/types" } -mononoke_hg_sync_job_helper_lib = { path = "mononoke_hg_sync_job_helper_lib" } +mononoke_hg_sync_job_helper_lib = { path = "mononoke_hg_sync_job" } mononoke_types = { path = "mononoke_types" } movers = { path = "commit_rewriting/movers" } mutable_counters = { path = "mutable_counters" } @@ -298,7 +298,7 @@ members = [ "microwave/builder", "microwave/if", "mononoke_api", - "mononoke_hg_sync_job_helper_lib", + "mononoke_hg_sync_job", "mononoke_types", "mononoke_types/if", "mononoke_types/mocks", diff --git a/eden/mononoke/common/rust/sql_ext/src/oss.rs b/eden/mononoke/common/rust/sql_ext/src/oss.rs index 46a53d481709c..e1ec3181b5809 100644 --- a/eden/mononoke/common/rust/sql_ext/src/oss.rs +++ b/eden/mononoke/common/rust/sql_ext/src/oss.rs @@ -9,7 +9,8 @@ use crate::{facebook::*, *}; use anyhow::Error; use fbinit::FacebookInit; -use futures_ext::BoxFuture; +use futures_ext::{BoxFuture, FutureExt}; +use futures_old::future::ok; use slog::Logger; macro_rules! 
fb_unimplemented { @@ -40,7 +41,15 @@ pub fn create_myrouter_connections( fb_unimplemented!() } -pub fn myrouter_ready(_: Option, _: MysqlOptions, _: Logger) -> BoxFuture<(), Error> { +pub fn myrouter_ready( + db_addr_opt: Option, + mysql_options: MysqlOptions, + _: Logger, +) -> BoxFuture<(), Error> { + if db_addr_opt.is_none() || mysql_options.myrouter_port.is_none() { + return ok(()).boxify(); + }; + fb_unimplemented!() } diff --git a/eden/mononoke/mononoke_hg_sync_job/Cargo.toml b/eden/mononoke/mononoke_hg_sync_job/Cargo.toml new file mode 100644 index 0000000000000..523fb26873dc5 --- /dev/null +++ b/eden/mononoke/mononoke_hg_sync_job/Cargo.toml @@ -0,0 +1,78 @@ +[package] +name = "mononoke_hg_sync_job_helper_lib" +edition = "2018" +version = "0.1.0" +authors = ['Facebook'] +license = "GPLv2+" +include = ["helper_lib/**/*.rs", "schemas/**/*.sql", "src/**/*.py", "src/**/*.rs"] + +[lib] +path = "helper_lib/lib.rs" + +[[bin]] +name = "mononoke_hg_sync_job" +path = "src/main.rs" + +[dependencies] +blobrepo = { path = "../blobrepo" } +blobrepo_hg = { path = "../blobrepo/blobrepo_hg" } +blobstore = { path = "../blobstore" } +bookmarks = { path = "../bookmarks" } +cmdlib = { path = "../cmdlib" } +context = { path = "../server/context" } +dbbookmarks = { path = "../bookmarks/dbbookmarks" } +getbundle_response = { path = "../repo_client/getbundle_response" } +hgserver_config = { path = "../../../configerator/structs/scm/mononoke/hgserverconf" } +lfs_protocol = { path = "../lfs_protocol" } +mercurial_bundle_replay_data = { path = "../mercurial/bundle_replay_data" } +mercurial_bundles = { path = "../mercurial/bundles" } +mercurial_revlog = { path = "../mercurial/revlog" } +mercurial_types = { path = "../mercurial/types" } +metaconfig_types = { path = "../metaconfig/types" } +mononoke_types = { path = "../mononoke_types" } +mutable_counters = { path = "../mutable_counters" } +reachabilityindex = { path = "../reachabilityindex" } +repo_read_write_status = { path = "../repo_client/repo_read_write_status" } +revset = { path = "../revset" } +scuba_ext = { path = "../common/scuba_ext" } +skiplist = { path = "../reachabilityindex/skiplist" } +sql_construct = { path = "../common/sql_construct" } +sql_ext = { path = "../common/rust/sql_ext" } +cloned = { git = "https://github.com/facebookexperimental/rust-shed.git", branch = "master" } +failure_ext = { git = "https://github.com/facebookexperimental/rust-shed.git", branch = "master" } +fbinit = { git = "https://github.com/facebookexperimental/rust-shed.git", branch = "master" } +futures_ext = { git = "https://github.com/facebookexperimental/rust-shed.git", branch = "master" } +futures_stats = { git = "https://github.com/facebookexperimental/rust-shed.git", branch = "master" } +sql = { git = "https://github.com/facebookexperimental/rust-shed.git", branch = "master" } +anyhow = "1.0" +base64 = "0.11.0" +bytes = { version = "0.5", features = ["serde"] } +bytes-old = { package = "bytes", version = "0.4", features = ["serde"] } +clap = "2.33" +futures = { version = "0.3.5", features = ["async-await", "compat"] } +futures-old = { package = "futures", version = "0.1" } +http = "0.1" +hyper = "0.12" +hyper-openssl = "0.7" +itertools = "0.8" +maplit = "1.0" +parking_lot = "0.10.2" +regex = "1.3.7" +serde_json = "1.0" +slog = { version = "2.5", features = ["max_level_debug"] } +tempfile = "3.1" +thiserror = "1.0" +tokio = { version = "=0.2.13", features = ["full"] } +tokio-io = "0.1" +tokio-old = { package = "tokio", version = "0.1" } +tokio-process = "0.2" 
+tokio-timer = "0.2" + +[dev-dependencies] +blobrepo_factory = { path = "../blobrepo/factory" } +bonsai_globalrev_mapping = { path = "../bonsai_globalrev_mapping" } +mercurial_types-mocks = { path = "../mercurial/types/mocks" } +mononoke_types-mocks = { path = "../mononoke_types/mocks" } +async_unit = { git = "https://github.com/facebookexperimental/rust-shed.git", branch = "master" } +assert_matches = "1.3" +tokio-compat = "0.1" diff --git a/eden/mononoke/mononoke_hg_sync_job_helper_lib/src/lib.rs b/eden/mononoke/mononoke_hg_sync_job/helper_lib/lib.rs similarity index 100% rename from eden/mononoke/mononoke_hg_sync_job_helper_lib/src/lib.rs rename to eden/mononoke/mononoke_hg_sync_job/helper_lib/lib.rs diff --git a/eden/mononoke/mononoke_hg_sync_job_helper_lib/src/pushrebase_replay.bundle b/eden/mononoke/mononoke_hg_sync_job/helper_lib/pushrebase_replay.bundle similarity index 100% rename from eden/mononoke/mononoke_hg_sync_job_helper_lib/src/pushrebase_replay.bundle rename to eden/mononoke/mononoke_hg_sync_job/helper_lib/pushrebase_replay.bundle diff --git a/eden/mononoke/mononoke_hg_sync_job/schemas/hgsql.sql b/eden/mononoke/mononoke_hg_sync_job/schemas/hgsql.sql new file mode 100644 index 0000000000000..079730f8f51d5 --- /dev/null +++ b/eden/mononoke/mononoke_hg_sync_job/schemas/hgsql.sql @@ -0,0 +1,13 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This software may be used and distributed according to the terms of the + * GNU General Public License version 2. + */ + +CREATE TABLE `revision_references` ( + `repo` VARBINARY(64) NOT NULL, + `namespace` VARBINARY(32) NOT NULL, + `name` VARBINARY(256) NULL, + `value` VARBINARY(40) NOT NULL +); diff --git a/eden/mononoke/mononoke_hg_sync_job/src/bundle_generator.rs b/eden/mononoke/mononoke_hg_sync_job/src/bundle_generator.rs new file mode 100644 index 0000000000000..1f9cffc59a1a4 --- /dev/null +++ b/eden/mononoke/mononoke_hg_sync_job/src/bundle_generator.rs @@ -0,0 +1,373 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This software may be used and distributed according to the terms of the + * GNU General Public License version 2. 
+ */ + +use crate::lfs_verifier::LfsVerifier; +use anyhow::{bail, Error}; +use blobrepo::BlobRepo; +use blobrepo_hg::BlobRepoHg; +use blobstore::Loadable; +use bookmarks::{BookmarkName, BookmarkUpdateLogEntry}; +use bytes::Bytes; +use bytes_old::Bytes as BytesOld; +use cloned::cloned; +use context::CoreContext; +use futures::future::{FutureExt as NewFutureExt, TryFutureExt}; +use futures_ext::{try_boxfuture, FutureExt, StreamExt}; +use futures_old::{ + future::{self, IntoFuture}, + stream, Future, Stream, +}; +use getbundle_response::{ + create_filenodes, create_manifest_entries_stream, get_manifests_and_filenodes, + PreparedFilenodeEntry, SessionLfsParams, +}; +use maplit::hashmap; +use mercurial_bundles::{ + capabilities::{encode_capabilities, Capabilities}, + changegroup::CgVersion, + create_bundle_stream, parts, +}; +use mercurial_revlog::RevlogChangeset; +use mercurial_types::{HgBlobNode, HgChangesetId, MPath}; +use mononoke_types::{datetime::Timestamp, hash::Sha256, ChangesetId}; +use reachabilityindex::LeastCommonAncestorsHint; +use revset::DifferenceOfUnionsOfAncestorsNodeStream; +use slog::debug; +use std::{collections::HashMap, sync::Arc}; + +pub fn create_bundle( + ctx: CoreContext, + repo: BlobRepo, + lca_hint: Arc, + bookmark: BookmarkName, + bookmark_change: BookmarkChange, + hg_server_heads: Vec, + lfs_params: SessionLfsParams, + filenode_verifier: FilenodeVerifier, +) -> impl Future), Error = Error> { + let commits_to_push = find_commits_to_push( + ctx.clone(), + repo.clone(), + lca_hint.clone(), + // Always add "from" bookmark, because is must to be on the hg server + // If it's not then the push will fail anyway + hg_server_heads + .into_iter() + .chain(bookmark_change.get_from().into_iter()), + bookmark_change.get_to(), + ) + .collect() + .map(|reversed| reversed.into_iter().rev().collect()); + + commits_to_push + .and_then({ + move |commits_to_push: Vec<_>| { + debug!( + ctx.logger(), + "generating a bundle with {} commits", + commits_to_push.len() + ); + let bundle = create_bundle_impl( + ctx.clone(), + repo.clone(), + bookmark, + bookmark_change, + commits_to_push.clone(), + lfs_params, + filenode_verifier, + ); + let timestamps = fetch_timestamps(ctx, repo, commits_to_push); + bundle.join(timestamps) + } + }) + .boxify() +} + +pub enum BookmarkChange { + Created(ChangesetId), + Deleted(ChangesetId), + Moved { from: ChangesetId, to: ChangesetId }, +} + +impl BookmarkChange { + pub fn new(entry: &BookmarkUpdateLogEntry) -> Result { + match (entry.from_changeset_id, entry.to_changeset_id) { + (Some(ref from), None) => Ok(BookmarkChange::Deleted(*from)), + (None, Some(ref to)) => Ok(BookmarkChange::Created(*to)), + (Some(ref from), Some(ref to)) => Ok(BookmarkChange::Moved { + from: *from, + to: *to, + }), + (None, None) => bail!("unsupported bookmark move: deletion of non-existent bookmark?",), + } + } + + fn get_from(&self) -> Option { + use BookmarkChange::*; + + match self { + Created(_) => None, + Deleted(cs_id) => Some(*cs_id), + Moved { from, .. } => Some(*from), + } + } + + fn get_from_hg( + &self, + ctx: CoreContext, + repo: &BlobRepo, + ) -> impl Future, Error = Error> { + Self::maybe_get_hg(ctx, self.get_from(), repo) + } + + fn get_to(&self) -> Option { + use BookmarkChange::*; + + match self { + Created(cs_id) => Some(*cs_id), + Deleted(_) => None, + Moved { to, .. 
} => Some(*to), + } + } + + fn get_to_hg( + &self, + ctx: CoreContext, + repo: &BlobRepo, + ) -> impl Future, Error = Error> { + Self::maybe_get_hg(ctx, self.get_to(), repo) + } + + fn maybe_get_hg( + ctx: CoreContext, + maybe_cs: Option, + repo: &BlobRepo, + ) -> impl Future, Error = Error> { + match maybe_cs { + Some(cs_id) => repo + .get_hg_from_bonsai_changeset(ctx, cs_id) + .map(Some) + .left_future(), + None => future::ok(None).right_future(), + } + } +} + +#[derive(Clone)] +pub enum FilenodeVerifier { + NoopVerifier, + LfsVerifier(LfsVerifier), +} + +impl FilenodeVerifier { + fn verify_entries( + &self, + filenode_entries: &HashMap>, + ) -> impl Future + 'static { + match self { + Self::NoopVerifier => Ok(()).into_future().left_future(), + Self::LfsVerifier(lfs_verifier) => { + let lfs_blobs: Vec<(Sha256, u64)> = filenode_entries + .values() + .map(|entries| entries.into_iter()) + .flatten() + .filter_map(|entry| { + entry + .maybe_get_lfs_pointer() + .map(|(sha256, size)| (sha256, size)) + }) + .collect(); + + lfs_verifier.verify_lfs_presence(&lfs_blobs).right_future() + } + } + } +} + +fn create_bundle_impl( + ctx: CoreContext, + repo: BlobRepo, + bookmark: BookmarkName, + bookmark_change: BookmarkChange, + commits_to_push: Vec, + session_lfs_params: SessionLfsParams, + filenode_verifier: FilenodeVerifier, +) -> impl Future { + let changelog_entries = stream::iter_ok(commits_to_push.clone()) + .map({ + cloned!(ctx, repo); + move |hg_cs_id| { + hg_cs_id + .load(ctx.clone(), repo.blobstore()) + .compat() + .from_err() + .map(move |cs| (hg_cs_id, cs)) + } + }) + .buffered(100) + .and_then(|(hg_cs_id, cs)| { + let revlogcs = RevlogChangeset::new_from_parts( + cs.parents().clone(), + cs.manifestid().clone(), + cs.user().into(), + cs.time().clone(), + cs.extra().clone(), + cs.files().into(), + cs.message().into(), + ); + + let mut v = Vec::new(); + mercurial_revlog::changeset::serialize_cs(&revlogcs, &mut v)?; + Ok(( + hg_cs_id.into_nodehash(), + HgBlobNode::new(Bytes::from(v), revlogcs.p1(), revlogcs.p2()), + )) + }); + + let entries = { + cloned!(ctx, repo, commits_to_push, session_lfs_params); + async move { + get_manifests_and_filenodes(&ctx, &repo, commits_to_push, &session_lfs_params).await + } + .boxed() + .compat() + }; + + ( + entries, + bookmark_change.get_from_hg(ctx.clone(), &repo), + bookmark_change.get_to_hg(ctx.clone(), &repo), + ) + .into_future() + .and_then( + move |((manifests, prepared_filenode_entries), maybe_from, maybe_to)| { + let mut bundle2_parts = + vec![try_boxfuture!(parts::replycaps_part(create_capabilities()))]; + + debug!( + ctx.logger(), + "prepared {} manifests and {} filenodes", + manifests.len(), + prepared_filenode_entries.len() + ); + let cg_version = if session_lfs_params.threshold.is_some() { + CgVersion::Cg3Version + } else { + CgVersion::Cg2Version + }; + + // Check that the filenodes pass the verifier prior to serializing them. 
+ let verify_ok = filenode_verifier.verify_entries(&prepared_filenode_entries); + + let filenode_entries = + create_filenodes(ctx.clone(), repo.clone(), prepared_filenode_entries).boxify(); + + let filenode_entries = verify_ok + .and_then(move |_| Ok(filenode_entries)) + .flatten_stream() + .boxify(); + + if commits_to_push.len() > 0 { + bundle2_parts.push(try_boxfuture!(parts::changegroup_part( + changelog_entries, + Some(filenode_entries), + cg_version, + ))); + + bundle2_parts.push(try_boxfuture!(parts::treepack_part( + create_manifest_entries_stream(ctx, repo.get_blobstore(), manifests) + ))); + } + + bundle2_parts.push(try_boxfuture!(parts::bookmark_pushkey_part( + bookmark.to_string(), + format!( + "{}", + maybe_from.map(|x| x.to_string()).unwrap_or(String::new()) + ), + format!( + "{}", + maybe_to.map(|x| x.to_string()).unwrap_or(String::new()) + ), + ))); + + let compression = None; + create_bundle_stream(bundle2_parts, compression) + .concat2() + .boxify() + }, + ) +} + +fn fetch_timestamps( + ctx: CoreContext, + repo: BlobRepo, + hg_cs_ids: impl IntoIterator, +) -> impl Future, Error = Error> { + stream::iter_ok(hg_cs_ids) + .map(move |hg_cs_id| { + hg_cs_id + .load(ctx.clone(), repo.blobstore()) + .compat() + .from_err() + .map(move |hg_blob_cs| (hg_cs_id, hg_blob_cs.time().clone().into())) + }) + .buffered(100) + .collect_to() +} + +fn find_commits_to_push( + ctx: CoreContext, + repo: BlobRepo, + lca_hint_index: Arc, + hg_server_heads: impl IntoIterator, + maybe_to_cs_id: Option, +) -> impl Stream { + DifferenceOfUnionsOfAncestorsNodeStream::new_with_excludes( + ctx.clone(), + &repo.get_changeset_fetcher(), + lca_hint_index, + maybe_to_cs_id.into_iter().collect(), + hg_server_heads.into_iter().collect(), + ) + .map(move |bcs_id| repo.get_hg_from_bonsai_changeset(ctx.clone(), bcs_id)) + .buffered(100) +} + +// TODO(stash): this should generate different capabilities depending on whether client +// supports changegroup3 or not +fn create_capabilities() -> BytesOld { + // List of capabilities that was copied from real bundle generated by Mercurial client. + let caps_ref = hashmap! { + "HG20" => vec![], + "b2x:infinitepush" => vec![], + "b2x:infinitepushmutation" => vec![], + "b2x:infinitepushscratchbookmarks" => vec![], + "b2x:rebase" => vec![], + "bookmarks" => vec![], + "changegroup" => vec!["01", "02"], + "digests" => vec!["md5", "sha1", "sha512"], + "error" => vec!["abort", "unsupportedcntent", "pushraced", "pushkey"], + "hgtagsfnodes" => vec![], + "listkeys" => vec![], + "phases" => vec!["heads"], + "pushback" => vec![], + "pushkey" => vec![], + "remote-changegroup" => vec!["http", "https"], + "remotefilelog" => vec!["True"], + "treemanifest" => vec!["True"], + "treeonly" => vec!["True"], + }; + + let mut caps = hashmap! {}; + for (key, values) in caps_ref { + let values = values.into_iter().map(|v| v.to_string()).collect(); + caps.insert(key.to_string(), values); + } + + encode_capabilities(Capabilities::new(caps)) +} diff --git a/eden/mononoke/mononoke_hg_sync_job/src/bundle_preparer.rs b/eden/mononoke/mononoke_hg_sync_job/src/bundle_preparer.rs new file mode 100644 index 0000000000000..c7fb75c6048c6 --- /dev/null +++ b/eden/mononoke/mononoke_hg_sync_job/src/bundle_preparer.rs @@ -0,0 +1,312 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This software may be used and distributed according to the terms of the + * GNU General Public License version 2. 
+ */ + +use crate::bundle_generator::{BookmarkChange, FilenodeVerifier}; +use crate::errors::ErrorKind::{ReplayDataMissing, UnexpectedBookmarkMove}; +use anyhow::Error; +use blobrepo::BlobRepo; +use blobrepo_hg::BlobRepoHg; +use bookmarks::{BookmarkName, BookmarkUpdateLogEntry, BookmarkUpdateReason, RawBundleReplayData}; +use cloned::cloned; +use context::CoreContext; +use futures::future::{try_join, FutureExt as _, TryFutureExt}; +use futures_ext::{try_boxfuture, BoxFuture, FutureExt}; +use futures_old::{ + future::{err, ok}, + Future, +}; +use getbundle_response::SessionLfsParams; +use itertools::Itertools; +use mercurial_bundle_replay_data::BundleReplayData; +use mercurial_types::HgChangesetId; +use metaconfig_types::LfsParams; +use mononoke_hg_sync_job_helper_lib::{ + retry, save_bundle_to_temp_file, save_bytes_to_temp_file, write_to_named_temp_file, +}; +use mononoke_types::{datetime::Timestamp, ChangesetId}; +use reachabilityindex::LeastCommonAncestorsHint; +use regex::Regex; +use skiplist::fetch_skiplist_index; +use slog::info; +use std::collections::HashMap; +use std::convert::TryFrom; +use std::sync::Arc; +use tempfile::NamedTempFile; + +#[derive(Clone)] +pub struct PreparedBookmarkUpdateLogEntry { + pub log_entry: BookmarkUpdateLogEntry, + pub bundle_file: Arc, + pub timestamps_file: Arc, + pub cs_id: Option<(ChangesetId, HgChangesetId)>, +} + +pub struct BundlePreparer { + repo: BlobRepo, + base_retry_delay_ms: u64, + retry_num: usize, + ty: BundleType, +} + +#[derive(Clone)] +enum BundleType { + // Use a bundle that was saved on Mononoke during the push + UseExisting, + // Generate a new bundle + GenerateNew { + lca_hint: Arc, + lfs_params: LfsParams, + filenode_verifier: FilenodeVerifier, + bookmark_regex_force_lfs: Option, + }, +} + +impl BundlePreparer { + pub fn new_use_existing( + repo: BlobRepo, + base_retry_delay_ms: u64, + retry_num: usize, + ) -> impl Future { + ok(BundlePreparer { + repo, + base_retry_delay_ms, + retry_num, + ty: BundleType::UseExisting, + }) + } + + pub fn new_generate_bundles( + ctx: CoreContext, + repo: BlobRepo, + base_retry_delay_ms: u64, + retry_num: usize, + maybe_skiplist_blobstore_key: Option, + lfs_params: LfsParams, + filenode_verifier: FilenodeVerifier, + bookmark_regex_force_lfs: Option, + ) -> impl Future { + let blobstore = repo.get_blobstore().boxed(); + async move { fetch_skiplist_index(&ctx, &maybe_skiplist_blobstore_key, &blobstore).await } + .boxed() + .compat() + .map(move |skiplist| { + let lca_hint: Arc = skiplist; + BundlePreparer { + repo, + base_retry_delay_ms, + retry_num, + ty: BundleType::GenerateNew { + lca_hint, + lfs_params, + filenode_verifier, + bookmark_regex_force_lfs, + }, + } + }) + } + + pub fn prepare_single_bundle( + &self, + ctx: CoreContext, + log_entry: BookmarkUpdateLogEntry, + overlay: crate::BookmarkOverlay, + ) -> BoxFuture { + cloned!(self.repo, self.ty); + + let entry_id = log_entry.id; + retry( + ctx.logger().clone(), + { + cloned!(ctx, repo, ty, log_entry); + move |_| { + Self::try_prepare_single_bundle( + ctx.clone(), + repo.clone(), + log_entry.clone(), + ty.clone(), + overlay.get_bookmark_values(), + ) + } + }, + self.base_retry_delay_ms, + self.retry_num, + ) + .map({ + cloned!(ctx); + move |(p, _attempts)| { + info!(ctx.logger(), "successful prepare of entry #{}", entry_id); + p + } + }) + .boxify() + } + + fn try_prepare_single_bundle( + ctx: CoreContext, + repo: BlobRepo, + log_entry: BookmarkUpdateLogEntry, + bundle_type: BundleType, + hg_server_heads: Vec, + ) -> impl Future { + use 
BookmarkUpdateReason::*; + + info!(ctx.logger(), "preparing log entry #{} ...", log_entry.id); + + enum PrepareType<'a> { + Generate { + lca_hint: Arc, + lfs_params: SessionLfsParams, + filenode_verifier: FilenodeVerifier, + }, + UseExisting { + bundle_replay_data: &'a RawBundleReplayData, + }, + } + + let blobstore = repo.get_blobstore(); + match log_entry.reason { + Pushrebase | Backsyncer | ManualMove => {} + Blobimport | Push | XRepoSync | TestMove { .. } => { + return err(UnexpectedBookmarkMove(format!("{}", log_entry.reason)).into()) + .boxify(); + } + } + + let prepare_type = match bundle_type { + BundleType::GenerateNew { + lca_hint, + lfs_params, + filenode_verifier, + bookmark_regex_force_lfs, + } => PrepareType::Generate { + lca_hint, + lfs_params: get_session_lfs_params( + &ctx, + &log_entry.bookmark_name, + lfs_params, + &bookmark_regex_force_lfs, + ), + filenode_verifier, + }, + BundleType::UseExisting => match &log_entry.bundle_replay_data { + Some(bundle_replay_data) => PrepareType::UseExisting { bundle_replay_data }, + None => return err(ReplayDataMissing { id: log_entry.id }.into()).boxify(), + }, + }; + + let bundle_and_timestamps_files = match prepare_type { + PrepareType::Generate { + lca_hint, + lfs_params, + filenode_verifier, + } => crate::bundle_generator::create_bundle( + ctx.clone(), + repo.clone(), + lca_hint.clone(), + log_entry.bookmark_name.clone(), + try_boxfuture!(BookmarkChange::new(&log_entry)), + hg_server_heads, + lfs_params, + filenode_verifier, + ) + .and_then(|(bytes, timestamps)| { + async move { + try_join( + save_bytes_to_temp_file(&bytes), + save_timestamps_to_file(×tamps), + ) + .await + } + .boxed() + .compat() + }) + .boxify(), + PrepareType::UseExisting { bundle_replay_data } => { + // TODO: We could remove this clone on bundle_replay_data if this whole + // function was async. 
+ cloned!(ctx, bundle_replay_data); + async move { + match BundleReplayData::try_from(&bundle_replay_data) { + Ok(bundle_replay_data) => { + try_join( + save_bundle_to_temp_file( + &ctx, + &blobstore, + bundle_replay_data.bundle2_id, + ), + save_timestamps_to_file(&bundle_replay_data.timestamps), + ) + .await + } + Err(e) => Err(e.into()), + } + } + .boxed() + .compat() + .boxify() + } + }; + + let cs_id = match log_entry.to_changeset_id { + Some(to_changeset_id) => repo + .get_hg_from_bonsai_changeset(ctx.clone(), to_changeset_id) + .map(move |hg_cs_id| Some((to_changeset_id, hg_cs_id))) + .left_future(), + None => ok(None).right_future(), + }; + + bundle_and_timestamps_files + .join(cs_id) + .map( + |((bundle_file, timestamps_file), cs_id)| PreparedBookmarkUpdateLogEntry { + log_entry, + bundle_file: Arc::new(bundle_file), + timestamps_file: Arc::new(timestamps_file), + cs_id, + }, + ) + .boxify() + } +} + +fn get_session_lfs_params( + ctx: &CoreContext, + bookmark: &BookmarkName, + lfs_params: LfsParams, + bookmark_regex_force_lfs: &Option, +) -> SessionLfsParams { + if let Some(regex) = bookmark_regex_force_lfs { + if regex.is_match(bookmark.as_str()) { + info!(ctx.logger(), "force generating lfs bundle for {}", bookmark); + return SessionLfsParams { + threshold: lfs_params.threshold, + }; + } + } + + if lfs_params.generate_lfs_blob_in_hg_sync_job { + SessionLfsParams { + threshold: lfs_params.threshold, + } + } else { + SessionLfsParams { threshold: None } + } +} + +async fn save_timestamps_to_file( + timestamps: &HashMap, +) -> Result { + let encoded_timestamps = timestamps + .iter() + .map(|(key, value)| { + let timestamp = value.timestamp_seconds(); + format!("{}={}", key, timestamp) + }) + .join("\n"); + + write_to_named_temp_file(encoded_timestamps).await +} diff --git a/eden/mononoke/mononoke_hg_sync_job/src/errors.rs b/eden/mononoke/mononoke_hg_sync_job/src/errors.rs new file mode 100644 index 0000000000000..48e8ae360be5b --- /dev/null +++ b/eden/mononoke/mononoke_hg_sync_job/src/errors.rs @@ -0,0 +1,40 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This software may be used and distributed according to the terms of the + * GNU General Public License version 2. + */ + +use anyhow::Error; +use bookmarks::BookmarkUpdateLogEntry; +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum ErrorKind { + #[error("replay data is missing for id {id}")] + ReplayDataMissing { id: i64 }, + #[error("unexpected bookmark move: {0}")] + UnexpectedBookmarkMove(String), + #[error("sync failed for ids {ids:?}")] + SyncFailed { + ids: Vec, + #[source] + cause: Error, + }, +} + +#[derive(Debug, Error)] +pub enum PipelineError { + #[error("error processing entries {entries:?}")] + EntryError { + entries: Vec, + #[source] + cause: Error, + }, + + #[error("error without tracking entry")] + AnonymousError { + #[source] + cause: Error, + }, +} diff --git a/eden/mononoke/mononoke_hg_sync_job/src/globalrev_syncer.rs b/eden/mononoke/mononoke_hg_sync_job/src/globalrev_syncer.rs new file mode 100644 index 0000000000000..e3d88f6d96c02 --- /dev/null +++ b/eden/mononoke/mononoke_hg_sync_job/src/globalrev_syncer.rs @@ -0,0 +1,262 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This software may be used and distributed according to the terms of the + * GNU General Public License version 2. 
+ */ + +use anyhow::{format_err, Error}; +use blobrepo::BlobRepo; +use fbinit::FacebookInit; +use futures::compat::Future01CompatExt; +use metaconfig_types::HgsqlGlobalrevsName; +use mononoke_types::ChangesetId; +use sql::{queries, Connection}; +use sql_construct::{facebook::FbSqlConstruct, SqlConstruct}; +use sql_ext::{facebook::MysqlOptions, SqlConnections}; +use std::path::Path; +use std::sync::Arc; + +#[derive(Clone)] +pub enum GlobalrevSyncer { + Noop, + Sql(Arc), +} + +pub struct SqlGlobalrevSyncer { + hgsql_name: HgsqlGlobalrevsName, + repo: BlobRepo, + hgsql: HgsqlConnection, +} + +#[derive(Clone)] +struct HgsqlConnection { + connection: Connection, +} + +impl SqlConstruct for HgsqlConnection { + const LABEL: &'static str = "globalrev-syncer"; + + const CREATION_QUERY: &'static str = include_str!("../schemas/hgsql.sql"); + + fn from_sql_connections(connections: SqlConnections) -> Self { + Self { + connection: connections.write_connection, + } + } +} + +impl GlobalrevSyncer { + pub async fn new( + fb: FacebookInit, + repo: BlobRepo, + use_sqlite: bool, + hgsql_db_addr: Option<&str>, + mysql_options: MysqlOptions, + readonly: bool, + hgsql_name: HgsqlGlobalrevsName, + ) -> Result { + let hgsql_db_addr = match hgsql_db_addr { + Some(hgsql_db_addr) => hgsql_db_addr, + None => return Ok(GlobalrevSyncer::Noop), + }; + + let hgsql = if use_sqlite { + HgsqlConnection::with_sqlite_path(Path::new(hgsql_db_addr), readonly)? + } else { + HgsqlConnection::with_xdb(fb, hgsql_db_addr.to_string(), mysql_options, readonly) + .await? + }; + + let syncer = SqlGlobalrevSyncer { + hgsql_name, + repo, + hgsql, + }; + + Ok(GlobalrevSyncer::Sql(Arc::new(syncer))) + } + + pub async fn sync(&self, bcs_id: ChangesetId) -> Result<(), Error> { + match self { + Self::Noop => Ok(()), + Self::Sql(syncer) => syncer.sync(bcs_id).await, + } + } +} + +impl SqlGlobalrevSyncer { + pub async fn sync(&self, bcs_id: ChangesetId) -> Result<(), Error> { + let rev = self + .repo + .get_globalrev_from_bonsai(bcs_id) + .compat() + .await? + .ok_or_else(|| format_err!("Globalrev is missing for bcs_id = {}", bcs_id))? + .id() + + 1; + + let rows = + IncreaseGlobalrevCounter::query(&self.hgsql.connection, self.hgsql_name.as_ref(), &rev) + .compat() + .await? + .affected_rows(); + + if rows > 0 { + return Ok(()); + } + + // If the counter is already where we want it do be, then we won't actually modify the row, + // and affected_rows will return 0. The right way to fix this would be to set + // CLIENT_FOUND_ROWS when connecting to MySQL and use value <= rev so that affected_rows + // tells us about rows it found as opposed to rows actually modified (which is how SQLite + // would behave locally). However, for now let's do the more expedient thing and just have + // both MySQL and SQLite behave the same by avoiding no-op updates. This makes this logic + // easier to unit test. + + let db_rev = GetGlobalrevCounter::query(&self.hgsql.connection, self.hgsql_name.as_ref()) + .compat() + .await? + .into_iter() + .next() + .map(|r| r.0); + + if let Some(db_rev) = db_rev { + if db_rev == rev { + return Ok(()); + } + } + + Err(format_err!( + "Attempted to move Globalrev for repository {:?} backwards to {} (from {:?})", + self.hgsql_name, + rev, + db_rev, + )) + } +} + +queries! 
{ + write IncreaseGlobalrevCounter(repo: String, rev: u64) { + none, + " + UPDATE revision_references + SET value = {rev} + WHERE repo = {repo} + AND namespace = 'counter' + AND name = 'commit' + AND value < {rev} + " + } + + read GetGlobalrevCounter(repo: String) -> (u64) { + " + SELECT value FROM revision_references + WHERE repo = {repo} + AND namespace = 'counter' + AND name = 'commit' + " + } +} + +#[cfg(test)] +mod test { + use super::*; + use bonsai_globalrev_mapping::BonsaiGlobalrevMappingEntry; + use mercurial_types_mocks::globalrev::{GLOBALREV_ONE, GLOBALREV_THREE, GLOBALREV_TWO}; + use mononoke_types_mocks::changesetid::{ONES_CSID, TWOS_CSID}; + use mononoke_types_mocks::repo::REPO_ZERO; + use sql::rusqlite::Connection as SqliteConnection; + + queries! { + write InitGlobalrevCounter(repo: String, rev: u64) { + none, + " + INSERT INTO revision_references(repo, namespace, name, value) + VALUES ({repo}, 'counter', 'commit', {rev}) + " + } + } + + #[test] + fn test_sync() -> Result<(), Error> { + async_unit::tokio_unit_test(async move { + let sqlite = SqliteConnection::open_in_memory()?; + sqlite.execute_batch(HgsqlConnection::CREATION_QUERY)?; + let connection = Connection::with_sqlite(sqlite); + + let repo = blobrepo_factory::new_memblob_empty(None)?; + let hgsql_name = HgsqlGlobalrevsName("foo".to_string()); + + let e1 = BonsaiGlobalrevMappingEntry { + repo_id: REPO_ZERO, + bcs_id: ONES_CSID, + globalrev: GLOBALREV_ONE, + }; + + let e2 = BonsaiGlobalrevMappingEntry { + repo_id: REPO_ZERO, + bcs_id: TWOS_CSID, + globalrev: GLOBALREV_TWO, + }; + + repo.bonsai_globalrev_mapping() + .bulk_import(&vec![e1, e2]) + .compat() + .await?; + + let syncer = SqlGlobalrevSyncer { + hgsql_name: hgsql_name.clone(), + repo, + hgsql: HgsqlConnection { + connection: connection.clone(), + }, + }; + + // First, check that setting a globalrev before the counter exists fails. + assert!(syncer.sync(ONES_CSID).await.is_err()); + + // Now, set the counter + + InitGlobalrevCounter::query(&connection, hgsql_name.as_ref(), &0) + .compat() + .await?; + + // Now, try again to set the globalrev + + syncer.sync(TWOS_CSID).await?; + + assert_eq!( + GetGlobalrevCounter::query(&connection, hgsql_name.as_ref()) + .compat() + .await? + .into_iter() + .next() + .ok_or(Error::msg("Globalrev missing"))? + .0, + GLOBALREV_THREE.id() + ); + + // Check that we can sync the same value again successfully + + syncer.sync(TWOS_CSID).await?; + + // Check that we can't move it back + + assert!(syncer.sync(ONES_CSID).await.is_err()); + + assert_eq!( + GetGlobalrevCounter::query(&connection, hgsql_name.as_ref()) + .compat() + .await? + .into_iter() + .next() + .ok_or(Error::msg("Globalrev missing"))? + .0, + GLOBALREV_THREE.id() + ); + + Ok(()) + }) + } +} diff --git a/eden/mononoke/mononoke_hg_sync_job/src/hgrepo.rs b/eden/mononoke/mononoke_hg_sync_job/src/hgrepo.rs new file mode 100644 index 0000000000000..e4a5fb99e8d38 --- /dev/null +++ b/eden/mononoke/mononoke_hg_sync_job/src/hgrepo.rs @@ -0,0 +1,609 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This software may be used and distributed according to the terms of the + * GNU General Public License version 2. 
+ */ + +use anyhow::{bail, format_err, Error, Result}; +use bookmarks::BookmarkName; +use cloned::cloned; +use failure_ext::FutureFailureErrorExt; +use futures::future::{FutureExt as _, TryFutureExt}; +use futures_ext::{try_boxfuture, BoxFuture, FutureExt}; +use futures_old::future::{self, err, ok, Either, Future, IntoFuture}; +use mercurial_types::HgChangesetId; +use mononoke_hg_sync_job_helper_lib::{lines_after, read_file_contents, wait_till_more_lines}; +use parking_lot::Mutex; +use slog::{debug, info, Logger}; +use std::collections::HashMap; +use std::ffi::OsStr; +use std::fs; +use std::process::{Command, Stdio}; +use std::str::FromStr; +use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; +use std::sync::Arc; +use std::time::Duration; +use tempfile::NamedTempFile; +use tokio_io::io::{flush, write_all}; +use tokio_old::prelude::FutureExt as TokioFutureExt; +use tokio_old::timer::timeout::Error as TimeoutError; +use tokio_process::{Child, ChildStdin, CommandExt}; +use tokio_timer::sleep; + +const BOOKMARK_LOCATION_LOOKUP_TIMEOUT_MS: u64 = 10_000; +const LIST_SERVER_BOOKMARKS_EXTENSION: &str = include_str!("listserverbookmarks.py"); +const SEND_UNBUNDLE_REPLAY_EXTENSION: &str = include_str!("sendunbundlereplay.py"); + +pub fn list_hg_server_bookmarks( + hg_repo_path: String, +) -> BoxFuture, Error> { + let extension_file = try_boxfuture!(NamedTempFile::new()); + let file_path = try_boxfuture!(extension_file + .path() + .to_str() + .ok_or(Error::msg("Temp file path contains non-unicode chars"))); + try_boxfuture!(fs::write(file_path, LIST_SERVER_BOOKMARKS_EXTENSION)); + let ext = format!("extensions.listserverbookmarks={}", file_path); + + let full_args = vec![ + "--config", + &ext, + "listserverbookmarks", + "--path", + &hg_repo_path, + ]; + let cmd = Command::new("hg").args(&full_args).output(); + + cmd.into_future() + .from_err() + .into_future() + .and_then(|output| { + let mut res = HashMap::new(); + for keyvalue in output.stdout.split(|x| x == &0) { + if keyvalue.is_empty() { + continue; + } + let mut iter = keyvalue.split(|x| x == &1); + match (iter.next(), iter.next()) { + (Some(key), Some(value)) => { + let key = String::from_utf8(key.to_vec()).map_err(Error::from)?; + let value = String::from_utf8(value.to_vec()).map_err(Error::from)?; + res.insert(BookmarkName::new(key)?, HgChangesetId::from_str(&value)?); + } + _ => { + let stdout = String::from_utf8_lossy(&output.stdout); + bail!("invalid format returned from server: {}", stdout); + } + } + } + Ok(res) + }) + .context("While listing server bookmarks") + .from_err() + .boxify() +} + +fn expected_location_string_arg(maybe_hgcsid: Option) -> String { + match maybe_hgcsid { + Some(hash) => hash.to_string(), + None => "DELETED".into(), + } +} + +fn get_hg_command(args: I) -> Command +where + I: IntoIterator, + S: AsRef, +{ + let full_args = vec![ + "--config", + "extensions.clienttelemetry=", + "--config", + "clienttelemetry.announceremotehostname=on", + ] + .into_iter() + .map(|item| item.into()) + .chain(args.into_iter().map(|item| item.as_ref().to_os_string())); + let mut child = Command::new(&"hg"); + child.args(full_args); + child +} + +#[derive(Clone)] +struct AsyncProcess { + child: Arc>>, + stdin: Arc>>, + can_be_used: Arc, +} + +impl AsyncProcess { + pub fn new(args: I) -> Result + where + I: IntoIterator, + S: AsRef, + { + Self::from_command(get_hg_command(args)) + } + + fn from_command(mut command: Command) -> Result { + let mut child = command + .stdin(Stdio::piped()) + .spawn_async() + .map_err(|e| 
format_err!("Couldn't spawn hg command: {:?}", e))?; + let stdin = child + .stdin() + .take() + .ok_or(Error::msg("ChildStdin unexpectedly not captured"))?; + Ok(Self { + child: Arc::new(Mutex::new(Some(child))), + stdin: Arc::new(Mutex::new(Some(stdin))), + can_be_used: Arc::new(AtomicBool::new(true)), + }) + } + + pub fn write_line(&self, line: Vec) -> BoxFuture<(), Error> { + let stdin = try_boxfuture!(self.stdin.lock().take().ok_or(Error::msg( + "AsyncProcess unexpectedly does not contain stdin." + ))); + let stdin_arc = self.stdin.clone(); + let process = self.clone(); + write_all(stdin, line) + .and_then(move |(stdin, _)| flush(stdin)) + .map(move |stdin| { + // Need to put stdin back + stdin_arc.lock().replace(stdin); + }) + .map_err(move |e| { + // If we failed for whichever reason, we can't reuse the + // same process. The failure might've been related to the process + // itself, rather than the bundle. Let's err on the safe side + process.invalidate(); + format_err!("{}", e) + }) + .boxify() + } + + pub fn invalidate(&self) { + self.can_be_used.store(false, Ordering::SeqCst); + } + + pub fn is_valid(&self) -> bool { + self.can_be_used.load(Ordering::SeqCst) + } + + pub fn kill(&self, logger: Logger) { + self.child.lock().as_mut().map(|child| { + child + .kill() + .unwrap_or_else(|e| debug!(logger, "failed to kill the hg process: {}", e)) + }); + } + + /// Make sure child is still alive while provided future is being executed + /// If `grace_period` is specified, future will be given additional time + /// to resolve even if peer has already been terminated. + pub fn ensure_alive>( + &self, + fut: F, + grace_period: Option, + ) -> impl Future { + let child = match self.child.lock().take() { + None => return future::err(Error::msg("hg peer is dead")).left_future(), + Some(child) => child, + }; + + let with_grace_period = move |fut: F, error| match grace_period { + None => future::err(error).left_future(), + Some(grace_period) => fut + .select(sleep(grace_period).then(|_| Err(error))) + .then(|result| match result { + Ok((ok, _)) => Ok(ok), + Err((err, _)) => Err(err), + }) + .right_future(), + }; + + child + .select2(fut) + .then({ + let this = self.clone(); + move |result| match result { + Ok(Either::A((exit_status, fut))) => { + this.invalidate(); + with_grace_period( + fut, + format_err!("hg peer has died unexpectedly: {}", exit_status), + ) + .right_future() + } + Err(Either::A((child_err, fut))) => { + this.invalidate(); + with_grace_period(fut, child_err.into()).right_future() + } + Ok(Either::B((future_ok, child))) => { + this.child.lock().replace(child); + future::ok(future_ok).left_future() + } + Err(Either::B((future_err, child))) => { + this.child.lock().replace(child); + future::err(future_err).left_future() + } + } + }) + .right_future() + } +} + +#[derive(Clone)] +struct HgPeer { + process: AsyncProcess, + reports_file: Arc, + bundle_applied: Arc, + max_bundles_allowed: usize, + baseline_bundle_timeout_ms: u64, + extension_file: Arc, +} + +impl !Sync for HgPeer {} + +impl HgPeer { + pub fn new( + repo_path: &str, + max_bundles_allowed: usize, + baseline_bundle_timeout_ms: u64, + ) -> Result { + let reports_file = NamedTempFile::new()?; + let file_path = reports_file + .path() + .to_str() + .ok_or(Error::msg("Temp file path contains non-unicode chars"))?; + + let extension_file = NamedTempFile::new()?; + let extension_path = extension_file + .path() + .to_str() + .ok_or(Error::msg("Temp file path contains non-unicode chars"))?; + fs::write(extension_path, 
SEND_UNBUNDLE_REPLAY_EXTENSION)?; + + let args = &[ + "--config", + &format!("extensions.sendunbundlereplay={}", extension_path), + "sendunbundlereplaybatch", + "--debug", + "--path", + repo_path, + "--reports", + file_path, + ]; + let process = AsyncProcess::new(args)?; + + Ok(HgPeer { + process, + reports_file: Arc::new(reports_file), + bundle_applied: Arc::new(AtomicUsize::new(0)), + max_bundles_allowed, + baseline_bundle_timeout_ms, + extension_file: Arc::new(extension_file), + }) + } + + pub fn arc_mutexed(self) -> Arc> { + Arc::new(Mutex::new(self)) + } + + pub fn still_good(&self, logger: Logger) -> bool { + let can_be_used: bool = self.process.is_valid(); + let bundle_applied: usize = self.bundle_applied.load(Ordering::SeqCst); + let can_write_more = bundle_applied < self.max_bundles_allowed; + debug!( + logger, + "can be used: {}, bundle_applied: {}, max bundles allowed: {}", + can_be_used, + bundle_applied, + self.max_bundles_allowed + ); + can_be_used && can_write_more + } + + pub fn kill(&self, logger: Logger) { + self.process.kill(logger); + } + + pub fn apply_bundle( + &self, + bundle_path: &str, + timestamps_path: &str, + onto_bookmark: BookmarkName, + expected_bookmark_position: Option, + attempt: usize, + logger: Logger, + ) -> impl Future { + let mut log_file = match NamedTempFile::new() { + Ok(log_file) => log_file, + Err(e) => { + return err(format_err!("could not create log file: {:?}", e)).left_future(); + } + }; + + let log_path = match log_file.path().to_str() { + Some(log_path) => log_path, + None => { + return err(Error::msg("log_file path was not a valid string")).left_future(); + } + }; + + let onto_bookmark = onto_bookmark.to_string(); + let onto_bookmark = base64::encode(&onto_bookmark); + let expected_hash = expected_location_string_arg(expected_bookmark_position); + let input_line = format!( + "{} {} {} {} {}\n", + bundle_path, timestamps_path, onto_bookmark, expected_hash, log_path, + ); + let path = self.reports_file.path().to_path_buf(); + let bundle_timeout_ms = self.baseline_bundle_timeout_ms * 2_u64.pow(attempt as u32 - 1); + { + cloned!(path); + async move { lines_after(&path, 0).await }.boxed().compat() + } + .map(|v| v.len()) + .and_then({ + cloned!(self.process); + move |line_num_in_reports_file| { + process + .write_line(input_line.into_bytes()) + .map(move |_| line_num_in_reports_file) + } + }) + .and_then({ + cloned!(logger, bundle_timeout_ms, self.bundle_applied, self.process); + move |line_num_in_reports_file| { + bundle_applied.fetch_add(1, Ordering::SeqCst); + let response = async move { + wait_till_more_lines(path, line_num_in_reports_file, bundle_timeout_ms).await + } + .boxed() + .compat() + .and_then({ + cloned!(process, logger); + move |report_lines| { + let full_report = report_lines.join("\n"); + let success = !full_report.contains("failed"); + debug!(logger, "sync report: {}", full_report); + if success { + Ok(()) + } else { + process.invalidate(); + let log = match read_file_contents(&mut log_file) { + Ok(log) => format!("hg logs follow:\n{}", log), + Err(e) => format!("no hg logs available ({:?})", e), + }; + Err(format_err!("sync failed: {}", log)) + } + } + }) + .map_err({ + cloned!(process); + move |err| { + info!(logger, "sync failed. Invalidating process"); + process.invalidate(); + err + } + }); + process.ensure_alive( + response, + // even if peer process has died, lets wait for additional grace + // period, and trie to collect the report if any. 
+ Some(Duration::from_secs(1)), + ) + } + }) + .right_future() + } +} + +/// Struct that knows how to work with on-disk mercurial repository. +/// It shells out to `hg` cmd line tool. +#[derive(Clone)] +pub struct HgRepo { + repo_path: Arc, + peer: Arc>, + max_bundles_per_peer: usize, + baseline_bundle_timeout_ms: u64, + verify_server_bookmark_on_failure: bool, +} + +impl HgRepo { + pub fn new( + repo_path: String, + max_bundles_per_peer: usize, + baseline_bundle_timeout_ms: u64, + verify_server_bookmark_on_failure: bool, + ) -> Result { + let peer = HgPeer::new(&repo_path, max_bundles_per_peer, baseline_bundle_timeout_ms)?; + Ok(Self { + repo_path: Arc::new(repo_path), + peer: peer.arc_mutexed(), + max_bundles_per_peer, + baseline_bundle_timeout_ms, + verify_server_bookmark_on_failure, + }) + } + + pub fn apply_bundle( + &self, + bundle_filename: String, + timestamps_path: String, + onto_bookmark: BookmarkName, + expected_bookmark_position: Option, + attempt: usize, + logger: Logger, + ) -> impl Future { + match self.renew_peer_if_needed(logger.clone()) { + Ok(_) => self + .peer + .lock() + .apply_bundle( + &bundle_filename, + ×tamps_path, + onto_bookmark.clone(), + expected_bookmark_position.clone(), + attempt, + logger.clone(), + ) + .or_else({ + let this = self.clone(); + cloned!(onto_bookmark, expected_bookmark_position, logger); + move |sync_error| { + if !this.verify_server_bookmark_on_failure { + return err(sync_error).left_future(); + } + info!( + logger, + "sync failed, let's check if the bookmark is where we want \ + it to be anyway" + ); + this.verify_server_bookmark_location( + &onto_bookmark, + expected_bookmark_position, + ) + .map_err(|_verification_error| sync_error) + .right_future() + } + }) + .boxify(), + Err(e) => err(e).boxify(), + } + } + + fn renew_peer_if_needed(&self, logger: Logger) -> Result<()> { + if !self.peer.lock().still_good(logger.clone()) { + debug!(logger, "killing the old peer"); + self.peer.lock().kill(logger.clone()); + debug!(logger, "renewing hg peer"); + let new_peer = HgPeer::new( + &self.repo_path.clone(), + self.max_bundles_per_peer, + self.baseline_bundle_timeout_ms, + )?; + *self.peer.lock() = new_peer; + Ok(debug!(logger, "done renewing hg peer")) + } else { + Ok(debug!(logger, "existing hg peer is still good")) + } + } + + fn verify_server_bookmark_location( + &self, + name: &BookmarkName, + expected_bookmark_position: Option, + ) -> impl Future { + let name = name.to_string(); + let mut args: Vec = [ + "checkserverbookmark", + // NB: we can't enable extensions.checkserverbookmark universally until it + // is deployed as part of the package. For now, let it be enabled only when + // the appropriate command line flag is present (e.g. 
when this function is + // called) + "--config", + "extensions.checkserverbookmark=", + "--path", + &self.repo_path.clone(), + "--name", + ] + .iter() + .map(|item| item.to_string()) + .collect(); + args.push(name); + match expected_bookmark_position { + Some(hash) => { + args.push("--hash".into()); + args.push(hash.to_string()); + } + None => args.push("--deleted".into()), + }; + let proc = match get_hg_command(args).stdin(Stdio::piped()).status_async() { + Ok(proc) => proc, + Err(_) => return err(Error::msg("failed to start a mercurial process")).left_future(), + }; + proc.map_err(|e| format_err!("process error: {:?}", e)) + .timeout(Duration::from_millis(BOOKMARK_LOCATION_LOOKUP_TIMEOUT_MS)) + .map_err(remap_timeout_error) + .and_then(|exit_status| { + if exit_status.success() { + ok(()) + } else { + err(Error::msg( + "server does not have a bookmark in the expected location", + )) + } + }) + .right_future() + } +} + +fn remap_timeout_error(err: TimeoutError) -> Error { + match err.into_inner() { + Some(err) => err, + None => Error::msg("timed out waiting for process"), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use assert_matches::assert_matches; + use futures_old::Future; + use tokio_compat::runtime::Runtime; + use tokio_timer::sleep; + + #[test] + fn ensure_alive_alive_process() -> Result<()> { + let mut rt = Runtime::new()?; + + let command = { + let mut command = Command::new("sleep"); + command.args(vec!["2"]); + command + }; + let proc = AsyncProcess::from_command(command)?; + + let fut = proc.ensure_alive(sleep(Duration::from_millis(100)).from_err(), None); + let res = rt.block_on(fut); + assert_matches!(res, Ok(())); + + assert!(proc.is_valid()); + Ok(()) + } + + #[test] + fn ensure_alive_dead_process() -> Result<()> { + let mut rt = Runtime::new()?; + + let proc = AsyncProcess::from_command(Command::new("false"))?; + + let fut = proc.ensure_alive(sleep(Duration::from_secs(5)).from_err(), None); + let res = rt.block_on(fut); + assert_matches!(res, Err(_)); + + assert!(!proc.is_valid()); + Ok(()) + } + + #[test] + fn ensure_alive_grace_period() -> Result<()> { + let mut rt = Runtime::new()?; + + let proc = AsyncProcess::from_command(Command::new("false"))?; + + let fut = proc.ensure_alive( + sleep(Duration::from_secs(1)).from_err(), + Some(Duration::from_secs(10)), + ); + let res = rt.block_on(fut); + assert_matches!(res, Ok(())); + + assert!(!proc.is_valid()); + Ok(()) + } +} diff --git a/eden/mononoke/mononoke_hg_sync_job/src/lfs_verifier.rs b/eden/mononoke/mononoke_hg_sync_job/src/lfs_verifier.rs new file mode 100644 index 0000000000000..9c215d38e5f77 --- /dev/null +++ b/eden/mononoke/mononoke_hg_sync_job/src/lfs_verifier.rs @@ -0,0 +1,148 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This software may be used and distributed according to the terms of the + * GNU General Public License version 2. 
+ */ + +use std::sync::Arc; + +use anyhow::{Context, Error}; +use bytes_old::Bytes as BytesOld; +use failure_ext::FutureFailureExt; +use futures_ext::{try_boxfuture, FutureExt}; +use futures_old::{Future, IntoFuture, Stream}; +use http::{status::StatusCode, uri::Uri}; +use hyper::Request; +use hyper::{client::HttpConnector, Client}; +use hyper_openssl::HttpsConnector; +use thiserror::Error; + +use lfs_protocol::{ + ObjectStatus, Operation, RequestBatch, RequestObject, ResponseBatch, Sha256 as LfsSha256, + Transfer, +}; +use mononoke_types::hash::Sha256; + +pub type HttpsHyperClient = Client>; + +#[derive(Debug, Error)] +pub enum ErrorKind { + #[error("Serializing a LFS batch failed")] + SerializationFailed, + #[error("Deserializating a LFS batch failed")] + DeserializationFailed, + #[error("Creating a request failed")] + RequestCreationFailed, + #[error("Submitting a batch request failed")] + BatchRequestNoResponse, + #[error("Submitting a batch request failed with status {0}")] + BatchRequestFailed(StatusCode), + #[error("Reading the response for a batch request failed")] + BatchRequestReadFailed, + #[error("LFS objects are missing: {0:?}")] + LfsObjectsMissing(Vec), +} + +struct LfsVerifierInner { + client: HttpsHyperClient, + batch_uri: Uri, +} + +#[derive(Clone)] +pub struct LfsVerifier { + inner: Arc, +} + +impl LfsVerifier { + pub fn new(batch_uri: Uri) -> Result { + let connector = HttpsConnector::new(4)?; + let client = Client::builder().build(connector); + + let inner = LfsVerifierInner { batch_uri, client }; + + Ok(Self { + inner: Arc::new(inner), + }) + } + + pub fn verify_lfs_presence( + &self, + blobs: &[(Sha256, u64)], + ) -> impl Future { + let batch = build_download_request_batch(blobs); + let body: BytesOld = + try_boxfuture!(serde_json::to_vec(&batch).context(ErrorKind::SerializationFailed)) + .into(); + + let uri = self.inner.batch_uri.clone(); + let req = try_boxfuture!(Request::post(uri) + .body(body.into()) + .context(ErrorKind::RequestCreationFailed)); + + self.inner + .client + .request(req) + .context(ErrorKind::BatchRequestNoResponse) + .map_err(Error::from) + .and_then(|response| { + let (head, body) = response.into_parts(); + + if !head.status.is_success() { + return Err(ErrorKind::BatchRequestFailed(head.status).into()) + .into_future() + .left_future(); + } + + body.concat2() + .context(ErrorKind::BatchRequestReadFailed) + .map_err(Error::from) + .right_future() + }) + .and_then(|body| { + serde_json::from_slice::(&body) + .context(ErrorKind::DeserializationFailed) + .map_err(Error::from) + }) + .and_then(|batch| { + let missing_objects = find_missing_objects(batch); + + if missing_objects.is_empty() { + return Ok(()); + } + + Err(ErrorKind::LfsObjectsMissing(missing_objects).into()) + }) + .boxify() + } +} + +fn build_download_request_batch(blobs: &[(Sha256, u64)]) -> RequestBatch { + let objects = blobs + .iter() + .map(|(oid, size)| RequestObject { + oid: LfsSha256(oid.into_inner()), + size: *size, + }) + .collect(); + + RequestBatch { + operation: Operation::Download, + r#ref: None, + transfers: vec![Transfer::Basic], + objects, + } +} + +fn find_missing_objects(batch: ResponseBatch) -> Vec { + batch + .objects + .into_iter() + .filter_map(|object| match object.status { + ObjectStatus::Ok { ref actions, .. 
} if actions.contains_key(&Operation::Download) => { + None + } + _ => Some(object.object), + }) + .collect() +} diff --git a/eden/mononoke/mononoke_hg_sync_job/src/listserverbookmarks.py b/eden/mononoke/mononoke_hg_sync_job/src/listserverbookmarks.py new file mode 100644 index 0000000000000..ac0eef5fd8960 --- /dev/null +++ b/eden/mononoke/mononoke_hg_sync_job/src/listserverbookmarks.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python2 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2. + +from edenscm.mercurial import hg, registrar +from edenscm.mercurial.i18n import _ + + +cmdtable = {} +command = registrar.command(cmdtable) + + +@command( + "^listserverbookmarks", + [("", "path", "", _("hg server remotepath (ssh)"), "")], + _("[OPTION]..."), + norepo=True, +) +def listserverbookmarks(ui, **opts): + """List the bookmarks for a remote server""" + path = opts["path"] + remote = hg.peer(ui, {}, path) + bookmarks = remote.listkeys("bookmarks") + + for pair in bookmarks.items(): + ui.write("%s\1%s\0" % pair) + ui.flush() diff --git a/eden/mononoke/mononoke_hg_sync_job/src/main.rs b/eden/mononoke/mononoke_hg_sync_job/src/main.rs new file mode 100644 index 0000000000000..30f7f73dc3213 --- /dev/null +++ b/eden/mononoke/mononoke_hg_sync_job/src/main.rs @@ -0,0 +1,1259 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This software may be used and distributed according to the terms of the + * GNU General Public License version 2. + */ + +#![feature(optin_builtin_traits)] +#![feature(negative_impls)] +#![deny(warnings)] + +/// Mononoke -> hg sync job +/// +/// It's a special job that is used to synchronize Mononoke to Mercurial when Mononoke is a source +/// of truth. All writes to Mononoke are replayed to Mercurial using this job. That can be used +/// to verify Mononoke's correctness and/or use hg as a disaster recovery mechanism. 
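
At a high level, the job below tails the bookmark update log, prepares a bundle for each new entry, applies it to the target hg repo over a long-lived peer, and then advances the "latest-replayed-request" counter so that a restart resumes from the last successfully replayed entry. The stand-alone sketch that follows only outlines that shape; read_next_entries, prepare_bundle, apply_bundle and save_counter are invented stand-ins for the real BookmarkUpdateLog, BundlePreparer, HgRepo and MutableCounters plumbing, and none of it is part of this patch.

// Simplified, synchronous outline of the replay loop; all helpers here are
// hypothetical stand-ins for the asynchronous Mononoke components.
#[derive(Clone, Debug)]
struct LogEntry {
    id: i64,
    bookmark: String,
}

// Stand-in for reading the next batch of bookmark log entries after `start_id`.
fn read_next_entries(start_id: i64, limit: usize) -> Vec<LogEntry> {
    (1..=limit as i64)
        .map(|offset| LogEntry {
            id: start_id + offset,
            bookmark: "master".to_string(),
        })
        .collect()
}

// Stand-in for bundle generation (BundlePreparer in the real job).
fn prepare_bundle(entry: &LogEntry) -> Result<Vec<u8>, String> {
    Ok(format!("bundle for {} at log id {}", entry.bookmark, entry.id).into_bytes())
}

// Stand-in for HgRepo::apply_bundle, which shells out to the hg peer.
fn apply_bundle(bundle: &[u8]) -> Result<(), String> {
    println!("applied {} bytes", bundle.len());
    Ok(())
}

// Stand-in for persisting the "latest-replayed-request" mutable counter.
fn save_counter(latest_id: i64) {
    println!("latest-replayed-request = {}", latest_id);
}

fn main() -> Result<(), String> {
    let mut next_id = 0;
    for _ in 0..3 {
        for entry in read_next_entries(next_id, 2) {
            // The real job retries with backoff and may lock the repo on failure
            // instead of bailing out like this sketch does.
            let bundle = prepare_bundle(&entry)?;
            apply_bundle(&bundle)?;
            next_id = entry.id;
            save_counter(next_id);
        }
    }
    Ok(())
}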
+use anyhow::{bail, format_err, Error, Result}; +use blobrepo_hg::BlobRepoHg; +use bookmarks::{BookmarkName, BookmarkUpdateLog, BookmarkUpdateLogEntry, Freshness}; +use bundle_generator::FilenodeVerifier; +use bundle_preparer::{BundlePreparer, PreparedBookmarkUpdateLogEntry}; +use clap::{Arg, ArgMatches, SubCommand}; +use cloned::cloned; +use cmdlib::{args, helpers::block_execute}; +use context::CoreContext; +use dbbookmarks::SqlBookmarksBuilder; +use fbinit::FacebookInit; +use futures::{ + compat::Future01CompatExt, + future::{try_join, FutureExt as _, TryFutureExt}, + stream::TryStreamExt, +}; +use futures_ext::{spawn_future, try_boxfuture, BoxFuture, FutureExt, StreamExt}; +use futures_old::{ + future::{err, join_all, ok, IntoFuture}, + stream, + stream::Stream, + Future, +}; +use futures_stats::{FutureStats, Timed}; +use http::Uri; +use lfs_verifier::LfsVerifier; +use mercurial_types::HgChangesetId; +use metaconfig_types::HgsqlName; +use metaconfig_types::RepoReadOnly; +use mononoke_hg_sync_job_helper_lib::{ + merge_bundles, merge_timestamp_files, retry, RetryAttemptsCount, +}; +use mononoke_types::{ChangesetId, RepositoryId}; +use mutable_counters::{MutableCounters, SqlMutableCounters}; +use regex::Regex; +use repo_read_write_status::{RepoReadWriteFetcher, SqlRepoReadWriteStatus}; +use scuba_ext::ScubaSampleBuilder; +use slog::{error, info}; +use sql_construct::{facebook::FbSqlConstruct, SqlConstruct}; +use sql_ext::facebook::{myrouter_ready, MysqlOptions}; + +use std::collections::HashMap; +use std::path::{Path, PathBuf}; +use std::sync::Arc; +use std::time::Duration; +use tempfile::NamedTempFile; +use tokio_timer::sleep; + +mod bundle_generator; +mod bundle_preparer; +mod errors; +mod globalrev_syncer; +mod hgrepo; +mod lfs_verifier; + +use errors::{ + ErrorKind::SyncFailed, + PipelineError::{self, AnonymousError, EntryError}, +}; +use globalrev_syncer::GlobalrevSyncer; +use hgrepo::{list_hg_server_bookmarks, HgRepo}; +use hgserver_config::ServerConfig; + +const ARG_BOOKMARK_REGEX_FORCE_GENERATE_LFS: &str = "bookmark-regex-force-generate-lfs"; +const GENERATE_BUNDLES: &str = "generate-bundles"; +const MODE_SYNC_ONCE: &'static str = "sync-once"; +const MODE_SYNC_LOOP: &'static str = "sync-loop"; +const LATEST_REPLAYED_REQUEST_KEY: &'static str = "latest-replayed-request"; +const SLEEP_SECS: u64 = 1; +const SCUBA_TABLE: &'static str = "mononoke_hg_sync"; +const UNLOCK_REASON: &str = "Unlocked by successful sync"; +const LOCK_REASON: &str = "Locked due to sync failure, check Source Control @ FB"; + +const HGSQL_GLOBALREVS_USE_SQLITE: &str = "hgsql-globalrevs-use-sqlite"; +const HGSQL_GLOBALREVS_DB_ADDR: &str = "hgsql-globalrevs-db-addr"; + +const DEFAULT_RETRY_NUM: usize = 3; +const DEFAULT_BATCH_SIZE: usize = 10; +const DEFAULT_SINGLE_BUNDLE_TIMEOUT_MS: u64 = 5 * 60 * 1000; + +const CONFIGERATOR_HGSERVER_PATH: &str = "configerator:scm/mononoke/hgserverconf/hgserver"; + +#[derive(Copy, Clone)] +struct QueueSize(usize); + +struct PipelineState { + entries: Vec, + data: T, +} + +type OutcomeWithStats = + Result<(FutureStats, PipelineState), (Option, PipelineError)>; + +type Outcome = Result, PipelineError>; + +fn get_id_to_search_after(entries: &[BookmarkUpdateLogEntry]) -> i64 { + entries.iter().map(|entry| entry.id).max().unwrap_or(0) +} + +fn bind_sync_err(entries: &[BookmarkUpdateLogEntry], cause: Error) -> PipelineError { + let ids: Vec = entries.iter().map(|entry| entry.id).collect(); + let entries = entries.to_vec(); + EntryError { + entries, + cause: (SyncFailed { ids, cause 
}).into(), + } +} + +fn bind_sync_result( + entries: &[BookmarkUpdateLogEntry], + res: Result, +) -> Result, PipelineError> { + match res { + Ok(data) => Ok(PipelineState { + entries: entries.to_vec(), + data, + }), + Err(cause) => Err(bind_sync_err(entries, cause)), + } +} + +fn drop_outcome_stats(o: OutcomeWithStats) -> Outcome { + o.map(|(_, r)| r).map_err(|(_, e)| e) +} + +fn build_reporting_handler( + ctx: CoreContext, + scuba_sample: ScubaSampleBuilder, + retry_num: usize, + bookmarks: impl BookmarkUpdateLog, +) -> impl Fn(OutcomeWithStats) -> BoxFuture, PipelineError> { + move |res| { + cloned!(ctx, scuba_sample); + + let log_entries = match &res { + Ok((_, pipeline_state, ..)) => Some(pipeline_state.entries.clone()), + Err((_, EntryError { entries, .. })) => Some(entries.clone()), + Err((_, AnonymousError { .. })) => None, + }; + + let maybe_stats = match &res { + Ok((stats, _)) => Some(stats), + Err((stats, _)) => stats.as_ref(), + }; + + // TODO: (torozco) T43766262 We should embed attempts in retry()'s Error type and use it + // here instead of receiving a plain ErrorKind and implicitly assuming retry_num attempts. + let attempts = match &res { + Ok((_, PipelineState { data: attempts, .. })) => attempts.clone(), + Err(..) => RetryAttemptsCount(retry_num), + }; + + let maybe_error = match &res { + Ok(..) => None, + Err((_, EntryError { cause, .. })) => Some(cause), + Err((_, AnonymousError { cause, .. })) => Some(cause), + }; + + let fut = match log_entries { + None => ok(()).right_future(), + Some(log_entries) => { + if log_entries.len() == 0 { + err(Error::msg("unexpected empty pipeline state")).right_future() + } else { + let duration = maybe_stats + .map(|s| s.completion_time) + .unwrap_or(Duration::from_secs(0)); + + let error = maybe_error.map(|e| format!("{:?}", e)); + let next_id = get_id_to_search_after(&log_entries); + + bookmarks + .count_further_bookmark_log_entries(ctx.clone(), next_id as u64, None) + .compat() + .map(|n| QueueSize(n as usize)) + .map({ + cloned!(log_entries); + move |queue_size| { + info!( + ctx.logger(), + "queue size after processing: {}", queue_size.0 + ); + log_processed_entries_to_scuba( + &log_entries, + scuba_sample, + error, + attempts, + duration, + queue_size, + ); + } + }) + .left_future() + } + } + }; + + fut.then(|_| drop_outcome_stats(res)).boxify() + } +} + +fn get_read_write_fetcher( + mysql_options: MysqlOptions, + repo_lock_db_addr: Option<&str>, + hgsql_name: HgsqlName, + lock_on_failure: bool, + use_sqlite: bool, + readonly_storage: bool, +) -> Result<(Option, RepoReadWriteFetcher)> { + let unlock_via: Result = match repo_lock_db_addr { + Some(repo_lock_db_addr) => { + let sql_repo_read_write_status = if use_sqlite { + let path = Path::new(repo_lock_db_addr); + SqlRepoReadWriteStatus::with_sqlite_path(path, readonly_storage) + } else { + match mysql_options.myrouter_port { + Some(myrouter_port) => Ok(SqlRepoReadWriteStatus::with_myrouter( + repo_lock_db_addr.to_string(), + myrouter_port, + mysql_options.read_connection_type(), + readonly_storage, + )), + None => Err(Error::msg("myrouter_port not specified in mysql mode")), + } + }; + sql_repo_read_write_status.and_then(|connection| { + Ok(RepoReadWriteFetcher::new( + Some(connection), + RepoReadOnly::ReadWrite, + hgsql_name, + )) + }) + } + None => { + if lock_on_failure { + Err(Error::msg( + "repo_lock_db_addr not specified with lock_on_failure", + )) + } else { + Ok(RepoReadWriteFetcher::new( + None, + RepoReadOnly::ReadWrite, + hgsql_name, + )) + } + } + }; + + 
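
With lock-on-failure enabled, a failed sync locks the repo with the fixed LOCK_REASON above, and a later successful pass unlocks it only when the stored reason is exactly that message, so a repo locked by an operator for any other reason is left alone (see unlock_repo_if_locked and lock_repo_if_unlocked below). A minimal synchronous sketch of that guard, using a plain enum and invented names in place of the SQL-backed RepoReadWriteFetcher:

// Hypothetical stand-in for the repo read/write state kept in SQL.
#[derive(Debug, PartialEq)]
enum RepoState {
    ReadWrite,
    ReadOnly(String),
}

const SYNC_LOCK_REASON: &str = "Locked due to sync failure, check Source Control @ FB";

// Lock only if the repo is currently writable, recording our own reason.
fn lock_if_unlocked(state: RepoState) -> RepoState {
    match state {
        RepoState::ReadWrite => RepoState::ReadOnly(SYNC_LOCK_REASON.to_string()),
        already_locked => already_locked,
    }
}

// Unlock only if the recorded reason is the one this job wrote.
fn unlock_if_locked_by_us(state: RepoState) -> RepoState {
    match state {
        RepoState::ReadOnly(reason) if reason == SYNC_LOCK_REASON => RepoState::ReadWrite,
        other => other,
    }
}

fn main() {
    // Locked by a failed sync, then unlocked by the next successful one.
    let repo = lock_if_unlocked(RepoState::ReadWrite);
    assert_eq!(unlock_if_locked_by_us(repo), RepoState::ReadWrite);

    // Locked by hand for some other reason: the job must not unlock it.
    let manual = RepoState::ReadOnly("maintenance window".to_string());
    assert_eq!(
        unlock_if_locked_by_us(manual),
        RepoState::ReadOnly("maintenance window".to_string())
    );
}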
unlock_via.and_then(|v| { + let lock_via = if lock_on_failure { + Some(v.clone()) + } else { + None + }; + Ok((lock_via, v)) + }) +} + +fn unlock_repo_if_locked( + ctx: CoreContext, + read_write_fetcher: RepoReadWriteFetcher, +) -> impl Future { + read_write_fetcher + .readonly() + .and_then(move |repo_state| match repo_state { + RepoReadOnly::ReadOnly(ref lock_msg) if lock_msg == LOCK_REASON => read_write_fetcher + .set_mononoke_read_write(&UNLOCK_REASON.to_string()) + .map(move |updated| { + if updated { + info!(ctx.logger(), "repo is unlocked"); + } + }) + .left_future(), + RepoReadOnly::ReadOnly(..) | RepoReadOnly::ReadWrite => ok(()).right_future(), + }) +} + +fn lock_repo_if_unlocked( + ctx: CoreContext, + read_write_fetcher: RepoReadWriteFetcher, +) -> impl Future { + info!(ctx.logger(), "locking repo..."); + read_write_fetcher + .readonly() + .and_then(move |repo_state| match repo_state { + RepoReadOnly::ReadWrite => read_write_fetcher + .set_read_only(&LOCK_REASON.to_string()) + .map(move |updated| { + if updated { + info!(ctx.logger(), "repo is locked now"); + } + }) + .left_future(), + + RepoReadOnly::ReadOnly(ref lock_msg) => { + ok(info!(ctx.logger(), "repo is locked already: {}", lock_msg)).right_future() + } + }) +} + +fn build_outcome_handler( + ctx: CoreContext, + lock_via: Option, +) -> impl Fn(Outcome) -> BoxFuture, Error> { + move |res| match res { + Ok(PipelineState { entries, .. }) => { + info!( + ctx.logger(), + "successful sync of entries {:?}", + entries.iter().map(|c| c.id).collect::>() + ); + ok(entries).boxify() + } + Err(AnonymousError { cause: e }) => { + info!(ctx.logger(), "error without entry"); + err(e.into()).boxify() + } + Err(EntryError { cause: e, .. }) => match &lock_via { + Some(repo_read_write_fetcher) => { + cloned!(ctx, repo_read_write_fetcher); + lock_repo_if_unlocked(ctx, repo_read_write_fetcher) + .then(move |_| err(e.into())) + .boxify() + } + None => err(e.into()).boxify(), + }, + } +} + +#[derive(Clone)] +struct CombinedBookmarkUpdateLogEntry { + components: Vec, + bundle_file: Arc, + timestamps_file: Arc, + cs_id: Option<(ChangesetId, HgChangesetId)>, + bookmark: BookmarkName, +} + +fn combine_entries( + ctx: CoreContext, + entries: &[PreparedBookmarkUpdateLogEntry], +) -> impl Future { + let bundle_file_paths: Vec = entries + .iter() + .map(|prepared_entry| prepared_entry.bundle_file.path().to_path_buf()) + .collect(); + let timestamp_file_paths: Vec = entries + .iter() + .map(|prepared_entry| prepared_entry.timestamps_file.path().to_path_buf()) + .collect(); + let components: Vec<_> = entries + .iter() + .map(|prepared_entry| prepared_entry.log_entry.clone()) + .collect(); + let last_entry = match entries.iter().last() { + None => { + return err(Error::msg( + "cannot create a combined entry from an empty list", + )) + .left_future() + } + Some(entry) => entry.clone(), + }; + + async move { + try_join( + merge_bundles(&ctx, &bundle_file_paths), + merge_timestamp_files(&ctx, ×tamp_file_paths), + ) + .await + } + .boxed() + .compat() + .map(move |(combined_bundle_file, combined_timestamps_file)| { + let PreparedBookmarkUpdateLogEntry { + cs_id, log_entry, .. 
+ } = last_entry; + CombinedBookmarkUpdateLogEntry { + components, + bundle_file: Arc::new(combined_bundle_file), + timestamps_file: Arc::new(combined_timestamps_file), + cs_id, + bookmark: log_entry.bookmark_name, + } + }) + .right_future() +} + +/// Sends a downloaded bundle to hg +fn try_sync_single_combined_entry( + ctx: CoreContext, + attempt: usize, + combined_entry: CombinedBookmarkUpdateLogEntry, + hg_repo: HgRepo, +) -> impl Future { + let CombinedBookmarkUpdateLogEntry { + components, + bundle_file, + timestamps_file, + cs_id, + bookmark, + } = combined_entry; + let ids: Vec<_> = components.iter().map(|entry| entry.id).collect(); + info!(ctx.logger(), "syncing log entries {:?} ...", ids); + + let bundle_path = try_boxfuture!(get_path(&bundle_file)); + let timestamps_path = try_boxfuture!(get_path(×tamps_file)); + + hg_repo + .apply_bundle( + bundle_path, + timestamps_path, + bookmark, + cs_id.map(|(_bcs_id, hg_cs_id)| hg_cs_id), + attempt, + ctx.logger().clone(), + ) + .map(move |()| { + // Make sure temp file is dropped only after bundle was applied is done + let _ = bundle_file; + let _ = timestamps_file; + }) + .boxify() +} + +fn sync_single_combined_entry( + ctx: CoreContext, + combined_entry: CombinedBookmarkUpdateLogEntry, + hg_repo: HgRepo, + base_retry_delay_ms: u64, + retry_num: usize, + globalrev_syncer: GlobalrevSyncer, +) -> impl Future { + let sync_globalrevs = if let Some((cs_id, _hg_cs_id)) = combined_entry.cs_id { + async move { globalrev_syncer.sync(cs_id).await } + .boxed() + .compat() + .left_future() + } else { + Ok(()).into_future().right_future() + }; + + sync_globalrevs.and_then(move |()| { + retry( + ctx.logger().clone(), + { + cloned!(ctx, combined_entry); + move |attempt| { + try_sync_single_combined_entry( + ctx.clone(), + attempt, + combined_entry.clone(), + hg_repo.clone(), + ) + } + }, + base_retry_delay_ms, + retry_num, + ) + .map(|(_, attempts)| attempts) + }) +} + +/// Logs to Scuba information about a single bundle sync event +fn log_processed_entry_to_scuba( + log_entry: &BookmarkUpdateLogEntry, + mut scuba_sample: ScubaSampleBuilder, + error: Option, + attempts: RetryAttemptsCount, + duration: Duration, + queue_size: QueueSize, +) { + let entry = log_entry.id; + let book = format!("{}", log_entry.bookmark_name); + let reason = format!("{}", log_entry.reason); + let delay = log_entry.timestamp.since_seconds(); + + scuba_sample + .add("entry", entry) + .add("bookmark", book) + .add("reason", reason) + .add("attempts", attempts.0) + .add("duration", duration.as_millis() as i64); + + match error { + Some(error) => { + scuba_sample.add("success", 0).add("err", error); + } + None => { + scuba_sample.add("success", 1).add("delay", delay); + scuba_sample.add("queue_size", queue_size.0); + } + }; + + scuba_sample.log(); +} + +fn log_processed_entries_to_scuba( + entries: &[BookmarkUpdateLogEntry], + scuba_sample: ScubaSampleBuilder, + error: Option, + attempts: RetryAttemptsCount, + duration: Duration, + queue_size: QueueSize, +) { + let n: f64 = entries.len() as f64; + let individual_duration = duration.div_f64(n); + entries.iter().for_each(|entry| { + log_processed_entry_to_scuba( + entry, + scuba_sample.clone(), + error.clone(), + attempts, + individual_duration, + queue_size, + ) + }); +} + +fn get_path(f: &NamedTempFile) -> Result { + f.path() + .to_str() + .map(|s| s.to_string()) + .ok_or(Error::msg("non-utf8 file")) +} + +fn loop_over_log_entries( + ctx: CoreContext, + bookmarks: impl BookmarkUpdateLog, + repo_id: RepositoryId, + start_id: i64, + 
loop_forever: bool, + scuba_sample: ScubaSampleBuilder, + fetch_up_to_bundles: u64, + repo_read_write_fetcher: RepoReadWriteFetcher, +) -> impl Stream, Error = Error> { + stream::unfold(Some(start_id), move |maybe_id| match maybe_id { + Some(current_id) => Some( + bookmarks + .read_next_bookmark_log_entries_same_bookmark_and_reason( + ctx.clone(), + current_id as u64, + fetch_up_to_bundles, + ) + .compat() + .collect() + .and_then({ + cloned!(ctx, repo_read_write_fetcher, mut scuba_sample); + move |entries| match entries.iter().last().cloned() { + None => { + if loop_forever { + info!(ctx.logger(), "id: {}, no new entries found", current_id); + scuba_sample + .add("repo", repo_id.id()) + .add("success", 1) + .add("delay", 0) + .log(); + + // First None means that no new entries will be added to the stream, + // Some(current_id) means that bookmarks will be fetched again + sleep(Duration::new(SLEEP_SECS, 0)) + .from_err() + .and_then({ + cloned!(ctx, repo_read_write_fetcher); + move |()| { + unlock_repo_if_locked(ctx, repo_read_write_fetcher) + } + }) + .map(move |()| (vec![], Some(current_id))) + .right_future() + } else { + ok((vec![], None)).left_future() + } + } + Some(last_entry) => ok((entries, Some(last_entry.id))).left_future(), + } + }), + ), + None => None, + }) +} + +#[derive(Clone)] +pub struct BookmarkOverlay { + bookmarks: Arc>, + overlay: HashMap>, +} + +impl BookmarkOverlay { + fn new(bookmarks: Arc>) -> Self { + Self { + bookmarks, + overlay: HashMap::new(), + } + } + + fn update(&mut self, book: BookmarkName, val: Option) { + self.overlay.insert(book, val); + } + + fn get_bookmark_values(&self) -> Vec { + let mut res = vec![]; + for key in self.bookmarks.keys().chain(self.overlay.keys()) { + if let Some(val) = self.overlay.get(key) { + res.extend(val.clone().into_iter()); + } else if let Some(val) = self.bookmarks.get(key) { + res.push(*val); + } + } + + res + } +} + +fn run(ctx: CoreContext, matches: ArgMatches<'static>) -> BoxFuture<(), Error> { + let hg_repo_path = match matches.value_of("hg-repo-ssh-path") { + Some(hg_repo_path) => hg_repo_path.to_string(), + None => { + error!(ctx.logger(), "Path to hg repository must be specified"); + std::process::exit(1); + } + }; + + let log_to_scuba = matches.is_present("log-to-scuba"); + let mut scuba_sample = if log_to_scuba { + ScubaSampleBuilder::new(ctx.fb, SCUBA_TABLE) + } else { + ScubaSampleBuilder::with_discard() + }; + scuba_sample.add_common_server_data(); + + let mysql_options = args::parse_mysql_options(&matches); + let readonly_storage = args::parse_readonly_storage(&matches); + + let repo_id = args::get_repo_id(ctx.fb, &matches).expect("need repo id"); + let repo_config = args::get_config(ctx.fb, &matches); + let (repo_name, repo_config) = try_boxfuture!(repo_config); + + let base_retry_delay_ms = args::get_u64_opt(&matches, "base-retry-delay-ms").unwrap_or(1000); + let retry_num = args::get_usize(&matches, "retry-num", DEFAULT_RETRY_NUM); + + let generate_bundles = matches.is_present(GENERATE_BUNDLES); + let bookmark_regex_force_lfs = try_boxfuture!(matches + .value_of(ARG_BOOKMARK_REGEX_FORCE_GENERATE_LFS) + .map(Regex::new) + .transpose()); + + let lfs_params = repo_config.lfs.clone(); + + let filenode_verifier = match matches.value_of("verify-lfs-blob-presence") { + Some(uri) => { + let uri = try_boxfuture!(uri.parse::()); + let verifier = try_boxfuture!(LfsVerifier::new(uri)); + FilenodeVerifier::LfsVerifier(verifier) + } + None => FilenodeVerifier::NoopVerifier, + }; + + let hgsql_use_sqlite = 
matches.is_present(HGSQL_GLOBALREVS_USE_SQLITE); + let hgsql_db_addr = matches + .value_of(HGSQL_GLOBALREVS_DB_ADDR) + .map(|a| a.to_string()); + + let repo_parts = args::open_repo(ctx.fb, &ctx.logger(), &matches).and_then({ + cloned!(ctx, hg_repo_path); + let fb = ctx.fb; + let maybe_skiplist_blobstore_key = repo_config.skiplist_index_blobstore_key.clone(); + let hgsql_globalrevs_name = repo_config.hgsql_globalrevs_name.clone(); + move |repo| { + let overlay = list_hg_server_bookmarks(hg_repo_path.clone()) + .and_then({ + cloned!(ctx, repo); + move |bookmarks| { + stream::iter_ok(bookmarks.into_iter()) + .map(move |(book, hg_cs_id)| { + repo.get_bonsai_from_hg(ctx.clone(), hg_cs_id).map( + move |maybe_bcs_id| maybe_bcs_id.map(|bcs_id| (book, bcs_id)), + ) + }) + .buffered(100) + .filter_map(|x| x) + .collect_to::>() + } + }) + .map(Arc::new) + .map(BookmarkOverlay::new); + + let preparer = if generate_bundles { + BundlePreparer::new_generate_bundles( + ctx, + repo.clone(), + base_retry_delay_ms, + retry_num, + maybe_skiplist_blobstore_key, + lfs_params, + filenode_verifier, + bookmark_regex_force_lfs, + ) + .boxify() + } else { + BundlePreparer::new_use_existing(repo.clone(), base_retry_delay_ms, retry_num) + .boxify() + }; + + let globalrev_syncer = { + async move { + if !generate_bundles && hgsql_db_addr.is_some() { + return Err(format_err!( + "Syncing globalrevs ({}) requires generating bundles ({})", + HGSQL_GLOBALREVS_DB_ADDR, + GENERATE_BUNDLES + )); + } + + GlobalrevSyncer::new( + fb, + repo, + hgsql_use_sqlite, + hgsql_db_addr.as_ref().map(|a| a.as_ref()), + mysql_options, + readonly_storage.0, + hgsql_globalrevs_name, + ) + .await + } + } + .boxed() + .compat(); + + preparer.map(Arc::new).join3(overlay, globalrev_syncer) + } + }); + + let batch_size = args::get_usize(&matches, "batch-size", DEFAULT_BATCH_SIZE); + let single_bundle_timeout_ms = args::get_u64( + &matches, + "single-bundle-timeout-ms", + DEFAULT_SINGLE_BUNDLE_TIMEOUT_MS, + ); + let verify_server_bookmark_on_failure = matches.is_present("verify-server-bookmark-on-failure"); + let hg_repo = hgrepo::HgRepo::new( + hg_repo_path, + batch_size, + single_bundle_timeout_ms, + verify_server_bookmark_on_failure, + ); + let repos = repo_parts.join(hg_repo); + scuba_sample.add("repo", repo_id.id()); + scuba_sample.add("reponame", repo_name.clone()); + + let myrouter_ready_fut = myrouter_ready( + repo_config.primary_metadata_db_address(), + mysql_options, + ctx.logger().clone(), + ); + let bookmarks = args::open_sql::(ctx.fb, &matches); + + myrouter_ready_fut + .join(bookmarks) + .and_then(move |(_, bookmarks)| { + let bookmarks = bookmarks.with_repo_id(repo_id); + let reporting_handler = build_reporting_handler( + ctx.clone(), + scuba_sample.clone(), + retry_num, + bookmarks.clone(), + ); + + let repo_lockers = get_read_write_fetcher( + mysql_options, + try_boxfuture!(get_repo_sqldb_address(&ctx, &matches, &repo_config.hgsql_name)).as_deref(), + repo_config.hgsql_name.clone(), + matches.is_present("lock-on-failure"), + matches.is_present("repo-lock-sqlite"), + readonly_storage.0, + ); + + let (lock_via, unlock_via) = try_boxfuture!(repo_lockers); + + match matches.subcommand() { + (MODE_SYNC_ONCE, Some(sub_m)) => { + let start_id = try_boxfuture!(args::get_usize_opt(&sub_m, "start-id") + .ok_or(Error::msg("--start-id must be specified"))); + + bookmarks + .read_next_bookmark_log_entries(ctx.clone(), start_id as u64, 1, Freshness::MaybeStale) + .compat() + .collect() + .map(|entries| entries.into_iter().next()) + .join(repos) + 
.and_then({ + cloned!(ctx); + move |(maybe_log_entry, ((bundle_preparer, overlay, globalrev_syncer), hg_repo))| { + if let Some(log_entry) = maybe_log_entry { + bundle_preparer.prepare_single_bundle( + ctx.clone(), + log_entry.clone(), + overlay.clone(), + ) + .and_then({ + cloned!(ctx); + |prepared_log_entry| { + combine_entries(ctx, &vec![prepared_log_entry]) + } + }) + .and_then({ + cloned!(ctx); + move |combined_entry| { + sync_single_combined_entry( + ctx.clone(), + combined_entry, + hg_repo, + base_retry_delay_ms, + retry_num, + globalrev_syncer.clone(), + ) + } + }) + .then(move |r| { + bind_sync_result(&vec![log_entry], r).into_future() + }) + .collect_timing() + .map_err(|(stats, e)| (Some(stats), e)) + .then(reporting_handler) + .then(build_outcome_handler(ctx.clone(), lock_via)) + .map(|_| ()) + .left_future() + } else { + info!(ctx.logger(), "no log entries found"); + Ok(()).into_future().right_future() + } + } + }) + .boxify() + } + (MODE_SYNC_LOOP, Some(sub_m)) => { + let start_id = args::get_i64_opt(&sub_m, "start-id"); + let bundle_buffer_size = + args::get_usize_opt(&sub_m, "bundle-prefetch").unwrap_or(0) + 1; + let combine_bundles = args::get_u64_opt(&sub_m, "combine-bundles").unwrap_or(1); + if combine_bundles != 1 { + panic!( + "For now, we don't allow combining bundles. See T43929272 for details" + ); + } + let loop_forever = sub_m.is_present("loop-forever"); + let mutable_counters = args::open_sql::(ctx.fb, &matches); + let exit_path = sub_m + .value_of("exit-file") + .map(|name| Path::new(name).to_path_buf()); + + // NOTE: We poll this callback twice: + // - Once after possibly pulling a new piece of work. + // - Once after pulling a prepared piece of work. + // + // This ensures that we exit ASAP in the two following cases: + // - There is no work whatsoever. The first check exits early. + // - There is a lot of buffered work. The 2nd check exits early without doing it all. + let can_continue = Arc::new({ + cloned!(ctx); + move || match exit_path { + Some(ref exit_path) if exit_path.exists() => { + info!(ctx.logger(), "path {:?} exists: exiting ...", exit_path); + false + } + _ => true, + } + }); + + mutable_counters + .and_then(move |mutable_counters| { + let counter = mutable_counters + .get_counter(ctx.clone(), repo_id, LATEST_REPLAYED_REQUEST_KEY) + .and_then(move |maybe_counter| { + maybe_counter + .or_else(move || start_id) + .ok_or(format_err!( + "{} counter not found. 
Pass `--start-id` flag to set the counter", + LATEST_REPLAYED_REQUEST_KEY + )) + }); + + cloned!(ctx); + counter + .join(repos) + .map(move |(start_id, repos)| { + let ((bundle_preparer, mut overlay, globalrev_syncer), hg_repo) = repos; + + loop_over_log_entries( + ctx.clone(), + bookmarks.clone(), + repo_id, + start_id, + loop_forever, + scuba_sample.clone(), + combine_bundles, + unlock_via.clone(), + ) + .take_while({ + cloned!(can_continue); + move |_| ok(can_continue()) + }) + .filter_map(|entry_vec| { + if entry_vec.len() == 0 { + None + } else { + Some(entry_vec) + } + }) + .map_err(|cause| AnonymousError { cause }) + .map({ + cloned!(ctx, bundle_preparer); + move |entries: Vec| { + cloned!(ctx, bundle_preparer); + let mut futs = vec![]; + for log_entry in entries { + let f = bundle_preparer.prepare_single_bundle( + ctx.clone(), + log_entry.clone(), + overlay.clone(), + ); + let f = spawn_future(f) + .map_err({ + cloned!(log_entry); + move |err| { + bind_sync_err(&vec![log_entry], err) + } + }) + // boxify is used here because of the + // type_length_limit limitation, which gets exceeded + // if we use heap-allocated types + .boxify(); + overlay.update( + log_entry.bookmark_name.clone(), + log_entry.to_changeset_id.clone(), + ); + futs.push(f); + } + + join_all(futs).and_then({ + cloned!(ctx); + |prepared_log_entries| { + combine_entries(ctx, &prepared_log_entries) + .map_err(|e| { + bind_sync_err( + &prepared_log_entries + .into_iter() + .map(|prepared_entry| { + prepared_entry.log_entry + }) + .collect::>(), + e, + ) + }) + } + }) + } + }) + .buffered(bundle_buffer_size) + .take_while({ + cloned!(can_continue); + move |_| ok(can_continue()) + }) + .then({ + cloned!(ctx, hg_repo); + + move |res| match res { + Ok(combined_entry) => sync_single_combined_entry( + ctx.clone(), + combined_entry.clone(), + hg_repo.clone(), + base_retry_delay_ms, + retry_num, + globalrev_syncer.clone(), + ) + .then(move |r| { + bind_sync_result(&combined_entry.components, r) + }) + .collect_timing() + .map_err(|(stats, e)| (Some(stats), e)) + .left_future(), + Err(e) => err((None, e)).right_future(), + } + }) + .then(reporting_handler) + .then(build_outcome_handler(ctx.clone(), lock_via)) + .map(move |entry| { + let next_id = get_id_to_search_after(&entry); + retry( + ctx.logger().clone(), + { + cloned!(ctx, mutable_counters); + move |_| { + mutable_counters.set_counter( + ctx.clone(), + repo_id, + LATEST_REPLAYED_REQUEST_KEY, + next_id, + // TODO(stash): do we need conditional updates here? 
+ None, + ) + .and_then(|success| { + if success { + Ok(()) + } else { + bail!("failed to update counter") + } + }) + } + }, + base_retry_delay_ms, + retry_num, + ) + }) + }) + .flatten_stream() + .for_each(|res| res.map(|_| ())) + .boxify() + }) + .boxify() + } + _ => { + error!(ctx.logger(), "incorrect mode of operation is specified"); + std::process::exit(1); + } + } + }) + .boxify() +} + +fn get_repo_sqldb_address<'a>( + ctx: &CoreContext, + matches: &ArgMatches<'a>, + repo_name: &HgsqlName, +) -> Result, Error> { + if let Some(db_addr) = matches.value_of("repo-lock-db-address") { + return Ok(Some(db_addr.to_string())); + } + if !matches.is_present("lock-on-failure") { + return Ok(None); + } + let handle = args::get_config_handle( + ctx.fb, + ctx.logger().clone(), + Some(CONFIGERATOR_HGSERVER_PATH), + 1, + )?; + let config: Arc = handle.get(); + match config.sql_confs.get(AsRef::::as_ref(repo_name)) { + Some(sql_conf) => Ok(Some(sql_conf.db_tier.clone())), + None => Ok(Some(config.sql_conf_default.db_tier.clone())), + } +} + +#[fbinit::main] +fn main(fb: FacebookInit) -> Result<()> { + let app = args::MononokeApp::new("Mononoke -> hg sync job") + .with_advanced_args_hidden() + .with_fb303_args() + .build() + .arg( + Arg::with_name("hg-repo-ssh-path") + .takes_value(true) + .required(true) + .help("Remote path to hg repo to replay to. Example: ssh://hg.vip.facebook.com//data/scm/fbsource"), + ) + .arg( + Arg::with_name("log-to-scuba") + .long("log-to-scuba") + .takes_value(false) + .required(false) + .help("If set job will log individual bundle sync states to Scuba"), + ) + .arg( + Arg::with_name("lock-on-failure") + .long("lock-on-failure") + .takes_value(false) + .required(false) + .help("If set, mononoke repo will be locked on sync failure"), + ) + .arg( + Arg::with_name("base-retry-delay-ms") + .long("base-retry-delay-ms") + .takes_value(true) + .required(false) + .help("initial delay between failures. It will be increased on the successive attempts") + ) + .arg( + Arg::with_name("retry-num") + .long("retry-num") + .takes_value(true) + .required(false) + .help("how many times to retry to sync a single bundle") + ) + .arg( + Arg::with_name("batch-size") + .long("batch-size") + .takes_value(true) + .required(false) + .help("maximum number of bundles allowed over a single hg peer") + ) + .arg( + Arg::with_name("single-bundle-timeout-ms") + .long("single-bundle-timeout-ms") + .takes_value(true) + .required(false) + .help("a timeout to send a single bundle to (if exceeded, the peer is restarted)") + ) + .arg( + Arg::with_name("verify-server-bookmark-on-failure") + .long("verify-server-bookmark-on-failure") + .takes_value(false) + .required(false) + .help("if present, check after a failure whether a server bookmark is already in the expected location") + ) + .arg( + Arg::with_name("repo-lock-sqlite") + .long("repo-lock-sqlite") + .takes_value(false) + .required(false) + .help("Enable sqlite for repo_lock access, path is in repo-lock-db-address"), + ) + .arg( + Arg::with_name("repo-lock-db-address") + .long("repo-lock-db-address") + .takes_value(true) + .required(false) + .help("Db with repo_lock table. 
Will be used to lock/unlock repo"), + ) + .arg( + Arg::with_name(HGSQL_GLOBALREVS_USE_SQLITE) + .long(HGSQL_GLOBALREVS_USE_SQLITE) + .takes_value(false) + .required(false) + .help("Use sqlite for hgsql globalrev sync (use for testing)."), + ) + .arg( + Arg::with_name(HGSQL_GLOBALREVS_DB_ADDR) + .long(HGSQL_GLOBALREVS_DB_ADDR) + .takes_value(true) + .required(false) + .help("Sync globalrevs to this database prior to syncing bundles."), + ) + .arg( + Arg::with_name(GENERATE_BUNDLES) + .long(GENERATE_BUNDLES) + .takes_value(false) + .required(false) + .help("Generate new bundles instead of using bundles that were saved on Mononoke during push"), + ) + .arg( + Arg::with_name(ARG_BOOKMARK_REGEX_FORCE_GENERATE_LFS) + .long(ARG_BOOKMARK_REGEX_FORCE_GENERATE_LFS) + .takes_value(true) + .required(false) + .requires(GENERATE_BUNDLES) + .help("force generation of lfs bundles for bookmarks that match regex"), + ) + .arg( + Arg::with_name("verify-lfs-blob-presence") + .long("verify-lfs-blob-presence") + .takes_value(true) + .required(false) + .help("If generating bundles, verify lfs blob presence at this batch endpoint"), + ) + .about( + "Special job that takes bundles that were sent to Mononoke and \ + applies them to mercurial", + ); + + let sync_once = SubCommand::with_name(MODE_SYNC_ONCE) + .about("Syncs a single bundle") + .arg( + Arg::with_name("start-id") + .long("start-id") + .takes_value(true) + .required(true) + .help("id in the database table to start sync with"), + ); + let sync_loop = SubCommand::with_name(MODE_SYNC_LOOP) + .about("Syncs bundles one by one") + .arg( + Arg::with_name("start-id") + .long("start-id") + .takes_value(true) + .required(true) + .help("if current counter is not set then `start-id` will be used"), + ) + .arg( + Arg::with_name("loop-forever") + .long("loop-forever") + .takes_value(false) + .required(false) + .help( + "If set job will loop forever even if there are no new entries in db or \ + if there was an error", + ), + ) + .arg( + Arg::with_name("bundle-prefetch") + .long("bundle-prefetch") + .takes_value(true) + .required(false) + .help("How many bundles to prefetch"), + ) + .arg( + Arg::with_name("exit-file") + .long("exit-file") + .takes_value(true) + .required(false) + .help( + "If you provide this argument, the sync loop will gracefully exit \ + once this file exists", + ), + ) + .arg( + Arg::with_name("combine-bundles") + .long("combine-bundles") + .takes_value(true) + .required(false) + .help("How many bundles to combine into a single bundle before sending to hg"), + ); + let app = app.subcommand(sync_once).subcommand(sync_loop); + + let matches = app.get_matches(); + let logger = args::init_logging(fb, &matches); + + args::init_cachelib(fb, &matches, None); + + let ctx = CoreContext::new_with_logger(fb, logger.clone()); + + // TODO: Don't take ownership of matches here + let fut = run(ctx.clone(), matches.clone()).compat(); + + block_execute( + fut, + fb, + "hg_sync_job", + &logger, + &matches, + cmdlib::monitoring::AliveService, + ) +} diff --git a/eden/mononoke/mononoke_hg_sync_job/src/sendunbundlereplay.py b/eden/mononoke/mononoke_hg_sync_job/src/sendunbundlereplay.py new file mode 100644 index 0000000000000..3892de69b92c0 --- /dev/null +++ b/eden/mononoke/mononoke_hg_sync_job/src/sendunbundlereplay.py @@ -0,0 +1,199 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2. 
+ +# sendunbundlereplay.py - send unbundlereplay wireproto command +from __future__ import absolute_import + +import base64 +import contextlib +import datetime +import os +import sys + +from edenscm.mercurial import error, hg, replay, util +from edenscm.mercurial.commands import command +from edenscm.mercurial.i18n import _ + + +def getcommitdates(ui, fname=None): + if fname: + with open(fname, "r") as tf: + timestamps = tf.readlines() + else: + timestamps = ui.fin + return dict([s.split("=") for s in timestamps]) + + +def getstream(fname): + with open(fname, "rb") as f: + return util.chunkbuffer([f.read()]) + + +def getremote(ui, path): + return hg.peer(ui, {}, path) + + +def runreplay(ui, remote, stream, commitdates, rebasedhead, ontobook): + returncode = 0 + try: + reply = remote.unbundlereplay( + stream, + ["force"], + remote.url(), + replay.ReplayData(commitdates, rebasedhead, ontobook), + ui.configbool("sendunbundlereplay", "respondlightly", True), + ) + except Exception: + returncode = 255 + finally: + if returncode != 0: + return returncode + + for part in reply.iterparts(): + part.read() + if part.type.startswith("error:"): + returncode = 1 + ui.warn(_("replay failed: %s\n") % part.type) + if "message" in part.params: + ui.warn(_("part message: %s\n") % (part.params["message"])) + return returncode + + +def writereport(reportsfile, msg): + reportsfile.write(msg) + reportsfile.flush() + os.fsync(reportsfile.fileno()) + + +@contextlib.contextmanager +def capturelogs(ui, remote, logfile): + if logfile is None: + yield + else: + uis = [remote.ui, ui] + for u in uis: + u.pushbuffer(error=True, subproc=True) + + try: + yield + finally: + output = "".join([u.popbuffer() for u in uis]) + ui.write_err(output) + with open(logfile, "w") as f: + f.write(output) + + +@command( + "sendunbundlereplaybatch", + [ + ("", "path", "", _("hg server remotepath (ssh)"), ""), + ("", "reports", "", _("a file for unbundereplay progress reports"), ""), + ], + _("[OPTION]..."), + norepo=True, +) +def sendunbundlereplaybatch(ui, **opts): + """Send a batch of unbundlereplay wireproto commands to a given server + + This exists to amortize the costs of `hg.peer` creation over multiple + `unbundlereplay` calls. + + Reads `(bundlefile, timestampsfile, ontobook, rebasedhead)` from + stdin. See docs of `sendunbundlereplay` for more details. + + Takes the `reports` argument on the command line. After each unbundlereplay + command is successfully executed, will write and flush a single line + into this file, thus reporting progress. File is truncated at the beginning + of this function. + + ``sendunbundlereplay.respondlightly`` config option instructs the server + to avoid sending large bundle2 parts back. + """ + if not opts.get("reports"): + raise error.Abort("--reports argument is required") + path = opts["path"] + returncode = 0 + remote = getremote(ui, path) + ui.debug("using %s as a reports file\n" % opts["reports"]) + with open(opts["reports"], "wb", 0) as reportsfile: + counter = 0 + while True: + line = sys.stdin.readline() + if line == "": + break + + # The newest sync job sends 5 parameters, but older versions send 4. + # We default the last parameter to None for compatibility. 
+ parts = line.split() + if len(parts) == 4: + parts.append(None) + (bfname, tsfname, ontobook, rebasedhead, logfile) = parts + ontobook = base64.b64decode(ontobook) + + rebasedhead = None if rebasedhead == "DELETED" else rebasedhead + commitdates = getcommitdates(ui, tsfname) + stream = getstream(bfname) + + with capturelogs(ui, remote, logfile): + returncode = runreplay( + ui, remote, stream, commitdates, rebasedhead, ontobook + ) + + if returncode != 0: + # the word "failed" is an identifier of failure, do not change + failure = "unbundle replay batch item #%i failed\n" % counter + ui.warn(failure) + writereport(reportsfile, failure) + break + success = "unbundle replay batch item #%i successfully sent\n" % counter + ui.warn(success) + writereport(reportsfile, success) + counter += 1 + + return returncode + + +@command( + "sendunbundlereplay", + [ + ("", "file", "", _("file to read bundle from"), ""), + ("", "path", "", _("hg server remotepath (ssh)"), ""), + ("r", "rebasedhead", "", _("expected rebased head hash"), ""), + ( + "", + "deleted", + False, + _("bookmark was deleted, can't be used with `--rebasedhead`"), + ), + ("b", "ontobook", "", _("expected onto bookmark for pushrebase"), ""), + ], + _("[OPTION]..."), + norepo=True, +) +def sendunbundlereplay(ui, **opts): + """Send unbundlereplay wireproto command to a given server + + Takes `rebasedhook` and `ontobook` arguments on the commmand + line, and commit dates in stdin. The commit date format is: + = + + ``sendunbundlereplay.respondlightly`` config option instructs the server + to avoid sending large bundle2 parts back. + """ + fname = opts["file"] + path = opts["path"] + rebasedhead = opts["rebasedhead"] + deleted = opts["deleted"] + ontobook = opts["ontobook"] + if rebasedhead and deleted: + raise error.Abort("can't use `--rebasedhead` and `--deleted`") + + if not (rebasedhead or deleted): + raise error.Abort("either `--rebasedhead` or `--deleted` should be used") + + commitdates = getcommitdates(ui) + stream = getstream(fname) + remote = getremote(ui, path) + return runreplay(ui, remote, stream, commitdates, rebasedhead, ontobook) diff --git a/eden/mononoke/mononoke_hg_sync_job_helper_lib/Cargo.toml b/eden/mononoke/mononoke_hg_sync_job_helper_lib/Cargo.toml deleted file mode 100644 index 5c8feebe1a617..0000000000000 --- a/eden/mononoke/mononoke_hg_sync_job_helper_lib/Cargo.toml +++ /dev/null @@ -1,24 +0,0 @@ -[package] -name = "mononoke_hg_sync_job_helper_lib" -edition = "2018" -version = "0.1.0" -authors = ['Facebook'] -license = "GPLv2+" -include = ["src/**/*.rs"] - -[dependencies] -blobstore = { path = "../blobstore" } -context = { path = "../server/context" } -mercurial_bundles = { path = "../mercurial/bundles" } -mononoke_types = { path = "../mononoke_types" } -cloned = { git = "https://github.com/facebookexperimental/rust-shed.git", branch = "master" } -futures_ext = { git = "https://github.com/facebookexperimental/rust-shed.git", branch = "master" } -anyhow = "1.0" -bytes-old = { package = "bytes", version = "0.4", features = ["serde"] } -futures = { version = "0.3.5", features = ["async-await", "compat"] } -futures-old = { package = "futures", version = "0.1" } -slog = { version = "2.5", features = ["max_level_debug"] } -tempfile = "3.1" -tokio = { version = "=0.2.13", features = ["full"] } -tokio-io = "0.1" -tokio-timer = "0.2" diff --git a/eden/mononoke/tests/integration/manifest_deps b/eden/mononoke/tests/integration/manifest_deps index bd0b726b7d944..618ef354420fe 100644 --- 
a/eden/mononoke/tests/integration/manifest_deps +++ b/eden/mononoke/tests/integration/manifest_deps @@ -18,6 +18,7 @@ MONONOKE_BINS = { "MONONOKE_FASTREPLAY": "fastreplay", "MONONOKE_GITIMPORT": "gitimport", "MONONOKE_HGCLI": "hgcli", + "MONONOKE_HG_SYNC": "mononoke_hg_sync_job", "MONONOKE_HOOK_TAILER": "hook_tailer", "MONONOKE_LFS_IMPORT": "lfs_import", "MONONOKE_MANUAL_SCRUB": "manual_scrub", diff --git a/eden/mononoke/tests/integration/run_tests_getdeps.py b/eden/mononoke/tests/integration/run_tests_getdeps.py index 3a2d7f169d603..ce6ce1442e281 100755 --- a/eden/mononoke/tests/integration/run_tests_getdeps.py +++ b/eden/mononoke/tests/integration/run_tests_getdeps.py @@ -74,7 +74,6 @@ "test-backsync-forever.t", # Unknown issue "test-blobimport-lfs.t", # Timed out "test-blobimport.t", # Case insensitivity of paths in MacOS - "test-blobstore_healer.t", # PANIC not implemented in sql_ext "test-bookmarks-filler.t", # Probably missing binary "test-cmd-manual-scrub.t", # Just wrong outout "test-cross-repo-commit-sync-live.t", # Unknown issue @@ -109,21 +108,13 @@ "test-lfs-to-mononoke.t", # Timed out "test-lfs-wantslfspointers.t", # Timed out "test-lfs.t", # Timed out - "test-megarepo-tool.t", # Missing MONONOKE_HG_SYNC + "test-megarepo-tool.t", # Missing MEGAREPO_TOOL "test-mononoke-admin.t", # Missing MEGAREPO_TOOL - "test-mononoke-hg-sync-job-generate-bundles-force.t", # Missing MONONOKE_HG_SYNC "test-mononoke-hg-sync-job-generate-bundles-lfs-verification.t", # Timed out "test-mononoke-hg-sync-job-generate-bundles-lfs.t", # Timed out - "test-mononoke-hg-sync-job-generate-bundles-loop.t", # Missing MONONOKE_HG_SYNC - "test-mononoke-hg-sync-job-generate-bundles-other-books.t", # Missing MONONOKE_HG_SYNC - "test-mononoke-hg-sync-job-generate-bundles.t", # Missing MONONOKE_HG_SYNC - "test-mononoke-hg-sync-job-sync-globalrevs.t", # Missing MONONOKE_HG_SYNC - "test-mononoke-hg-sync-job-with-copies.t", # Missing MONONOKE_HG_SYNC - "test-mononoke-hg-sync-job.t", # Missing MONONOKE_HG_SYNC "test-push-protocol-lfs.t", # Timed out "test-push-redirector-pushrebase-hooks.t", # Hooks are not in OSS yet "test-push-redirector-pushrebase-onesided.t", # Missing MONONOKE_X_REPO_SYNC - "test-push-redirector-sync-job.t", # Missing MONONOKE_HG_SYNC "test-pushrebase-block-casefolding.t", # Most likely MacOS path case insensitivity "test-pushrebase-discovery.t", # Hooks are not in OSS yet "test-remotefilelog-lfs.t", # Timed out From ecebff94ee40ea2ede1014124bb9fcef3b3127df Mon Sep 17 00:00:00 2001 From: Lukas Piatkowski Date: Wed, 29 Jul 2020 05:56:33 -0700 Subject: [PATCH 3/3] mononoke/megarepotool: make megarepotool public (#38) Summary: Pull Request resolved: https://github.com/facebookexperimental/eden/pull/38 The tool is used in some integration tests, make it public so that the tests might pass Differential Revision: D22815283 fbshipit-source-id: 627834ef1491b7d885e761cbba2624f08f56e76b --- eden/mononoke/Cargo.toml | 2 +- .../cross_repo_sync/test_utils/Cargo.toml | 2 +- .../{megarepolib => megarepo}/Cargo.toml | 26 +- .../{megarepolib => megarepo}/src/chunking.rs | 0 .../{megarepolib => megarepo}/src/common.rs | 0 .../{megarepolib => megarepo}/src/lib.rs | 0 .../src/pre_merge_deletes.rs | 0 .../commit_rewriting/megarepo/tool/cli.rs | 160 +++++++ .../commit_rewriting/megarepo/tool/main.rs | 221 +++++++++ .../commit_rewriting/megarepo/tool/merging.rs | 144 ++++++ .../megarepo/tool/sync_diamond_merge.rs | 429 ++++++++++++++++++ eden/mononoke/tests/integration/manifest_deps | 1 + 
.../tests/integration/run_tests_getdeps.py | 2 - 13 files changed, 978 insertions(+), 9 deletions(-) rename eden/mononoke/commit_rewriting/{megarepolib => megarepo}/Cargo.toml (66%) rename eden/mononoke/commit_rewriting/{megarepolib => megarepo}/src/chunking.rs (100%) rename eden/mononoke/commit_rewriting/{megarepolib => megarepo}/src/common.rs (100%) rename eden/mononoke/commit_rewriting/{megarepolib => megarepo}/src/lib.rs (100%) rename eden/mononoke/commit_rewriting/{megarepolib => megarepo}/src/pre_merge_deletes.rs (100%) create mode 100644 eden/mononoke/commit_rewriting/megarepo/tool/cli.rs create mode 100644 eden/mononoke/commit_rewriting/megarepo/tool/main.rs create mode 100644 eden/mononoke/commit_rewriting/megarepo/tool/merging.rs create mode 100644 eden/mononoke/commit_rewriting/megarepo/tool/sync_diamond_merge.rs diff --git a/eden/mononoke/Cargo.toml b/eden/mononoke/Cargo.toml index 1e0bbcffc5d9c..8a30368723917 100644 --- a/eden/mononoke/Cargo.toml +++ b/eden/mononoke/Cargo.toml @@ -236,7 +236,7 @@ members = [ "commit_rewriting/cross_repo_sync", "commit_rewriting/cross_repo_sync/test_utils", "commit_rewriting/live_commit_sync_config", - "commit_rewriting/megarepolib", + "commit_rewriting/megarepo", "commit_rewriting/movers", "commit_rewriting/synced_commit_mapping", "common/allocation_tracing", diff --git a/eden/mononoke/commit_rewriting/cross_repo_sync/test_utils/Cargo.toml b/eden/mononoke/commit_rewriting/cross_repo_sync/test_utils/Cargo.toml index 6631b48457e24..17433d0b83c1d 100644 --- a/eden/mononoke/commit_rewriting/cross_repo_sync/test_utils/Cargo.toml +++ b/eden/mononoke/commit_rewriting/cross_repo_sync/test_utils/Cargo.toml @@ -17,7 +17,7 @@ blobstore = { path = "../../../blobstore" } bookmarks = { path = "../../../bookmarks" } context = { path = "../../../server/context" } cross_repo_sync = { path = ".." 
} -megarepolib = { path = "../../megarepolib" } +megarepolib = { path = "../../megarepo" } metaconfig_types = { path = "../../../metaconfig/types" } mononoke_types = { path = "../../../mononoke_types" } sql_construct = { path = "../../../common/sql_construct" } diff --git a/eden/mononoke/commit_rewriting/megarepolib/Cargo.toml b/eden/mononoke/commit_rewriting/megarepo/Cargo.toml similarity index 66% rename from eden/mononoke/commit_rewriting/megarepolib/Cargo.toml rename to eden/mononoke/commit_rewriting/megarepo/Cargo.toml index 3d130a118de3d..1e65f1921971b 100644 --- a/eden/mononoke/commit_rewriting/megarepolib/Cargo.toml +++ b/eden/mononoke/commit_rewriting/megarepo/Cargo.toml @@ -4,29 +4,45 @@ edition = "2018" version = "0.1.0" authors = ['Facebook'] license = "GPLv2+" -include = ["src/**/*.rs"] +include = ["src/**/*.rs", "tool/**/*.rs"] + +[lib] +path = "src/lib.rs" + +[[bin]] +name = "megarepotool" +path = "tool/main.rs" [dependencies] blobrepo = { path = "../../blobrepo" } blobrepo_hg = { path = "../../blobrepo/blobrepo_hg" } +blobrepo_utils = { path = "../../blobrepo_utils" } blobstore = { path = "../../blobstore" } bookmarks = { path = "../../bookmarks" } +cmdlib = { path = "../../cmdlib" } context = { path = "../../server/context" } +cross_repo_sync = { path = "../cross_repo_sync" } manifest = { path = "../../manifest" } mercurial_types = { path = "../../mercurial/types" } +metaconfig_types = { path = "../../metaconfig/types" } mononoke_types = { path = "../../mononoke_types" } movers = { path = "../movers" } +revset = { path = "../../revset" } +skiplist = { path = "../../reachabilityindex/skiplist" } +synced_commit_mapping = { path = "../synced_commit_mapping" } +cloned = { git = "https://github.com/facebookexperimental/rust-shed.git", branch = "master" } +fbinit = { git = "https://github.com/facebookexperimental/rust-shed.git", branch = "master" } +futures_ext = { git = "https://github.com/facebookexperimental/rust-shed.git", branch = "master" } anyhow = "1.0" +clap = "2.33" futures = { version = "0.3.5", features = ["async-await", "compat"] } +futures-old = { package = "futures", version = "0.1" } itertools = "0.8" +maplit = "1.0" slog = { version = "2.5", features = ["max_level_debug"] } [dev-dependencies] fixtures = { path = "../../tests/fixtures" } tests_utils = { path = "../../tests/utils" } async_unit = { git = "https://github.com/facebookexperimental/rust-shed.git", branch = "master" } -cloned = { git = "https://github.com/facebookexperimental/rust-shed.git", branch = "master" } -fbinit = { git = "https://github.com/facebookexperimental/rust-shed.git", branch = "master" } -futures-old = { package = "futures", version = "0.1" } -maplit = "1.0" tokio-compat = "0.1" diff --git a/eden/mononoke/commit_rewriting/megarepolib/src/chunking.rs b/eden/mononoke/commit_rewriting/megarepo/src/chunking.rs similarity index 100% rename from eden/mononoke/commit_rewriting/megarepolib/src/chunking.rs rename to eden/mononoke/commit_rewriting/megarepo/src/chunking.rs diff --git a/eden/mononoke/commit_rewriting/megarepolib/src/common.rs b/eden/mononoke/commit_rewriting/megarepo/src/common.rs similarity index 100% rename from eden/mononoke/commit_rewriting/megarepolib/src/common.rs rename to eden/mononoke/commit_rewriting/megarepo/src/common.rs diff --git a/eden/mononoke/commit_rewriting/megarepolib/src/lib.rs b/eden/mononoke/commit_rewriting/megarepo/src/lib.rs similarity index 100% rename from eden/mononoke/commit_rewriting/megarepolib/src/lib.rs rename to 
eden/mononoke/commit_rewriting/megarepo/src/lib.rs diff --git a/eden/mononoke/commit_rewriting/megarepolib/src/pre_merge_deletes.rs b/eden/mononoke/commit_rewriting/megarepo/src/pre_merge_deletes.rs similarity index 100% rename from eden/mononoke/commit_rewriting/megarepolib/src/pre_merge_deletes.rs rename to eden/mononoke/commit_rewriting/megarepo/src/pre_merge_deletes.rs diff --git a/eden/mononoke/commit_rewriting/megarepo/tool/cli.rs b/eden/mononoke/commit_rewriting/megarepo/tool/cli.rs new file mode 100644 index 0000000000000..33f4adee7ec66 --- /dev/null +++ b/eden/mononoke/commit_rewriting/megarepo/tool/cli.rs @@ -0,0 +1,160 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This software may be used and distributed according to the terms of the + * GNU General Public License version 2. + */ + +use anyhow::{format_err, Error}; +use bookmarks::BookmarkName; +use clap::{App, Arg, ArgMatches, SubCommand}; +use cmdlib::args; +use futures_ext::{try_boxfuture, BoxFuture, FutureExt}; +use futures_old::future::{err, ok}; +use megarepolib::common::ChangesetArgs; +use mononoke_types::DateTime; + +pub const COMMIT_HASH: &'static str = "commit-hash"; +pub const MOVE: &'static str = "move"; +pub const MERGE: &'static str = "merge"; +pub const MARK_PUBLIC: &'static str = "mark-public"; +pub const ORIGIN_REPO: &'static str = "origin-repo"; +pub const CHANGESET: &'static str = "commit"; +pub const FIRST_PARENT: &'static str = "first-parent"; +pub const SECOND_PARENT: &'static str = "second-parent"; +pub const COMMIT_MESSAGE: &'static str = "commit-message"; +pub const COMMIT_AUTHOR: &'static str = "commit-author"; +pub const COMMIT_DATE_RFC3339: &'static str = "commit-date-rfc3339"; +pub const COMMIT_BOOKMARK: &'static str = "bookmark"; +pub const SYNC_DIAMOND_MERGE: &'static str = "sync-diamond-merge"; +pub const MAX_NUM_OF_MOVES_IN_COMMIT: &'static str = "max-num-of-moves-in-commit"; + +pub fn cs_args_from_matches<'a>(sub_m: &ArgMatches<'a>) -> BoxFuture { + let message = try_boxfuture!(sub_m + .value_of(COMMIT_MESSAGE) + .ok_or_else(|| format_err!("missing argument {}", COMMIT_MESSAGE))) + .to_string(); + let author = try_boxfuture!(sub_m + .value_of(COMMIT_AUTHOR) + .ok_or_else(|| format_err!("missing argument {}", COMMIT_AUTHOR))) + .to_string(); + let datetime = try_boxfuture!(sub_m + .value_of(COMMIT_DATE_RFC3339) + .map(|datetime_str| DateTime::from_rfc3339(datetime_str)) + .unwrap_or_else(|| Ok(DateTime::now()))); + let bookmark = try_boxfuture!(sub_m + .value_of(COMMIT_BOOKMARK) + .map(|bookmark_str| BookmarkName::new(bookmark_str)) + .transpose()); + let mark_public = sub_m.is_present(MARK_PUBLIC); + if !mark_public && bookmark.is_some() { + return err(format_err!( + "--mark-public is required if --bookmark is provided" + )) + .boxify(); + } + + ok(ChangesetArgs { + author, + message, + datetime, + bookmark, + mark_public, + }) + .boxify() +} + +fn add_resulting_commit_args<'a, 'b>(subcommand: App<'a, 'b>) -> App<'a, 'b> { + subcommand + .arg( + Arg::with_name(COMMIT_AUTHOR) + .help("commit author to use") + .takes_value(true) + .required(true), + ) + .arg( + Arg::with_name(COMMIT_MESSAGE) + .help("commit message to use") + .takes_value(true) + .required(true), + ) + .arg( + Arg::with_name(MARK_PUBLIC) + .help("add the resulting commit to the public phase") + .long(MARK_PUBLIC), + ) + .arg( + Arg::with_name(COMMIT_DATE_RFC3339) + .help("commit date to use (default is now)") + .long(COMMIT_DATE_RFC3339) + .takes_value(true), + ) + .arg( + Arg::with_name(COMMIT_BOOKMARK) + 
.help("bookmark to point to resulting commits (no sanity checks, will move existing bookmark, be careful)") + .long(COMMIT_BOOKMARK) + .takes_value(true) + ) +} + +pub fn setup_app<'a, 'b>() -> App<'a, 'b> { + let move_subcommand = SubCommand::with_name(MOVE) + .about("create a move commit, using a provided spec") + .arg( + Arg::with_name(MAX_NUM_OF_MOVES_IN_COMMIT) + .long(MAX_NUM_OF_MOVES_IN_COMMIT) + .help("how many files a single commit moves (note - that might create a stack of move commits instead of just one)") + .takes_value(true) + .required(false), + ) + .arg( + Arg::with_name(ORIGIN_REPO) + .help("use predefined mover for part of megarepo, coming from this repo") + .takes_value(true) + .required(true), + ) + .arg( + Arg::with_name(CHANGESET) + .help("a changeset hash or bookmark of move commit's parent") + .takes_value(true) + .required(true), + ); + + let merge_subcommand = SubCommand::with_name(MERGE) + .about("create a merge commit with given parents") + .arg( + Arg::with_name(FIRST_PARENT) + .help("first parent of a produced merge commit") + .takes_value(true) + .required(true), + ) + .arg( + Arg::with_name(SECOND_PARENT) + .help("second parent of a produced merge commit") + .takes_value(true) + .required(true), + ); + + let sync_diamond_subcommand = SubCommand::with_name(SYNC_DIAMOND_MERGE) + .about("sync a diamond merge commit from a small repo into large repo") + .arg( + Arg::with_name(COMMIT_HASH) + .help("diamond merge commit from small repo to sync") + .takes_value(true) + .required(true), + ) + .arg( + Arg::with_name(COMMIT_BOOKMARK) + .help("bookmark to point to resulting commits (no sanity checks, will move existing bookmark, be careful)") + .long(COMMIT_BOOKMARK) + .takes_value(true) + ); + + args::MononokeApp::new("megarepo preparation tool") + .with_advanced_args_hidden() + .with_source_and_target_repos() + .build() + .subcommand(add_resulting_commit_args(move_subcommand)) + .subcommand(add_resulting_commit_args(merge_subcommand)) + .subcommand(sync_diamond_subcommand) +} diff --git a/eden/mononoke/commit_rewriting/megarepo/tool/main.rs b/eden/mononoke/commit_rewriting/megarepo/tool/main.rs new file mode 100644 index 0000000000000..75870a16cab54 --- /dev/null +++ b/eden/mononoke/commit_rewriting/megarepo/tool/main.rs @@ -0,0 +1,221 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This software may be used and distributed according to the terms of the + * GNU General Public License version 2. 
+ */ + +#![deny(warnings)] +#![feature(process_exitcode_placeholder)] + +use anyhow::{bail, format_err, Error, Result}; +use bookmarks::BookmarkName; +use clap::ArgMatches; +use cmdlib::{args, helpers}; +use context::CoreContext; +use fbinit::FacebookInit; +use futures::{ + compat::Future01CompatExt, + future::{try_join, try_join3}, +}; +use metaconfig_types::RepoConfig; +use mononoke_types::RepositoryId; +use movers::get_small_to_large_mover; +use slog::info; +use std::num::NonZeroU64; +use synced_commit_mapping::SqlSyncedCommitMapping; + +mod cli; +mod merging; +mod sync_diamond_merge; + +use crate::cli::{ + cs_args_from_matches, setup_app, CHANGESET, COMMIT_HASH, FIRST_PARENT, + MAX_NUM_OF_MOVES_IN_COMMIT, MERGE, MOVE, ORIGIN_REPO, SECOND_PARENT, SYNC_DIAMOND_MERGE, +}; +use crate::merging::perform_merge; +use megarepolib::{common::StackPosition, perform_move, perform_stack_move}; + +async fn run_move<'a>( + ctx: CoreContext, + matches: &ArgMatches<'a>, + sub_m: &ArgMatches<'a>, + repo_config: RepoConfig, +) -> Result<(), Error> { + let origin_repo = + RepositoryId::new(args::get_i32_opt(&sub_m, ORIGIN_REPO).expect("Origin repo is missing")); + let resulting_changeset_args = cs_args_from_matches(&sub_m); + let commit_sync_config = repo_config.commit_sync_config.as_ref().unwrap(); + let mover = get_small_to_large_mover(commit_sync_config, origin_repo).unwrap(); + let move_parent = sub_m.value_of(CHANGESET).unwrap().to_owned(); + + let max_num_of_moves_in_commit = + args::get_and_parse_opt::(sub_m, MAX_NUM_OF_MOVES_IN_COMMIT); + + let (repo, resulting_changeset_args) = try_join( + args::open_repo(ctx.fb, &ctx.logger().clone(), &matches).compat(), + resulting_changeset_args.compat(), + ) + .await?; + + let parent_bcs_id = helpers::csid_resolve(ctx.clone(), repo.clone(), move_parent) + .compat() + .await?; + + if let Some(max_num_of_moves_in_commit) = max_num_of_moves_in_commit { + perform_stack_move( + &ctx, + &repo, + parent_bcs_id, + mover, + max_num_of_moves_in_commit, + |num: StackPosition| { + let mut args = resulting_changeset_args.clone(); + let message = args.message + &format!(" #{}", num.0); + args.message = message; + args + }, + ) + .await + .map(|changesets| { + info!( + ctx.logger(), + "created {} commits, with the last commit {:?}", + changesets.len(), + changesets.last() + ); + () + }) + } else { + perform_move(&ctx, &repo, parent_bcs_id, mover, resulting_changeset_args) + .await + .map(|_| ()) + } +} + +async fn run_merge<'a>( + ctx: CoreContext, + matches: &ArgMatches<'a>, + sub_m: &ArgMatches<'a>, +) -> Result<(), Error> { + let first_parent = sub_m.value_of(FIRST_PARENT).unwrap().to_owned(); + let second_parent = sub_m.value_of(SECOND_PARENT).unwrap().to_owned(); + let resulting_changeset_args = cs_args_from_matches(&sub_m); + let (repo, resulting_changeset_args) = try_join( + args::open_repo(ctx.fb, &ctx.logger().clone(), &matches).compat(), + resulting_changeset_args.compat(), + ) + .await?; + + let first_parent_fut = helpers::csid_resolve(ctx.clone(), repo.clone(), first_parent); + let second_parent_fut = helpers::csid_resolve(ctx.clone(), repo.clone(), second_parent); + let (first_parent, second_parent) = + try_join(first_parent_fut.compat(), second_parent_fut.compat()).await?; + + info!(ctx.logger(), "Creating a merge commit"); + perform_merge( + ctx.clone(), + repo.clone(), + first_parent, + second_parent, + resulting_changeset_args, + ) + .compat() + .await + .map(|_| ()) +} + +async fn run_sync_diamond_merge<'a>( + ctx: CoreContext, + matches: &ArgMatches<'a>, + 
sub_m: &ArgMatches<'a>, +) -> Result<(), Error> { + let source_repo_id = args::get_source_repo_id(ctx.fb, matches)?; + let target_repo_id = args::get_target_repo_id(ctx.fb, matches)?; + let maybe_bookmark = sub_m + .value_of(cli::COMMIT_BOOKMARK) + .map(|bookmark_str| BookmarkName::new(bookmark_str)) + .transpose()?; + + let bookmark = maybe_bookmark.ok_or(Error::msg("bookmark must be specified"))?; + + let source_repo = args::open_repo_with_repo_id(ctx.fb, ctx.logger(), source_repo_id, matches); + let target_repo = args::open_repo_with_repo_id(ctx.fb, ctx.logger(), target_repo_id, matches); + let mapping = args::open_source_sql::(ctx.fb, &matches); + + let (_, source_repo_config) = args::get_config_by_repoid(ctx.fb, matches, source_repo_id)?; + + let merge_commit_hash = sub_m.value_of(COMMIT_HASH).unwrap().to_owned(); + let (source_repo, target_repo, mapping) = + try_join3(source_repo.compat(), target_repo.compat(), mapping.compat()).await?; + + let source_merge_cs_id = + helpers::csid_resolve(ctx.clone(), source_repo.clone(), merge_commit_hash) + .compat() + .await?; + + sync_diamond_merge::do_sync_diamond_merge( + ctx, + source_repo, + target_repo, + source_merge_cs_id, + mapping, + source_repo_config, + bookmark, + ) + .await + .map(|_| ()) +} + +fn get_and_verify_repo_config<'a>( + fb: FacebookInit, + matches: &ArgMatches<'a>, +) -> Result { + args::get_config(fb, &matches).and_then(|(repo_name, repo_config)| { + let repo_id = repo_config.repoid; + repo_config + .commit_sync_config + .as_ref() + .ok_or_else(|| format_err!("no sync config provided for {}", repo_name)) + .map(|commit_sync_config| commit_sync_config.large_repo_id) + .and_then(move |large_repo_id| { + if repo_id != large_repo_id { + Err(format_err!( + "repo must be a large repo in commit sync config" + )) + } else { + Ok(repo_config) + } + }) + }) +} + +#[fbinit::main] +fn main(fb: FacebookInit) -> Result<()> { + let app = setup_app(); + let matches = app.get_matches(); + args::init_cachelib(fb, &matches, None); + let logger = args::init_logging(fb, &matches); + let ctx = CoreContext::new_with_logger(fb, logger.clone()); + + let subcommand_future = async { + match matches.subcommand() { + (MOVE, Some(sub_m)) => { + let repo_config = get_and_verify_repo_config(fb, &matches)?; + run_move(ctx, &matches, sub_m, repo_config).await + } + (MERGE, Some(sub_m)) => run_merge(ctx, &matches, sub_m).await, + (SYNC_DIAMOND_MERGE, Some(sub_m)) => run_sync_diamond_merge(ctx, &matches, sub_m).await, + _ => bail!("oh no, wrong arguments provided!"), + } + }; + + helpers::block_execute( + subcommand_future, + fb, + "megarepotool", + &logger, + &matches, + cmdlib::monitoring::AliveService, + ) +} diff --git a/eden/mononoke/commit_rewriting/megarepo/tool/merging.rs b/eden/mononoke/commit_rewriting/megarepo/tool/merging.rs new file mode 100644 index 0000000000000..318ead3e276d0 --- /dev/null +++ b/eden/mononoke/commit_rewriting/megarepo/tool/merging.rs @@ -0,0 +1,144 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This software may be used and distributed according to the terms of the + * GNU General Public License version 2. 
+ */ + +use anyhow::{format_err, Error}; +use blobrepo::BlobRepo; +use blobrepo_hg::BlobRepoHg; +use blobstore::Loadable; +use cloned::cloned; +use context::CoreContext; +use futures::{FutureExt, TryFutureExt}; +use futures_old::future::{err, ok, Future}; +use futures_old::stream::Stream; +use manifest::ManifestOps; +use mercurial_types::{HgChangesetId, MPath}; +use mononoke_types::ChangesetId; +use slog::info; +use std::collections::{BTreeMap, HashSet}; +use std::iter::FromIterator; + +use megarepolib::common::{create_save_and_generate_hg_changeset, ChangesetArgs}; + +fn get_all_files_in_working_copy( + ctx: CoreContext, + repo: BlobRepo, + hg_cs_id: HgChangesetId, +) -> impl Future, Error = Error> { + hg_cs_id + .load(ctx.clone(), repo.blobstore()) + .compat() + .from_err() + .and_then({ + cloned!(ctx, repo); + move |hg_cs| { + hg_cs + .manifestid() + .list_leaf_entries(ctx, repo.get_blobstore()) + .map(|(mpath, _)| mpath) + .collect() + } + }) +} + +fn fail_on_path_conflicts( + ctx: CoreContext, + repo: BlobRepo, + hg_cs_id_1: HgChangesetId, + hg_cs_id_2: HgChangesetId, +) -> impl Future { + info!(ctx.logger(), "Checking if there are any path conflicts"); + let all_files_1_fut = get_all_files_in_working_copy(ctx.clone(), repo.clone(), hg_cs_id_1); + let all_files_2_fut = get_all_files_in_working_copy(ctx.clone(), repo.clone(), hg_cs_id_2); + all_files_1_fut + .join(all_files_2_fut) + .and_then(move |(all_files_1, all_files_2)| { + let all_files_1 = HashSet::<_>::from_iter(all_files_1); + let all_files_2 = HashSet::from_iter(all_files_2); + let intersection: Vec = all_files_1 + .intersection(&all_files_2) + .take(10) + .cloned() + .collect(); + if intersection.len() > 0 { + err(format_err!( + "There are paths present in both parents: {:?} ...", + intersection + )) + } else { + info!(ctx.logger(), "Done checking path conflicts"); + ok(()) + } + }) +} + +pub fn perform_merge( + ctx: CoreContext, + repo: BlobRepo, + first_bcs_id: ChangesetId, + second_bcs_id: ChangesetId, + resulting_changeset_args: ChangesetArgs, +) -> impl Future { + let first_hg_cs_id_fut = repo.get_hg_from_bonsai_changeset(ctx.clone(), first_bcs_id.clone()); + let second_hg_cs_id_fut = repo.get_hg_from_bonsai_changeset(ctx.clone(), second_bcs_id.clone()); + first_hg_cs_id_fut + .join(second_hg_cs_id_fut) + .and_then({ + cloned!(ctx, repo); + move |(first_hg_cs_id, second_hg_cs_id)| { + fail_on_path_conflicts(ctx, repo, first_hg_cs_id, second_hg_cs_id) + } + }) + .and_then({ + cloned!(ctx, repo, first_bcs_id, second_bcs_id); + move |_| { + info!( + ctx.logger(), + "Creating a merge bonsai changeset with parents: {:?}, {:?}", + first_bcs_id, + second_bcs_id + ); + async move { + create_save_and_generate_hg_changeset( + &ctx, + &repo, + vec![first_bcs_id, second_bcs_id], + BTreeMap::new(), + resulting_changeset_args, + ) + .await + } + .boxed() + .compat() + } + }) +} + +#[cfg(test)] +mod test { + use super::*; + use fbinit::FacebookInit; + use fixtures::merge_even; + use futures::compat::Future01CompatExt; + use std::str::FromStr; + + #[fbinit::test] + fn test_path_conflict_detection(fb: FacebookInit) { + async_unit::tokio_unit_test(async move { + let repo = merge_even::getrepo(fb).await; + let ctx = CoreContext::test_mock(fb); + let p1 = HgChangesetId::from_str("4f7f3fd428bec1a48f9314414b063c706d9c1aed").unwrap(); + let p2 = HgChangesetId::from_str("16839021e338500b3cf7c9b871c8a07351697d68").unwrap(); + assert!( + fail_on_path_conflicts(ctx, repo, p1, p2) + .compat() + .await + .is_err(), + "path conflicts should've been 
detected" + ); + }); + } +} diff --git a/eden/mononoke/commit_rewriting/megarepo/tool/sync_diamond_merge.rs b/eden/mononoke/commit_rewriting/megarepo/tool/sync_diamond_merge.rs new file mode 100644 index 0000000000000..7914b824b6edb --- /dev/null +++ b/eden/mononoke/commit_rewriting/megarepo/tool/sync_diamond_merge.rs @@ -0,0 +1,429 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This software may be used and distributed according to the terms of the + * GNU General Public License version 2. + */ + +/// This is a very hacky temporary tool that's used with only one purpose - +/// to half-manually sync a diamond merge commit from a small repo into a large repo. +/// NOTE - this is not a production quality tool, but rather a best effort attempt to +/// half-automate a rare case that might occur. Tool most likely doesn't cover all the cases. +/// USE WITH CARE! +use anyhow::{format_err, Error}; +use blobrepo::BlobRepo; +use blobrepo_hg::BlobRepoHg; +use blobrepo_utils::convert_diff_result_into_file_change_for_diamond_merge; +use blobstore::Loadable; +use bookmarks::{BookmarkName, BookmarkUpdateReason}; +use cloned::cloned; +use context::CoreContext; +use cross_repo_sync::{ + create_commit_syncers, rewrite_commit, update_mapping, upload_commits, CommitSyncOutcome, + CommitSyncer, Syncers, +}; +use futures::{ + compat::Future01CompatExt, + future::TryFutureExt, + stream::{futures_unordered::FuturesUnordered, TryStreamExt}, +}; +use futures_ext::{BoxStream, StreamExt}; +use futures_old::{Future, IntoFuture, Stream}; +use manifest::{bonsai_diff, BonsaiDiffFileChange}; +use maplit::hashmap; +use mercurial_types::{HgFileNodeId, HgManifestId}; +use metaconfig_types::RepoConfig; +use mononoke_types::{BonsaiChangeset, ChangesetId, FileChange, MPath}; +use revset::DifferenceOfUnionsOfAncestorsNodeStream; +use skiplist::fetch_skiplist_index; +use slog::{info, warn}; +use std::collections::{BTreeMap, HashMap}; +use synced_commit_mapping::SqlSyncedCommitMapping; + +/// The function syncs merge commit M from a small repo into a large repo. +/// It's designed to handle a case described below +/// +/// Small repo state +/// M +/// | \ +/// P1 | <- P1 must already be synced +/// | | +/// | P2 <- might not be synced yet +/// ... | +/// | / +/// | / +/// ROOT +/// +/// Large repo state +/// +/// O <- ONTO value (i.e. where onto_bookmark points to) +/// ... <- commits from another small repo +/// | +/// P1' <- synced P1 commit from small repo +/// | +/// OVR' <- Potentially there can be commits from another repo between root and P1! +/// | +/// ROOT' <- synced ROOT commit +/// +/// +/// Most of the complexity stems from two facts +/// 1) If parents have different file content, then merge commit must have a file change entry for them +/// 2) that large repo might have rewritten commits from another small repo between ROOT' and P1'. +/// +/// That means that rewritten M' bonsai object must contain file change entries that were changed +/// in OVR* commits. +/// +/// So the function works as follows: +/// 1) Syncs all ROOT::P2 commits - nothing difficult here, just rewrite and save to large repo. 
+/// Those commits are expected to be non-merges for simplicity +/// 2) Create new merge commit +/// a) First find all the changed files from another small repo - those need to be in the merge repo +/// NOTE - we expect that all changes from this small repo are already in the bonsai changeset +/// b) Add file changes from previous step in the merge commit +/// c) Change parents +/// 3) Save merge commit in large repo +/// 4) Update the bookmark +pub async fn do_sync_diamond_merge( + ctx: CoreContext, + small_repo: BlobRepo, + large_repo: BlobRepo, + small_merge_cs_id: ChangesetId, + mapping: SqlSyncedCommitMapping, + small_repo_config: RepoConfig, + onto_bookmark: BookmarkName, +) -> Result<(), Error> { + info!( + ctx.logger(), + "Preparing to sync a merge commit {}...", small_merge_cs_id + ); + + let parents = small_repo + .get_changeset_parents_by_bonsai(ctx.clone(), small_merge_cs_id) + .compat() + .await?; + + let (p1, p2) = validate_parents(parents)?; + + let new_branch = + find_new_branch_oldest_first(ctx.clone(), &small_repo, p1, p2, &small_repo_config).await?; + + let syncers = create_commit_syncers( + small_repo.clone(), + large_repo.clone(), + &small_repo_config + .commit_sync_config + .ok_or(Error::msg("Commit sync config is not specified"))?, + mapping, + )?; + + let small_root = find_root(&new_branch)?; + + info!( + ctx.logger(), + "{} new commits are going to be merged in", + new_branch.len() + ); + for bcs in new_branch { + let cs_id = bcs.get_changeset_id(); + let parents = bcs.parents().collect::>(); + if parents.len() > 1 { + return Err(format_err!( + "{} from branch contains more than one parent", + cs_id + )); + } + info!(ctx.logger(), "syncing commit from new branch {}", cs_id); + syncers + .small_to_large + .unsafe_sync_commit(ctx.clone(), cs_id) + .await?; + } + + let maybe_onto_value = large_repo + .get_bonsai_bookmark(ctx.clone(), &onto_bookmark) + .compat() + .await?; + + let onto_value = + maybe_onto_value.ok_or(format_err!("cannot find bookmark {}", onto_bookmark))?; + + let rewritten = create_rewritten_merge_commit( + ctx.clone(), + small_merge_cs_id, + &small_repo, + &large_repo, + &syncers, + small_root, + onto_value, + ) + .await?; + + let new_merge_cs_id = rewritten.get_changeset_id(); + info!(ctx.logger(), "uploading merge commit {}", new_merge_cs_id); + upload_commits(ctx.clone(), vec![rewritten], small_repo, large_repo.clone()).await?; + + update_mapping( + ctx.clone(), + hashmap! 
{small_merge_cs_id => new_merge_cs_id}, + &syncers.small_to_large, + ) + .await?; + + let mut book_txn = large_repo.update_bookmark_transaction(ctx.clone()); + book_txn.force_set( + &onto_bookmark, + new_merge_cs_id, + BookmarkUpdateReason::ManualMove, + None, + )?; + book_txn.commit().await?; + + warn!( + ctx.logger(), + "It is recommended to run 'mononoke_admin crossrepo verify-wc' for {}!", new_merge_cs_id + ); + Ok(()) +} + +async fn create_rewritten_merge_commit( + ctx: CoreContext, + small_merge_cs_id: ChangesetId, + small_repo: &BlobRepo, + large_repo: &BlobRepo, + syncers: &Syncers, + small_root: ChangesetId, + onto_value: ChangesetId, +) -> Result { + let merge_bcs = small_merge_cs_id + .load(ctx.clone(), small_repo.blobstore()) + .await?; + + let parents = merge_bcs.parents().collect(); + let (p1, p2) = validate_parents(parents)?; + + let merge_bcs = merge_bcs.into_mut(); + + let large_root = remap_commit(ctx.clone(), &syncers.small_to_large, small_root).await?; + let remapped_p2 = remap_commit(ctx.clone(), &syncers.small_to_large, p2).await?; + + let remapped_parents = hashmap! { + p1 => onto_value, + p2 => remapped_p2, + }; + let maybe_rewritten = rewrite_commit( + ctx.clone(), + merge_bcs, + &remapped_parents, + syncers.small_to_large.get_mover().clone(), + syncers.small_to_large.get_source_repo().clone(), + ) + .await?; + let mut rewritten = + maybe_rewritten.ok_or(Error::msg("merge commit was unexpectedly rewritten out"))?; + + let mut additional_file_changes = generate_additional_file_changes( + ctx.clone(), + large_root, + &large_repo, + &syncers.large_to_small, + onto_value, + ) + .await?; + + for (path, fc) in rewritten.file_changes { + additional_file_changes.insert(path, fc); + } + rewritten.file_changes = additional_file_changes; + rewritten.freeze() +} + +/// This function finds all the changed file between root and onto that are from another small repo. +/// These files needed to be added to the new merge commit to preserve bonsai semantic. +async fn generate_additional_file_changes( + ctx: CoreContext, + root: ChangesetId, + large_repo: &BlobRepo, + large_to_small: &CommitSyncer, + onto_value: ChangesetId, +) -> Result>, Error> { + let bonsai_diff = find_bonsai_diff(ctx.clone(), &large_repo, root, onto_value) + .collect() + .compat() + .await?; + + let additional_file_changes = FuturesUnordered::new(); + for diff_res in bonsai_diff { + match diff_res { + BonsaiDiffFileChange::Changed(ref path, ..) + | BonsaiDiffFileChange::ChangedReusedId(ref path, ..) 
+ | BonsaiDiffFileChange::Deleted(ref path) => { + let maybe_new_path = large_to_small.get_mover()(path)?; + if maybe_new_path.is_some() { + continue; + } + } + } + + let fc = convert_diff_result_into_file_change_for_diamond_merge( + ctx.clone(), + &large_repo, + diff_res, + ) + .compat(); + additional_file_changes.push(fc); + } + + additional_file_changes + .try_collect::>() + .await +} + +async fn remap_commit( + ctx: CoreContext, + small_to_large_commit_syncer: &CommitSyncer, + cs_id: ChangesetId, +) -> Result { + let maybe_sync_outcome = small_to_large_commit_syncer + .get_commit_sync_outcome(ctx.clone(), cs_id) + .await?; + + let sync_outcome = maybe_sync_outcome.ok_or(format_err!( + "{} from small repo hasn't been remapped in large repo", + cs_id + ))?; + + use CommitSyncOutcome::*; + match sync_outcome { + RewrittenAs(ref cs_id, _) => Ok(*cs_id), + Preserved => Ok(cs_id), + _ => Err(format_err!( + "unexpected commit sync outcome for root, got {:?}", + sync_outcome + )), + } +} + +fn find_root(new_branch: &Vec) -> Result { + let mut cs_to_parents: HashMap<_, Vec<_>> = HashMap::new(); + for bcs in new_branch { + let cs_id = bcs.get_changeset_id(); + cs_to_parents.insert(cs_id, bcs.parents().collect()); + } + + let mut roots = vec![]; + for parents in cs_to_parents.values() { + for p in parents { + if !cs_to_parents.contains_key(p) { + roots.push(p); + } + } + } + + validate_roots(roots).map(|root| *root) +} + +async fn find_new_branch_oldest_first( + ctx: CoreContext, + small_repo: &BlobRepo, + p1: ChangesetId, + p2: ChangesetId, + small_repo_config: &RepoConfig, +) -> Result, Error> { + let fetcher = small_repo.get_changeset_fetcher(); + let skiplist_index = fetch_skiplist_index( + &ctx, + &small_repo_config.skiplist_index_blobstore_key, + &small_repo.get_blobstore().boxed(), + ) + .await?; + + let new_branch = DifferenceOfUnionsOfAncestorsNodeStream::new_with_excludes( + ctx.clone(), + &fetcher, + skiplist_index, + vec![p2], + vec![p1], + ) + .map({ + cloned!(ctx, small_repo); + move |cs| { + cs.load(ctx.clone(), small_repo.blobstore()) + .compat() + .from_err() + } + }) + .buffered(100) + .collect() + .compat() + .await?; + + Ok(new_branch.into_iter().rev().collect()) +} + +fn validate_parents(parents: Vec) -> Result<(ChangesetId, ChangesetId), Error> { + if parents.len() > 2 { + return Err(format_err!( + "too many parents, expected only 2: {:?}", + parents + )); + } + let p1 = parents.get(0).ok_or(Error::msg("not a merge commit"))?; + let p2 = parents.get(1).ok_or(Error::msg("not a merge commit"))?; + + Ok((*p1, *p2)) +} + +fn validate_roots(roots: Vec<&ChangesetId>) -> Result<&ChangesetId, Error> { + if roots.len() > 1 { + return Err(format_err!("too many roots, expected only 1: {:?}", roots)); + } + + roots + .get(0) + .cloned() + .ok_or(Error::msg("no roots found, this is not a diamond merge")) +} + +fn find_bonsai_diff( + ctx: CoreContext, + repo: &BlobRepo, + ancestor: ChangesetId, + descendant: ChangesetId, +) -> BoxStream, Error> { + ( + id_to_manifestid(ctx.clone(), repo.clone(), descendant), + id_to_manifestid(ctx.clone(), repo.clone(), ancestor), + ) + .into_future() + .map({ + cloned!(ctx, repo); + move |(d_mf, a_mf)| { + bonsai_diff( + ctx, + repo.get_blobstore(), + d_mf, + Some(a_mf).into_iter().collect(), + ) + } + }) + .flatten_stream() + .boxify() +} + +fn id_to_manifestid( + ctx: CoreContext, + repo: BlobRepo, + bcs_id: ChangesetId, +) -> impl Future { + repo.get_hg_from_bonsai_changeset(ctx.clone(), bcs_id) + .and_then({ + cloned!(ctx, repo); + move |cs_id| { + 
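+                // Load the hg changeset for this id; `.map()` below then extracts its manifest id.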
cs_id + .load(ctx.clone(), repo.blobstore()) + .compat() + .from_err() + } + }) + .map(|cs| cs.manifestid()) +} diff --git a/eden/mononoke/tests/integration/manifest_deps b/eden/mononoke/tests/integration/manifest_deps index 618ef354420fe..fe09f690ef3ac 100644 --- a/eden/mononoke/tests/integration/manifest_deps +++ b/eden/mononoke/tests/integration/manifest_deps @@ -7,6 +7,7 @@ MONONOKE_BINS = { "BACKSYNCER": "backsyncer_cmd", "EDENAPI_SERVER": "edenapi_server", "LFS_SERVER": "lfs_server", + "MEGAREPO_TOOL": "megarepotool", "MONONOKE_ADMIN": "admin", "MONONOKE_ALIAS_VERIFY": "aliasverify", "MONONOKE_BACKFILL_DERIVED_DATA": "backfill_derived_data", diff --git a/eden/mononoke/tests/integration/run_tests_getdeps.py b/eden/mononoke/tests/integration/run_tests_getdeps.py index ce6ce1442e281..2f2a90f772e6d 100755 --- a/eden/mononoke/tests/integration/run_tests_getdeps.py +++ b/eden/mononoke/tests/integration/run_tests_getdeps.py @@ -108,8 +108,6 @@ "test-lfs-to-mononoke.t", # Timed out "test-lfs-wantslfspointers.t", # Timed out "test-lfs.t", # Timed out - "test-megarepo-tool.t", # Missing MEGAREPO_TOOL - "test-mononoke-admin.t", # Missing MEGAREPO_TOOL "test-mononoke-hg-sync-job-generate-bundles-lfs-verification.t", # Timed out "test-mononoke-hg-sync-job-generate-bundles-lfs.t", # Timed out "test-push-protocol-lfs.t", # Timed out
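The manifest_deps entries added in this series ("MONONOKE_HG_SYNC" and "MEGAREPO_TOOL") are what allow the corresponding .t tests to be dropped from the exclusion lists in run_tests_getdeps.py above. The sketch below is a rough, hypothetical illustration of that relationship only: it is not part of these patches, and it assumes the built binaries all land in a single build directory, which the real harness may not do.

    import os

    # Subset of the MONONOKE_BINS mapping shown in manifest_deps above.
    MONONOKE_BINS = {
        "BACKSYNCER": "backsyncer_cmd",
        "MEGAREPO_TOOL": "megarepotool",
        "MONONOKE_HG_SYNC": "mononoke_hg_sync_job",
    }

    def missing_binaries(build_dir):
        """Return manifest keys whose binaries have not been built yet."""
        return sorted(
            key
            for key, binary in MONONOKE_BINS.items()
            if not os.path.exists(os.path.join(build_dir, binary))
        )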