Skip to content

Commit

Permalink
Gpu hardening master (#2753)
Browse files Browse the repository at this point in the history
* Add non interactive setup to golemsp

* Auto remove directory after Agreement finish

* Auto cleanup after activity finish

* Enable tests for gpu-hardening-master branch

* Add golemsp to deb (provider variant)

* Stabilize rust version in aarch64 builds

* Revert changes to enable tests on gpu-hardening child branches

---------

Co-authored-by: scx1332 <sieciech.czajka@golem.network>
  • Loading branch information
nieznanysprawiciel and scx1332 committed Nov 22, 2023
1 parent 9fd8d06 commit 95d9dc9
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 39 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ jobs:
- name: Setup toolchain
uses: actions-rs/toolchain@v1
with:
toolchain: stable
toolchain: ${{ env.rust_stable }}
target: aarch64-unknown-linux-musl
override: true

Expand Down
5 changes: 5 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,11 @@ assets = [
"usr/lib/yagna/plugins/",
"755",
],
[
"target/release/golemsp",
"usr/bin/",
"755",
],
[
"README.md",
"usr/share/doc/yagna/",
Expand Down
67 changes: 48 additions & 19 deletions agent/provider/src/execution/task_runner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@ use humantime;
use log_derive::{logfn, logfn_inputs};
use std::collections::HashMap;
use std::fs::{create_dir_all, File};
use std::iter;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Duration;
use std::{fs, iter};
use structopt::StructOpt;

use ya_agreement_utils::{AgreementView, OfferTemplate};
Expand Down Expand Up @@ -106,7 +106,7 @@ pub struct TerminateActivity {
pub message: String,
}

/// Called when process exited. There are 3 reasons for process to exit:
/// Called when process exited. There are 2 reasons for process to exit:
/// - We got DestroyActivity event and killed process.
/// - ExeUnit crashed.
#[derive(Message)]
Expand All @@ -128,6 +128,13 @@ pub struct TaskRunnerConfig {
pub process_termination_timeout: Duration,
#[structopt(long, env, parse(try_from_str = humantime::parse_duration), default_value = "10s")]
pub exeunit_state_retry_interval: Duration,
/// Removes directory content after each `Activity` is destroyed.
#[structopt(long, env)]
pub auto_cleanup_activity: bool,
/// Removes directory content after `Agreement` is terminated.
/// Use this option to save disk space. Shouldn't be used when debugging.
#[structopt(long, env)]
pub auto_cleanup_agreement: bool,
#[structopt(skip = "you-forgot-to-set-session-id")]
pub session_id: String,
}
Expand Down Expand Up @@ -370,6 +377,19 @@ impl TaskRunner {
msg.activity_id
);

if self.config.auto_cleanup_activity {
let workdir = self
.agreement_dir(&msg.agreement_id)
.secure_join(&msg.activity_id);
log::info!(
"Cleaning directory {} for agreement [{}], activity [{}].",
workdir.display(),
msg.agreement_id,
msg.activity_id
);
fs::remove_dir_all(workdir).ok();
}

let destroy_msg = ActivityDestroyed {
agreement_id: msg.agreement_id.to_string(),
activity_id: msg.activity_id,
Expand Down Expand Up @@ -408,6 +428,10 @@ impl TaskRunner {
})
}

fn agreement_dir(&self, agreement_id: &str) -> PathBuf {
self.tasks_dir.secure_join(agreement_id)
}

#[logfn(Debug, fmt = "Task created: {}")]
fn create_task(
&self,
Expand All @@ -416,10 +440,7 @@ impl TaskRunner {
agreement_id: &str,
requestor_pub_key: Option<&str>,
) -> Result<Task> {
let working_dir = self
.tasks_dir
.secure_join(agreement_id)
.secure_join(activity_id);
let working_dir = self.agreement_dir(agreement_id).secure_join(activity_id);

create_dir_all(&working_dir).map_err(|error| {
anyhow!(
Expand Down Expand Up @@ -795,6 +816,16 @@ impl Handler<AgreementClosed> for TaskRunner {

self.active_agreements.remove(&agreement_id);

if self.config.auto_cleanup_agreement {
let workdir = self.agreement_dir(&agreement_id);
log::info!(
"Cleaning directory {} for agreement [{}].",
workdir.display(),
agreement_id
);
fs::remove_dir_all(workdir).ok();
}

// All activities should be destroyed by now, so it is only sanity call.
let remove_future = async move {
remove_remaining_tasks(activities, agreement_id, myself).await;
Expand All @@ -806,21 +837,19 @@ impl Handler<AgreementClosed> for TaskRunner {
}

impl Handler<AgreementBroken> for TaskRunner {
type Result = ActorResponse<Self, Result<(), Error>>;
type Result = ResponseFuture<Result<(), Error>>;

fn handle(&mut self, msg: AgreementBroken, ctx: &mut Context<Self>) -> Self::Result {
let agreement_id = msg.agreement_id;
let myself = ctx.address();
let activities = self.list_activities(&agreement_id);

self.active_agreements.remove(&agreement_id);

let remove_future = async move {
remove_remaining_tasks(activities, agreement_id, myself).await;
Ok(())
};

ActorResponse::r#async(remove_future.into_actor(self))
// We don't distinguish between `AgreementClosed` and `AgreementBroken`.
let addr = ctx.address();
async move {
addr.send(AgreementClosed {
agreement_id: msg.agreement_id,
send_terminate: false,
})
.await?
}
.boxed_local()
}
}

Expand Down
13 changes: 9 additions & 4 deletions golem_cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,15 +125,20 @@ async fn my_main() -> Result</*exit code*/ i32> {
}
}

pub fn banner() {
terminal::fade_in(&format!(
pub fn banner(interactive: bool) {
let banner = format!(
include_str!("banner.txt"),
version = ya_compile_time_utils::semver_str!(),
git_commit = ya_compile_time_utils::git_rev(),
date = ya_compile_time_utils::build_date(),
build = ya_compile_time_utils::build_number_str().unwrap_or("-"),
))
.unwrap();
);

if interactive {
terminal::fade_in(&banner).unwrap();
} else {
println!("{banner}")
}
}

#[actix_rt::main]
Expand Down
50 changes: 35 additions & 15 deletions golem_cli/src/setup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,15 @@ pub struct RunConfig {
set = clap::ArgSettings::Global
)]
pub log_dir: Option<PathBuf>,
/// Don't prompt user for configuration, but use defaults instead.
#[structopt(long)]
pub no_interactive: bool,
}

pub async fn setup(run_config: &RunConfig, force: bool) -> Result<i32> {
let interactive = !run_config.no_interactive;
if force {
super::banner();
super::banner(interactive);
eprintln!("Initial node setup");
let _ = clear_stdin().await;
}
Expand All @@ -71,13 +75,20 @@ pub async fn setup(run_config: &RunConfig, force: bool) -> Result<i32> {
}

if config.node_name.is_none() || force {
let node_name = promptly::prompt_default(
"Node name ",
config
.node_name
.clone()
.unwrap_or_else(|| names::Generator::default().next().unwrap_or_default()),
)?;
let node_name = if interactive {
promptly::prompt_default(
"Node name ",
config
.node_name
.clone()
.unwrap_or_else(|| names::Generator::default().next().unwrap_or_default()),
)?
} else {
let name = names::Generator::default().next().unwrap_or_default();
println!("Node name (default={name})");
name
};

let account_msg = &config
.account
.map(|n| n.to_string())
Expand All @@ -87,14 +98,18 @@ pub async fn setup(run_config: &RunConfig, force: bool) -> Result<i32> {
run_config.account.network, account_msg
);

while let Some(account) = promptly::prompt_opt::<String, _>(&message)? {
match account.parse::<NodeId>() {
Err(e) => eprintln!("Invalid ethereum address, is should be 20-byte hex (example 0xB1974E1F44EAD2d22bB995167A709b89Fc466B6c): {}", e),
Ok(account) => {
config.account = Some(account);
break;
if interactive {
while let Some(account) = promptly::prompt_opt::<String, _>(&message)? {
match account.parse::<NodeId>() {
Err(e) => eprintln!("Invalid ethereum address, is should be 20-byte hex (example 0xB1974E1F44EAD2d22bB995167A709b89Fc466B6c): {}", e),
Ok(account) => {
config.account = Some(account);
break;
}
}
}
} else {
println!("{message}");
}

config.node_name = Some(node_name);
Expand Down Expand Up @@ -132,7 +147,12 @@ pub async fn setup(run_config: &RunConfig, force: bool) -> Result<i32> {
.collect();

let default_glm_per_h = 0.025;
let glm_per_h = promptly::prompt_default("Price GLM per hour", default_glm_per_h)?;
let glm_per_h = if interactive {
promptly::prompt_default("Price GLM per hour", default_glm_per_h)?
} else {
println!("Price GLM per hour (default={default_glm_per_h})");
default_glm_per_h
};

let mut usage = UsageDef::new();
usage.insert("CPU".into(), glm_per_h / 3600.0);
Expand Down

0 comments on commit 95d9dc9

Please sign in to comment.