Skip to content

Commit

Permalink
Merge pull request #2128 from BOINC/dpa_am_fixes2
Browse files (browse the repository at this point in the history)
client: extensions, fixes to account manager functionality
  • Loading branch information
davidpanderson authored Sep 23, 2017
2 parents 692e625 + 9f5c247 commit 65869eb
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 63 deletions.
115 changes: 75 additions & 40 deletions client/acct_mgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,9 @@ int ACCT_MGR_OP::do_rpc(
" <gpu_ec>%f</gpu_ec>\n"
" <gpu_time>%f</gpu_time>\n"
" <njobs_success>%d</njobs_success>\n"
" <njobs_error>%d</njobs_error>\n",
" <njobs_error>%d</njobs_error>\n"
" <disk_usage>%f</disk_usage>\n"
" <disk_share>%f</disk_share>\n",
p->master_url,
p->project_name,
p->suspended_via_gui?1:0,
Expand All @@ -183,7 +185,9 @@ int ACCT_MGR_OP::do_rpc(
p->gpu_ec,
p->gpu_time,
p->njobs_success,
p->njobs_error
p->njobs_error,
p->disk_usage,
p->disk_share
);
if (p->attached_via_acct_mgr) {
fprintf(f,
Expand Down Expand Up @@ -455,54 +459,61 @@ void ACCT_MGR_OP::handle_reply(int http_op_retval) {
bool verified;
PROJECT* pp;
bool sig_ok;
bool got_error = false;

if (http_op_retval == 0) {
// check for failures of HTTP OP, reply parse
//
if (http_op_retval) {
msg_printf(&ami, MSG_INFO, "AM RPC HTTP failure: %s",
boincerror(http_op_retval)
);
got_error = true;
} else {
FILE* f = fopen(ACCT_MGR_REPLY_FILENAME, "r");
if (f) {
retval = parse(f);
if (retval) {
got_error = true;
msg_printf(&ami, MSG_INFO, "AM reply parse error");
}
fclose(f);
} else {
retval = ERR_FOPEN;
msg_printf(&ami, MSG_INFO, "AM reply file missing");
got_error = true;
}
} else {
error_num = http_op_retval;
}

gstate.acct_mgr_info.password_error = false;
if (error_num == ERR_BAD_PASSWD && !via_gui) {
gstate.acct_mgr_info.password_error = true;
}
// check both error_str and error_num since an account manager may only
// return a BOINC based error code for password failures or invalid
// email addresses
// if no errors so far, check for errors from AM
//
if (error_str.size()) {
msg_printf(&ami, MSG_USER_ALERT,
"%s: %s",
_("Message from account manager"),
error_str.c_str()
);
if (!error_num) {
error_num = ERR_XML_PARSE;
}
} else if (error_num) {
if (error_num == http_op_retval) {
// if it was an HTTP error, don't notify the user;
// probably the acct mgr server is down
//
msg_printf(&ami, MSG_INFO,
"Account manager RPC failed: %s", boincerror(error_num)
if (!got_error) {
gstate.acct_mgr_info.password_error = false;
if (error_num == ERR_BAD_PASSWD && !via_gui) {
gstate.acct_mgr_info.password_error = true;
}

// Show the error message from the AM, if available.
// Check both error_str and error_num, since an account manager may
// return only a BOINC-based error code for password failures or
// invalid email addresses.
//
if (error_str.size()) {
msg_printf(&ami, MSG_USER_ALERT,
"%s: %s",
_("Message from account manager"),
error_str.c_str()
);
} else {
got_error = true;
} else if (error_num) {
msg_printf(&ami, MSG_USER_ALERT,
"%s: %s",
_("Message from account manager"),
boincerror(error_num)
);
got_error = true;
}
}

if (error_num) {
if (got_error) {
gstate.acct_mgr_info.next_rpc_time =
gstate.now
+ calculate_exponential_backoff(
Expand All @@ -513,6 +524,26 @@ void ACCT_MGR_OP::handle_reply(int http_op_retval) {
gstate.acct_mgr_info.nfailures++;
return;
}

// The RPC was successful
//
// Detach projects that are
// - detach_when_done
// - done (no results remaining for the project)
// - attached via AM
//
while (1) {
bool found = false;
for (i=0; i<gstate.projects.size(); i++) {
PROJECT* p = gstate.projects[i];
if (p->detach_when_done && !gstate.nresults_for_project(p) && p->attached_via_acct_mgr) {
gstate.detach_project(p);
found = true;
}
}
if (!found) break;
}

gstate.acct_mgr_info.nfailures = 0;

msg_printf(NULL, MSG_INFO, "Account manager contact succeeded");
Expand Down Expand Up @@ -559,15 +590,6 @@ void ACCT_MGR_OP::handle_reply(int http_op_retval) {
//
for (i=0; i<accounts.size(); i++) {
AM_ACCOUNT& acct = accounts[i];
retval = check_string_signature2(
acct.url.c_str(), acct.url_signature, ami.signing_key, verified
);
if (retval || !verified) {
msg_printf(NULL, MSG_INTERNAL_ERROR,
"Bad signature for URL %s", acct.url.c_str()
);
continue;
}
pp = gstate.lookup_project(acct.url.c_str());
if (pp) {
if (acct.detach) {
Expand Down Expand Up @@ -603,12 +625,16 @@ void ACCT_MGR_OP::handle_reply(int http_op_retval) {
pp->attached_via_acct_mgr = true;
if (acct.dont_request_more_work.present) {
pp->dont_request_more_work = acct.dont_request_more_work.value;
} else {
pp->dont_request_more_work = false;
}
if (acct.detach_when_done.present) {
pp->detach_when_done = acct.detach_when_done.value;
if (pp->detach_when_done) {
pp->dont_request_more_work = true;
}
} else {
pp->detach_when_done = false;
}

// initiate a scheduler RPC if requested by AMS
Expand Down Expand Up @@ -656,6 +682,15 @@ void ACCT_MGR_OP::handle_reply(int http_op_retval) {
} else {
// here we don't already have the project.
//
retval = check_string_signature2(
acct.url.c_str(), acct.url_signature, ami.signing_key, verified
);
if (retval || !verified) {
msg_printf(NULL, MSG_INTERNAL_ERROR,
"Bad signature for URL %s", acct.url.c_str()
);
continue;
}
if (acct.authenticator.empty()) {
msg_printf(NULL, MSG_INFO,
"Account manager reply missing authenticator for %s",
Expand Down
30 changes: 13 additions & 17 deletions client/client_state.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1444,28 +1444,24 @@ bool CLIENT_STATE::garbage_collect() {
// because detach_project() calls garbage_collect_always(),
// and we need to avoid infinite recursion
//
if (acct_mgr_info.using_am()) {
// If we're using an AM,
// start an AM RPC rather than detaching the projects;
// the RPC completion handler will detach them.
// This way the AM will be informed of their work done.
//
for (unsigned i=0; i<projects.size(); i++) {
PROJECT* p = projects[i];
if (p->detach_when_done && !nresults_for_project(p)) {
acct_mgr_info.next_rpc_time = 0;
acct_mgr_info.poll();
break;
}
}
} else {
while (1) {
bool found = false;
for (unsigned i=0; i<projects.size(); i++) {
PROJECT* p = projects[i];
if (p->detach_when_done && !nresults_for_project(p)) {
detach_project(p);
action = true;
// If the project was attached via an AM, don't detach it here;
// wait until the next successful AM RPC to detach it,
// so the AM will be informed of its work done.
//
if (!p->attached_via_acct_mgr) {
msg_printf(p, MSG_INFO, "Detaching - no more tasks");
detach_project(p);
action = true;
found = true;
}
}
}
if (!found) break;
}
#endif
return action;
Expand Down
8 changes: 6 additions & 2 deletions client/project.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,8 @@ int PROJECT::parse_state(XML_PARSER& xp) {
if (xp.parse_double("cpu_time", cpu_time)) continue;
if (xp.parse_double("gpu_ec", gpu_ec)) continue;
if (xp.parse_double("gpu_time", gpu_time)) continue;
if (xp.parse_double("disk_usage", disk_usage)) continue;
if (xp.parse_double("disk_share", disk_share)) continue;
#ifdef SIM
if (xp.match_tag("available")) {
available.parse(xp, "/available");
Expand Down Expand Up @@ -529,8 +531,10 @@ int PROJECT::write_state(MIOFILE& out, bool gui_rpc) {
" <cpu_ec>%f</cpu_ec>\n"
" <cpu_time>%f</cpu_time>\n"
" <gpu_ec>%f</gpu_ec>\n"
" <gpu_time>%f</gpu_time>\n",
cpu_ec, cpu_time, gpu_ec, gpu_time
" <gpu_time>%f</gpu_time>\n"
" <disk_usage>%f</disk_usage>\n"
" <disk_share>%f</disk_share>\n",
cpu_ec, cpu_time, gpu_ec, gpu_time, disk_usage, disk_share
);
}
out.printf(
Expand Down
8 changes: 4 additions & 4 deletions client/project.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,10 @@ struct PROJECT : PROJ_AM {
// Reasons are enumerated in lib/common_defs.h
bool trickle_up_pending;
// have trickle up to send
double disk_usage;
// computed by get_disk_usages()
double disk_share;
// computed by get_disk_shares()

/////// END OF ITEMS STORED IN client_state.xml

Expand All @@ -171,10 +175,6 @@ struct PROJECT : PROJ_AM {
// to make sure they haven't been tampered with.
// This provides only the illusion of security.
bool use_symlinks;
double disk_usage;
// computed by get_disk_usages()
double disk_share;
// computed by get_disk_shares();
bool report_results_immediately;

// items sent in scheduler replies,
Expand Down

0 comments on commit 65869eb

Please sign in to comment.