diff --git a/src/modules/job-list/job_data.c b/src/modules/job-list/job_data.c index c7b4df40663f..5b425c0cc43e 100644 --- a/src/modules/job-list/job_data.c +++ b/src/modules/job-list/job_data.c @@ -266,13 +266,12 @@ static int parse_jobspec (struct job *job, const char *s, bool allow_nonfatal) json_error_t error; json_t *jobspec_job = NULL; json_t *tasks; - int rc = -1; if (!(job->jobspec = json_loads (s, 0, &error))) { flux_log (job->h, LOG_ERR, "%s: job %s invalid jobspec: %s", __FUNCTION__, idf58 (job->id), error.text); - goto error; + goto nonfatal_error; } if (json_unpack_ex (job->jobspec, &error, 0, @@ -344,9 +343,7 @@ static int parse_jobspec (struct job *job, const char *s, bool allow_nonfatal) /* nonfatal error - jobspec illegal, but we'll continue on. job * listing will return whatever data is available */ nonfatal_error: - rc = allow_nonfatal ? 0 : -1; -error: - return rc; + return allow_nonfatal ? 0 : -1; } int job_parse_jobspec (struct job *job, const char *s) diff --git a/src/modules/job-list/job_data.h b/src/modules/job-list/job_data.h index b6d0b08e83c9..39c53d783dcb 100644 --- a/src/modules/job-list/job_data.h +++ b/src/modules/job-list/job_data.h @@ -95,8 +95,11 @@ struct job *job_create (flux_t *h, flux_jobid_t id); /* Parse and internally cache jobspec. Set values for: * - job name + * - queue * - ntasks * - nnodes (if available) + * - ncores (if possible) + * - duration */ int job_parse_jobspec (struct job *job, const char *s); @@ -109,6 +112,8 @@ int job_parse_jobspec_fatal (struct job *job, const char *s); * - expiration * - nnodes * - nodelist + * - ncores + * - ntasks (if necessary) */ int job_parse_R (struct job *job, const char *s); diff --git a/src/modules/job-list/job_state.c b/src/modules/job-list/job_state.c index 3b58c3ccad63..71137d97c416 100644 --- a/src/modules/job-list/job_state.c +++ b/src/modules/job-list/job_state.c @@ -47,7 +47,10 @@ struct state_transition { flux_job_state_t state; - bool processed; + bool processing; /* indicates we are waiting for + * current update to complete */ + bool finished; /* indicates we are done, can remove + * from list */ double timestamp; int flags; flux_job_state_t expected_state; @@ -294,7 +297,7 @@ static void state_depend_lookup_continuation (flux_future_t *f, void *arg) st = zlist_head (job->next_states); assert (st); update_job_state_and_list (jsctx, job, st->state, st->timestamp); - zlist_remove (job->next_states, st); + st->finished = true; process_next_state (jsctx, job); out: @@ -344,7 +347,7 @@ static void state_run_lookup_continuation (flux_future_t *f, void *arg) st = zlist_head (job->next_states); assert (st); update_job_state_and_list (jsctx, job, st->state, st->timestamp); - zlist_remove (job->next_states, st); + st->finished = true; process_next_state (jsctx, job); out: @@ -413,7 +416,8 @@ static int add_state_transition (struct job *job, if (!(st = calloc (1, sizeof (*st)))) return -1; st->state = newstate; - st->processed = false; + st->processing = false; + st->finished = false; st->timestamp = timestamp; st->flags = flags; st->expected_state = expected_state; @@ -437,7 +441,10 @@ static void process_next_state (struct job_state_ctx *jsctx, struct job *job) struct state_transition *st; while ((st = zlist_head (job->next_states)) - && !st->processed) { + && (!st->processing || st->finished)) { + + if (st->finished) + goto next; if ((st->flags & STATE_TRANSITION_FLAG_REVERT)) { /* only revert if the current state is what is expected */ @@ -447,15 +454,15 @@ static void process_next_state (struct job_state_ctx *jsctx, struct job *job) update_job_state_and_list (jsctx, job, st->state, st->timestamp); } else { - zlist_remove (job->next_states, st); - continue; + st->finished = true; + goto next; } } else if ((st->flags & STATE_TRANSITION_FLAG_CONDITIONAL)) { /* if current state isn't what we expected, move on */ if (job->state != st->expected_state) { - zlist_remove (job->next_states, st); - continue; + st->finished = true; + goto next; } } @@ -489,7 +496,7 @@ static void process_next_state (struct job_state_ctx *jsctx, struct job *job) return; } - st->processed = true; + st->processing = true; break; } else { @@ -502,8 +509,12 @@ static void process_next_state (struct job_state_ctx *jsctx, struct job *job) eventlog_inactive_complete (job); update_job_state_and_list (jsctx, job, st->state, st->timestamp); - zlist_remove (job->next_states, st); + st->finished = true; } + + next: + if (st->finished) + zlist_remove (job->next_states, st); } } diff --git a/src/modules/job-list/test/job_data.c b/src/modules/job-list/test/job_data.c index 3029782e6ea2..c0dd23fe40e3 100644 --- a/src/modules/job-list/test/job_data.c +++ b/src/modules/job-list/test/job_data.c @@ -277,7 +277,7 @@ struct test_ncores { { NULL, NULL, 0, 0 }, }; -static int read_file (const char *filename, void **datap) +static void read_file (const char *filename, void **datap) { int fd; ssize_t size; @@ -287,34 +287,32 @@ static int read_file (const char *filename, void **datap) /* N.B. read_all() NUL terminates buffer */ if ((size = read_all (fd, datap)) < 0) BAIL_OUT ("failed to read data %s", filename); - return fd; + close (fd); } static int parse_jobspec (struct job *job, const char *filename) { char *data; - int fd, ret; + int ret; - fd = read_file (filename, (void **)&data); + read_file (filename, (void **)&data); ret = job_parse_jobspec_fatal (job, data); free (data); - close (fd); return ret; } static int parse_R (struct job *job, const char *filename) { char *data; - int fd, ret; + int ret; - fd = read_file (filename, (void **)&data); + read_file (filename, (void **)&data); ret = job_parse_R_fatal (job, data); free (data); - close (fd); return ret; } diff --git a/t/t2260-job-list.t b/t/t2260-job-list.t index d4e722b5aff7..d06cd8b42b08 100755 --- a/t/t2260-job-list.t +++ b/t/t2260-job-list.t @@ -2533,7 +2533,7 @@ test_expect_success 'reload job-ingest with defaults' ' # we make R invalid by overwriting it in the KVS before job-list will # look it up -test_expect_success 'flux job list works on job with illegal R' ' +test_expect_success 'flux job list works on job with illegal R/json' ' ${RPC} job-list.job-state-pause 0 /dev/null \ + && [ $i -lt 5 ] + do + sleep 1 + i=$((i + 1)) + done && + test "$i" -lt "5" && + flux job list --states=inactive | grep $jobid > list_illegal_R.out && + cat list_illegal_R.out | $jq -e ".ranks == null" && + cat list_illegal_R.out | $jq -e ".nnodes == null" && + cat list_illegal_R.out | $jq -e ".nodelist == null" && + cat list_illegal_R.out | $jq -e ".expiration == null" +' + +test_expect_success NO_CHAIN_LINT 'flux job list-ids works on job with illegal R/json' ' ${RPC} job-list.job-state-pause 0 list_id_illegal_R.out & + pid=$! + wait_idsync 1 && + ${RPC} job-list.job-state-unpause 0