-
Notifications
You must be signed in to change notification settings - Fork 319
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Create can generate candidates health check (#2821)
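Summary: When an experiment cannot generate candidates, this healthcheck looks at how long ago the last trial was run to decide how concerned it should be (a warning if a trial ran recently, a failure otherwise). Differential Revision: D63836018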
- Loading branch information
1 parent
318ebe7
commit a290227
Showing
4 changed files
with
245 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
# Copyright (c) Meta Platforms, Inc. and affiliates. | ||
# | ||
# This source code is licensed under the MIT license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
|
||
import json | ||
from datetime import datetime | ||
from typing import Optional | ||
|
||
import pandas as pd | ||
from ax.analysis.analysis import AnalysisCardLevel | ||
|
||
from ax.analysis.healthcheck.healthcheck_analysis import ( | ||
HealthcheckAnalysis, | ||
HealthcheckAnalysisCard, | ||
HealthcheckStatus, | ||
) | ||
from ax.core.experiment import Experiment | ||
from ax.modelbridge.generation_strategy import GenerationStrategy | ||
from pyre_extensions import none_throws | ||
|
||
|
||
class CanGenerateCandidatesAnalysis(HealthcheckAnalysis):
    """Healthcheck reporting whether an experiment can generate candidates.

    The caller decides up front whether candidates can be generated and why
    not; this analysis only turns that verdict into a card. When candidates
    cannot be generated, the severity depends on how many days have passed
    since the most recent trial started running.
    """

    # Prepended to ``reason`` in the card subtitle when generation is blocked.
    REASON_PREFIX: str = "This experiment cannot generate candidates.\nREASON:"
    # Appended to the subtitle when at least one trial has a run start time.
    LAST_RUN_TEMPLATE: str = "\nLAST TRIAL RUN: {days} day(s) ago"

    def __init__(
        self, can_generate_candidates: bool, reason: str, days_till_fail: int
    ) -> None:
        """
        Args:
            can_generate_candidates: Whether the experiment can generate
                candidates. If True, the status is automatically set to PASS.
            reason: The reason why the experiment cannot generate candidates.
                It is always copied into the card's dataframe, and into the
                subtitle when candidates cannot be generated.
            days_till_fail: The number of days since the last trial was run
                before the status is set to FAIL. The status becomes FAIL once
                the elapsed whole days strictly exceed this value.
        """
        self.can_generate_candidates = can_generate_candidates
        self.reason = reason
        self.days_till_fail = days_till_fail

    def compute(
        self,
        experiment: Optional[Experiment] = None,
        generation_strategy: Optional[GenerationStrategy] = None,
    ) -> HealthcheckAnalysisCard:
        """Build the healthcheck card for this analysis.

        Args:
            experiment: The experiment whose trials are inspected. It is only
                accessed (and must not be None) when candidates cannot be
                generated.
            generation_strategy: Not used by this analysis.

        Returns:
            A card with status PASS / level LOW when candidates can be
            generated. Otherwise FAIL / HIGH when no trial has a run start
            time or the last run is more than ``days_till_fail`` days old,
            and WARNING / MID when the last run is recent enough.
        """
        if self.can_generate_candidates:
            status = HealthcheckStatus.PASS
            level = AnalysisCardLevel.LOW
            subtitle = ""
        else:
            subtitle = f"{self.REASON_PREFIX}{self.reason}"
            # Trials that never started running have no timestamp and are
            # ignored; ``default=None`` covers the "nothing ever ran" case.
            latest_start = max(
                (
                    trial.time_run_started
                    for trial in none_throws(experiment).trials.values()
                    if trial.time_run_started is not None
                ),
                default=None,
            )
            if latest_start is None:
                status = HealthcheckStatus.FAIL
                level = AnalysisCardLevel.HIGH
            else:
                elapsed_days = (datetime.now() - latest_start).days
                is_stale = elapsed_days > self.days_till_fail
                status = (
                    HealthcheckStatus.FAIL if is_stale else HealthcheckStatus.WARNING
                )
                level = AnalysisCardLevel.HIGH if is_stale else AnalysisCardLevel.MID
                subtitle += self.LAST_RUN_TEMPLATE.format(days=elapsed_days)

        blob = json.dumps({"status": status})
        summary_df = pd.DataFrame({"status": [status], "reason": [self.reason]})
        return HealthcheckAnalysisCard(
            name="CanGenerateCandidates",
            title="Can Generate Candidates",
            blob=blob,
            subtitle=subtitle,
            df=summary_df,
            level=level,
        )
144 changes: 144 additions & 0 deletions
144
ax/analysis/healthcheck/tests/test_can_generate_candidates.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
# Copyright (c) Meta Platforms, Inc. and affiliates. | ||
# | ||
# This source code is licensed under the MIT license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
|
||
from datetime import datetime, timedelta | ||
|
||
import pandas as pd | ||
from ax.analysis.analysis import AnalysisCardLevel | ||
from ax.analysis.healthcheck.can_generate_candidates import ( | ||
CanGenerateCandidatesAnalysis, | ||
) | ||
from ax.analysis.healthcheck.healthcheck_analysis import HealthcheckStatus | ||
from ax.core.base_trial import TrialStatus | ||
from ax.utils.common.testutils import TestCase | ||
from ax.utils.testing.core_stubs import get_branin_experiment | ||
from pandas import testing as pdt | ||
|
||
|
||
class TestCanGenerateCandidates(TestCase):
    """Tests for the PASS / WARNING / FAIL outcomes of the healthcheck."""

    def _check_card(self, card, status, subtitle, level, reason) -> None:
        """Assert the fields shared by every expected card.

        Checks status, the fixed name and title, the subtitle, the level and
        the single-row dataframe, in that order.
        """
        self.assertEqual(card.get_status(), status)
        self.assertEqual(card.name, "CanGenerateCandidates")
        self.assertEqual(card.title, "Can Generate Candidates")
        self.assertEqual(card.subtitle, subtitle)
        self.assertEqual(card.level, level)
        expected_df = pd.DataFrame({"status": [status.value], "reason": [reason]})
        pdt.assert_frame_equal(card.df, expected_df)

    def test_passes_if_can_generate(self) -> None:
        # GIVEN we can generate candidates
        analysis = CanGenerateCandidatesAnalysis(
            can_generate_candidates=True,
            reason="No problems found.",
            days_till_fail=0,
        )
        # WHEN we run the healthcheck
        card = analysis.compute(experiment=None, generation_strategy=None)
        # THEN it PASSES with an empty subtitle
        self._check_card(
            card,
            status=HealthcheckStatus.PASS,
            subtitle="",
            level=AnalysisCardLevel.LOW,
            reason="No problems found.",
        )

    def test_warns_if_a_trial_was_recently_run(self) -> None:
        # GIVEN an experiment whose only trial started running one day ago
        experiment = get_branin_experiment(with_trial=True)
        trial = experiment.trials[0]
        trial.mark_running(no_runner_required=True)
        trial._time_run_started = datetime.now() - timedelta(days=1)
        analysis = CanGenerateCandidatesAnalysis(
            can_generate_candidates=False,
            reason="The data is borked.",
            days_till_fail=2,
        )
        # WHEN we run the healthcheck
        card = analysis.compute(experiment=experiment, generation_strategy=None)
        # THEN it is a WARNING, because the last run is within the threshold
        self._check_card(
            card,
            status=HealthcheckStatus.WARNING,
            subtitle=(
                f"{CanGenerateCandidatesAnalysis.REASON_PREFIX}"
                "The data is borked.\nLAST TRIAL RUN: 1 day(s) ago"
            ),
            level=AnalysisCardLevel.MID,
            reason="The data is borked.",
        )

    def test_is_fail_no_trials_have_been_run(self) -> None:
        # GIVEN an experiment with a candidate trial that never started running
        experiment = get_branin_experiment(with_trial=True)
        trial = experiment.trials[0]
        self.assertEqual(trial.status, TrialStatus.CANDIDATE)
        analysis = CanGenerateCandidatesAnalysis(
            can_generate_candidates=False,
            reason="The data is gone.",
            days_till_fail=2,
        )
        # WHEN we run the healthcheck
        card = analysis.compute(experiment=experiment, generation_strategy=None)
        # THEN it is a FAIL and the subtitle has no "last trial run" line
        self._check_card(
            card,
            status=HealthcheckStatus.FAIL,
            subtitle=f"{CanGenerateCandidatesAnalysis.REASON_PREFIX}The data is gone.",
            level=AnalysisCardLevel.HIGH,
            reason="The data is gone.",
        )

    def test_is_fail_if_no_trial_was_recently_run(self) -> None:
        # GIVEN an experiment whose only trial started three days ago
        experiment = get_branin_experiment(with_trial=True)
        trial = experiment.trials[0]
        trial.mark_running(no_runner_required=True)
        trial._time_run_started = datetime.now() - timedelta(days=3)
        trial.mark_completed()
        analysis = CanGenerateCandidatesAnalysis(
            can_generate_candidates=False,
            reason="The data is old.",
            days_till_fail=1,
        )
        # WHEN we run the healthcheck
        card = analysis.compute(experiment=experiment, generation_strategy=None)
        # THEN it is a FAIL, because the last run is older than the threshold
        self._check_card(
            card,
            status=HealthcheckStatus.FAIL,
            subtitle=(
                f"{CanGenerateCandidatesAnalysis.REASON_PREFIX}"
                "The data is old.\nLAST TRIAL RUN: 3 day(s) ago"
            ),
            level=AnalysisCardLevel.HIGH,
            reason="The data is old.",
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters