Skip to content

Commit

Permalink
Create can generate candidates health check (#2821)
Browse files Browse the repository at this point in the history
Summary:

This healthcheck considers how long ago the last trial was run to decide how concerned it should be.

Differential Revision: D63836018
  • Loading branch information
Daniel Cohen authored and facebook-github-bot committed Oct 3, 2024
1 parent 318ebe7 commit a290227
Show file tree
Hide file tree
Showing 4 changed files with 245 additions and 0 deletions.
4 changes: 4 additions & 0 deletions ax/analysis/healthcheck/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,17 @@

# pyre-strict

from ax.analysis.healthcheck.can_generate_candidates import (
CanGenerateCandidatesAnalysis,
)
from ax.analysis.healthcheck.healthcheck_analysis import (
HealthcheckAnalysis,
HealthcheckAnalysisCard,
HealthcheckStatus,
)

__all__ = [
"CanGenerateCandidatesAnalysis",
"HealthcheckAnalysis",
"HealthcheckAnalysisCard",
"HealthcheckStatus",
Expand Down
89 changes: 89 additions & 0 deletions ax/analysis/healthcheck/can_generate_candidates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import json
from datetime import datetime
from typing import Optional

import pandas as pd
from ax.analysis.analysis import AnalysisCardLevel

from ax.analysis.healthcheck.healthcheck_analysis import (
HealthcheckAnalysis,
HealthcheckAnalysisCard,
HealthcheckStatus,
)
from ax.core.experiment import Experiment
from ax.modelbridge.generation_strategy import GenerationStrategy
from pyre_extensions import none_throws


class CanGenerateCandidatesAnalysis(HealthcheckAnalysis):
    """Healthcheck that reports whether an experiment can generate candidates.

    The caller supplies the verdict (``can_generate_candidates``) and the
    explanation (``reason``); this analysis only grades the severity. When
    candidates cannot be generated, the start time of the most recently run
    trial decides between WARNING and FAIL.
    """

    REASON_PREFIX: str = "This experiment cannot generate candidates.\nREASON:"
    LAST_RUN_TEMPLATE: str = "\nLAST TRIAL RUN: {days} day(s) ago"

    def __init__(
        self, can_generate_candidates: bool, reason: str, days_till_fail: int
    ) -> None:
        """
        Args:
            can_generate_candidates: Whether the experiment is able to generate
                candidates. When True, the resulting status is always PASS.
            reason: Explanation of why candidates cannot be generated. It is
                always recorded in the card's dataframe and is included in the
                subtitle only when candidates cannot be generated.
            days_till_fail: Number of days that may elapse since the last trial
                started running; beyond this the status is FAIL rather than
                WARNING.
        """
        self.can_generate_candidates = can_generate_candidates
        self.reason = reason
        self.days_till_fail = days_till_fail

    def compute(
        self,
        experiment: Optional[Experiment] = None,
        generation_strategy: Optional[GenerationStrategy] = None,
    ) -> HealthcheckAnalysisCard:
        """Build the healthcheck card for this analysis.

        Args:
            experiment: Experiment whose trials are inspected. Only accessed
                (via ``none_throws``) when candidates cannot be generated.
            generation_strategy: Not used by this analysis.

        Returns:
            A ``HealthcheckAnalysisCard`` whose status is PASS when candidates
            can be generated; otherwise FAIL when no trial has a run start
            time or the latest one is more than ``days_till_fail`` days old,
            and WARNING when it is more recent than that.
        """
        if self.can_generate_candidates:
            status = HealthcheckStatus.PASS
            level = AnalysisCardLevel.LOW
            subtitle = ""
        else:
            subtitle = f"{self.REASON_PREFIX}{self.reason}"
            run_start_times = [
                trial.time_run_started
                for trial in none_throws(experiment).trials.values()
                if trial.time_run_started is not None
            ]
            # Assume the worst until a sufficiently recent run proves otherwise.
            status = HealthcheckStatus.FAIL
            level = AnalysisCardLevel.HIGH
            if run_start_times:
                latest_start = max(run_start_times)
                elapsed_days = (datetime.now() - latest_start).days
                status, level = (
                    (HealthcheckStatus.FAIL, AnalysisCardLevel.HIGH)
                    if elapsed_days > self.days_till_fail
                    else (HealthcheckStatus.WARNING, AnalysisCardLevel.MID)
                )
                subtitle += self.LAST_RUN_TEMPLATE.format(days=elapsed_days)

        blob = json.dumps({"status": status})
        summary_df = pd.DataFrame(
            {
                "status": [status],
                "reason": [self.reason],
            }
        )
        return HealthcheckAnalysisCard(
            name="CanGenerateCandidates",
            title="Can Generate Candidates",
            blob=blob,
            subtitle=subtitle,
            df=summary_df,
            level=level,
        )
144 changes: 144 additions & 0 deletions ax/analysis/healthcheck/tests/test_can_generate_candidates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from datetime import datetime, timedelta

import pandas as pd
from ax.analysis.analysis import AnalysisCardLevel
from ax.analysis.healthcheck.can_generate_candidates import (
CanGenerateCandidatesAnalysis,
)
from ax.analysis.healthcheck.healthcheck_analysis import HealthcheckStatus
from ax.core.base_trial import TrialStatus
from ax.utils.common.testutils import TestCase
from ax.utils.testing.core_stubs import get_branin_experiment
from pandas import testing as pdt


class TestCanGenerateCandidates(TestCase):
    """Tests for the status, text and dataframe of CanGenerateCandidatesAnalysis."""

    def test_passes_if_can_generate(self) -> None:
        # GIVEN an analysis that is told candidates can be generated
        analysis = CanGenerateCandidatesAnalysis(
            can_generate_candidates=True,
            reason="No problems found.",
            days_till_fail=0,
        )
        expected_df = pd.DataFrame(
            {
                "status": [HealthcheckStatus.PASS.value],
                "reason": ["No problems found."],
            }
        )
        # WHEN we run the healthcheck (no experiment is needed on this path)
        card = analysis.compute(experiment=None, generation_strategy=None)
        # THEN it PASSES with an empty subtitle
        self.assertEqual(card.get_status(), HealthcheckStatus.PASS)
        self.assertEqual(card.name, "CanGenerateCandidates")
        self.assertEqual(card.title, "Can Generate Candidates")
        self.assertEqual(card.subtitle, "")
        self.assertEqual(card.level, AnalysisCardLevel.LOW)
        pdt.assert_frame_equal(card.df, expected_df)

    def test_warns_if_a_trial_was_recently_run(self) -> None:
        # GIVEN an experiment whose only trial started running one day ago
        experiment = get_branin_experiment(with_trial=True)
        recent_trial = experiment.trials[0]
        recent_trial.mark_running(no_runner_required=True)
        recent_trial._time_run_started = datetime.now() - timedelta(days=1)
        analysis = CanGenerateCandidatesAnalysis(
            can_generate_candidates=False,
            reason="The data is borked.",
            days_till_fail=2,
        )
        expected_subtitle = (
            f"{CanGenerateCandidatesAnalysis.REASON_PREFIX}"
            "The data is borked.\n"
            "LAST TRIAL RUN: 1 day(s) ago"
        )
        expected_df = pd.DataFrame(
            {
                "status": [HealthcheckStatus.WARNING.value],
                "reason": ["The data is borked."],
            }
        )
        # WHEN we run the healthcheck
        card = analysis.compute(experiment=experiment, generation_strategy=None)
        # THEN it is a WARNING, since the last run is within the allowed window
        self.assertEqual(card.get_status(), HealthcheckStatus.WARNING)
        self.assertEqual(card.name, "CanGenerateCandidates")
        self.assertEqual(card.title, "Can Generate Candidates")
        self.assertEqual(card.subtitle, expected_subtitle)
        self.assertEqual(card.level, AnalysisCardLevel.MID)
        pdt.assert_frame_equal(card.df, expected_df)

    def test_is_fail_no_trials_have_been_run(self) -> None:
        # GIVEN an experiment whose only trial is still a candidate
        experiment = get_branin_experiment(with_trial=True)
        candidate_trial = experiment.trials[0]
        self.assertEqual(candidate_trial.status, TrialStatus.CANDIDATE)
        analysis = CanGenerateCandidatesAnalysis(
            can_generate_candidates=False,
            reason="The data is gone.",
            days_till_fail=2,
        )
        expected_subtitle = (
            f"{CanGenerateCandidatesAnalysis.REASON_PREFIX}The data is gone."
        )
        expected_df = pd.DataFrame(
            {
                "status": [HealthcheckStatus.FAIL.value],
                "reason": ["The data is gone."],
            }
        )
        # WHEN we run the healthcheck
        card = analysis.compute(experiment=experiment, generation_strategy=None)
        # THEN it is a FAIL, and no "last trial run" line is appended
        self.assertEqual(card.get_status(), HealthcheckStatus.FAIL)
        self.assertEqual(card.name, "CanGenerateCandidates")
        self.assertEqual(card.title, "Can Generate Candidates")
        self.assertEqual(card.subtitle, expected_subtitle)
        self.assertEqual(card.level, AnalysisCardLevel.HIGH)
        pdt.assert_frame_equal(card.df, expected_df)

    def test_is_fail_if_no_trial_was_recently_run(self) -> None:
        # GIVEN an experiment whose only trial started three days ago and
        # has since completed
        experiment = get_branin_experiment(with_trial=True)
        stale_trial = experiment.trials[0]
        stale_trial.mark_running(no_runner_required=True)
        stale_trial._time_run_started = datetime.now() - timedelta(days=3)
        stale_trial.mark_completed()
        analysis = CanGenerateCandidatesAnalysis(
            can_generate_candidates=False,
            reason="The data is old.",
            days_till_fail=1,
        )
        expected_subtitle = (
            f"{CanGenerateCandidatesAnalysis.REASON_PREFIX}"
            "The data is old.\n"
            "LAST TRIAL RUN: 3 day(s) ago"
        )
        expected_df = pd.DataFrame(
            {
                "status": [HealthcheckStatus.FAIL.value],
                "reason": ["The data is old."],
            }
        )
        # WHEN we run the healthcheck
        card = analysis.compute(experiment=experiment, generation_strategy=None)
        # THEN it is a FAIL, since the last run is older than days_till_fail
        self.assertEqual(card.get_status(), HealthcheckStatus.FAIL)
        self.assertEqual(card.name, "CanGenerateCandidates")
        self.assertEqual(card.title, "Can Generate Candidates")
        self.assertEqual(card.subtitle, expected_subtitle)
        self.assertEqual(card.level, AnalysisCardLevel.HIGH)
        pdt.assert_frame_equal(card.df, expected_df)
8 changes: 8 additions & 0 deletions sphinx/source/analysis.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,14 @@ Plotly Analysis
:undoc-members:
:show-inheritance:

Can Generate Candidates Healthcheck Analysis
~~~~~~~~~~~~~~~

.. automodule:: ax.analysis.healthcheck.can_generate_candidates
:members:
:undoc-members:
:show-inheritance:

Healthcheck Analysis
~~~~~~~~~~~~~~~~~~~~

Expand Down

0 comments on commit a290227

Please sign in to comment.