-
Notifications
You must be signed in to change notification settings - Fork 14.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
Fix issue with stale dags not being marked inactive by Scheduler #11462
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1311,23 +1311,11 @@ def _execute(self) -> None: | |
# Start after resetting orphaned tasks to avoid stressing out DB. | ||
self.processor_agent.start() | ||
|
||
execute_start_time = timezone.utcnow() | ||
|
||
self._run_scheduler_loop() | ||
|
||
# Stop any processors | ||
self.processor_agent.terminate() | ||
|
||
# Verify that all files were processed, and if so, deactivate DAGs that | ||
# haven't been touched by the scheduler as they likely have been | ||
# deleted. | ||
if self.processor_agent.all_files_processed: | ||
self.log.info( | ||
"Deactivating DAGs that haven't been touched since %s", | ||
execute_start_time.isoformat() | ||
) | ||
models.DAG.deactivate_stale_dags(execute_start_time) | ||
|
||
self.executor.end() | ||
|
||
settings.Session.remove() # type: ignore | ||
|
@@ -1364,6 +1352,7 @@ def _run_scheduler_loop(self) -> None: | |
#. Heartbeat executor | ||
#. Execute queued tasks in executor asynchronously | ||
#. Sync on the states of running tasks | ||
#. Deactivate stale/deleted dags | ||
|
||
Following is a graphic representation of these steps. | ||
|
||
|
@@ -1374,6 +1363,7 @@ def _run_scheduler_loop(self) -> None: | |
if not self.processor_agent: | ||
raise ValueError("Processor agent is not started.") | ||
is_unit_test: bool = conf.getboolean('core', 'unit_test_mode') | ||
stale_dag_cleanup_timeout: int = conf.getint('scheduler', 'stale_dag_cleanup_timeout', 600) | ||
|
||
for loop_count in itertools.count(start=1): | ||
loop_start_time = time.time() | ||
|
@@ -1409,6 +1399,15 @@ def _run_scheduler_loop(self) -> None: | |
# usage when "idle" | ||
time.sleep(self._processor_poll_interval) | ||
|
||
# Verify that all files were processed, and if so, deactivate DAGs that | ||
# haven't been touched by the scheduler as they likely have been deleted. | ||
if self.processor_agent.all_files_processed: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @ashb do we still need to keep this check? I feel that we don't need to keep it anymore. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, not needed anymore, correct. |
||
self.log.info( | ||
"Deactivating DAGs that haven't been touched in %s seconds", | ||
stale_dag_cleanup_timeout | ||
) | ||
models.DAG.deactivate_stale_dags(timezone.utcnow() - timedelta(seconds=stale_dag_cleanup_timeout)) | ||
|
||
if loop_count >= self.num_runs > 0: | ||
self.log.info( | ||
"Exiting scheduler loop as requested number of runs (%d - got to %d) has been reached", | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1842,16 +1842,17 @@ def deactivate_stale_dags(expiration_date, session=None): | |
:type expiration_date: datetime | ||
:return: None | ||
""" | ||
for dag in session.query( | ||
DagModel).filter(DagModel.last_scheduler_run < expiration_date, | ||
DagModel.is_active).all(): | ||
query = session.query(DagModel).filter(DagModel.last_scheduler_run < expiration_date, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oooh crap, I don't update this column anymore on master, so I don't think this is right. But there also isn't any global setting I do update anymore. Even SerializedDag.updated_at might not be right, as if the dag hasn't changed, that value won't be updated anymore. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ooh is it? Then should it be taken care of while doing serialization itself? Like, mark the dag inactive if it doesn't find it anymore? |
||
DagModel.is_active) | ||
|
||
for dag in with_row_locks(query, of=DagModel, **skip_locked(session=session)).all(): | ||
log.info( | ||
"Deactivating DAG ID %s since it was last touched by the scheduler at %s", | ||
dag.dag_id, dag.last_scheduler_run.isoformat() | ||
) | ||
dag.is_active = False | ||
session.merge(dag) | ||
session.commit() | ||
session.flush() | ||
|
||
@staticmethod | ||
@provide_session | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's not re-create the timedelta object more often than we need to.