Skip to content

Commit

Permalink
Remove signal-unsafe calls from signal handlers
Browse files Browse the repository at this point in the history
Functions `elog` and `ereport` are unsafe to use in signal handlers
since they call `malloc`. This commit removes them from signal
handlers.

Fixes #4200
  • Loading branch information
mkindahl committed Mar 30, 2022
1 parent 347b45f commit 81b71b6
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 48 deletions.
12 changes: 4 additions & 8 deletions src/bgw/job.c
Original file line number Diff line number Diff line change
Expand Up @@ -807,14 +807,10 @@ ts_bgw_job_timeout_at(BgwJob *job, TimestampTz start_time)

static void handle_sigterm(SIGNAL_ARGS)
{
/*
* do not use a level >= ERROR because we don't want to exit here but
* rather only during CHECK_FOR_INTERRUPTS
*/
ereport(LOG,
(errcode(ERRCODE_ADMIN_SHUTDOWN),
errmsg("terminating TimescaleDB background job \"%s\" due to administrator command",
MyBgworkerEntry->bgw_name)));
/* Do not use anything that calls malloc() inside a signal handler since
* malloc() is not signal-safe. This includes ereport() */
write_stderr("terminating TimescaleDB background job \"%s\" due to administrator command\n",
MyBgworkerEntry->bgw_name);
die(postgres_signal_arg);
}

Expand Down
10 changes: 3 additions & 7 deletions src/bgw/scheduler.c
Original file line number Diff line number Diff line change
Expand Up @@ -834,13 +834,9 @@ ts_bgw_scheduler_setup_mctx()

static void handle_sigterm(SIGNAL_ARGS)
{
/*
* do not use a level >= ERROR because we don't want to exit here but
* rather only during CHECK_FOR_INTERRUPTS
*/
ereport(LOG,
(errcode(ERRCODE_ADMIN_SHUTDOWN),
errmsg("terminating TimescaleDB job scheduler due to administrator command")));
/* Do not use anything that calls malloc() inside a signal handler since
* malloc() is not signal-safe. This includes ereport() */
write_stderr("terminating TimescaleDB job scheduler due to administrator command\n");
die(postgres_signal_arg);
}

Expand Down
5 changes: 3 additions & 2 deletions src/loader/bgw_launcher.c
Original file line number Diff line number Diff line change
Expand Up @@ -713,7 +713,8 @@ static void launcher_sigterm(SIGNAL_ARGS)
{
/* Do not use anything that calls malloc() inside a signal handler since
* malloc() is not signal-safe. This includes ereport() */
write_stderr("terminating TimescaleDB background worker launcher due to administrator command");
write_stderr(
"terminating TimescaleDB background worker launcher due to administrator command\n");
die(postgres_signal_arg);
}

Expand Down Expand Up @@ -807,7 +808,7 @@ static void entrypoint_sigterm(SIGNAL_ARGS)
{
/* Do not use anything that calls malloc() inside a signal handler since
* malloc() is not signal-safe. This includes ereport() */
write_stderr("terminating TimescaleDB scheduler entrypoint due to administrator command");
write_stderr("terminating TimescaleDB scheduler entrypoint due to administrator command\n");
die(postgres_signal_arg);
}

Expand Down
3 changes: 1 addition & 2 deletions test/src/bgw/scheduler_mock.c
Original file line number Diff line number Diff line change
Expand Up @@ -248,8 +248,7 @@ static pqsigfunc prev_signal_func = NULL;

static void log_terminate_signal(SIGNAL_ARGS)
{
elog(WARNING, "Job got term signal");

write_stderr("job got term signal\n");
if (prev_signal_func != NULL)
prev_signal_func(postgres_signal_arg);
}
Expand Down
48 changes: 19 additions & 29 deletions tsl/test/expected/bgw_db_scheduler.out
Original file line number Diff line number Diff line change
Expand Up @@ -748,16 +748,14 @@ SELECT ts_bgw_db_scheduler_test_wait_for_scheduler_finish();
(1 row)

SELECT * FROM sorted_bgw_log;
msg_no | application_name | msg
--------+------------------+---------------------------------------------------------------------------------------
msg_no | application_name | msg
--------+------------------+-----------------------------------------------------
0 | DB Scheduler | [TESTING] Registered new background worker
1 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
0 | test_job_3_long | Before sleep job 3
1 | test_job_3_long | Job got term signal
2 | test_job_3_long | terminating TimescaleDB background job "test_job_3_long" due to administrator command
3 | test_job_3_long | terminating connection due to administrator command
1 | test_job_3_long | terminating connection due to administrator command
2 | DB Scheduler | job 1004 failed
(7 rows)
(5 rows)

SELECT job_id, last_run_success, total_runs, total_successes, total_failures, total_crashes
FROM _timescaledb_internal.bgw_job_stat;
Expand All @@ -781,21 +779,19 @@ FROM _timescaledb_internal.bgw_job_stat;
(1 row)

SELECT * FROM sorted_bgw_log;
msg_no | application_name | msg
--------+------------------+---------------------------------------------------------------------------------------
msg_no | application_name | msg
--------+------------------+-----------------------------------------------------
0 | DB Scheduler | [TESTING] Registered new background worker
1 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
0 | test_job_3_long | Before sleep job 3
1 | test_job_3_long | Job got term signal
2 | test_job_3_long | terminating TimescaleDB background job "test_job_3_long" due to administrator command
3 | test_job_3_long | terminating connection due to administrator command
1 | test_job_3_long | terminating connection due to administrator command
0 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
2 | DB Scheduler | job 1004 failed
1 | DB Scheduler | [TESTING] Registered new background worker
2 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
0 | test_job_3_long | Before sleep job 3
1 | test_job_3_long | After sleep job 3
(12 rows)
(10 rows)

--Test sending a SIGHUP to a job
\c :TEST_DBNAME :ROLE_SUPERUSER
Expand Down Expand Up @@ -921,17 +917,14 @@ FROM _timescaledb_internal.bgw_job_stat;
(1 row)

SELECT * FROM sorted_bgw_log;
msg_no | application_name | msg
--------+------------------+---------------------------------------------------------------------------------------
msg_no | application_name | msg
--------+------------------+-----------------------------------------------------
0 | DB Scheduler | [TESTING] Registered new background worker
1 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
2 | DB Scheduler | terminating TimescaleDB job scheduler due to administrator command
3 | DB Scheduler | terminating connection due to administrator command
2 | DB Scheduler | terminating connection due to administrator command
0 | test_job_3_long | Before sleep job 3
1 | test_job_3_long | Job got term signal
2 | test_job_3_long | terminating TimescaleDB background job "test_job_3_long" due to administrator command
3 | test_job_3_long | terminating connection due to administrator command
(8 rows)
1 | test_job_3_long | terminating connection due to administrator command
(5 rows)

--After a SIGTERM to scheduler and jobs, the jobs are considered crashed and there is a imposed wait of 5 min before a job can be run.
--See that there is no run again because of the crash-imposed wait (not run with the 10ms retry_period)
Expand Down Expand Up @@ -963,23 +956,20 @@ FROM _timescaledb_internal.bgw_job_stat;
(1 row)

SELECT * FROM sorted_bgw_log;
msg_no | application_name | msg
--------+------------------+---------------------------------------------------------------------------------------
0 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
msg_no | application_name | msg
--------+------------------+-----------------------------------------------------
0 | DB Scheduler | [TESTING] Registered new background worker
0 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
1 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
2 | DB Scheduler | terminating TimescaleDB job scheduler due to administrator command
3 | DB Scheduler | terminating connection due to administrator command
2 | DB Scheduler | terminating connection due to administrator command
0 | test_job_3_long | Before sleep job 3
1 | test_job_3_long | Job got term signal
2 | test_job_3_long | terminating TimescaleDB background job "test_job_3_long" due to administrator command
3 | test_job_3_long | terminating connection due to administrator command
1 | test_job_3_long | terminating connection due to administrator command
0 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
1 | DB Scheduler | [TESTING] Registered new background worker
2 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
0 | test_job_3_long | Before sleep job 3
1 | test_job_3_long | After sleep job 3
(14 rows)
(11 rows)

CREATE FUNCTION wait_for_timer_to_run(started_at INTEGER, spins INTEGER=:TEST_SPINWAIT_ITERS) RETURNS BOOLEAN LANGUAGE PLPGSQL AS
$BODY$
Expand Down

0 comments on commit 81b71b6

Please sign in to comment.