Skip to content

Commit

Permalink
Track job execution history
Browse files Browse the repository at this point in the history
In #4678 we added an interface for troubleshoting job failures by
logging it in the metadata table `_timescaledb_internal.job_errors`.

With this PR we extended the existing interface to also store succeeded
executions. A new GUC named `timescaledb.enable_job_execution_logging`
was added to control this new behavior and the default value is `false`.

We renamed the metadata table to `_timescaledb_internal.bgw_job_stat_history`
and added a new view `timescaledb_information.job_history` to users that
have enough permissions can check the job execution history.
  • Loading branch information
fabriziomello committed Apr 4, 2024
1 parent 034d577 commit 52094a3
Show file tree
Hide file tree
Showing 42 changed files with 1,324 additions and 340 deletions.
2 changes: 2 additions & 0 deletions .unreleased/pr_6767
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Implements: #6767 Add metadata table `_timestaledb_internal.bgw_job_stat_history` for tracking job execution history
Thanks: @mahipv For starting working on the job history PR
2 changes: 1 addition & 1 deletion cmake/ScriptFiles.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ set(SOURCE_FILES
policy_internal.sql
cagg_utils.sql
cagg_migrate.sql
job_error_log_retention.sql
job_stat_history_log_retention.sql
osm_api.sql
compression_defaults.sql)

Expand Down
12 changes: 6 additions & 6 deletions sql/compat.sql
Original file line number Diff line number Diff line change
Expand Up @@ -378,22 +378,22 @@ END$$
SET search_path TO pg_catalog,pg_temp;


CREATE OR REPLACE FUNCTION _timescaledb_internal.policy_job_error_retention(job_id integer,config jsonb) RETURNS integer LANGUAGE PLPGSQL AS $$
CREATE OR REPLACE FUNCTION _timescaledb_internal.policy_job_stat_history_retention(job_id integer,config jsonb) RETURNS integer LANGUAGE PLPGSQL AS $$
BEGIN
IF current_setting('timescaledb.enable_deprecation_warnings', true)::bool THEN
RAISE WARNING 'function _timescaledb_internal.policy_job_error_retention(integer,jsonb) is deprecated and has been moved to _timescaledb_functions schema. this compatibility function will be removed in a future version.';
RAISE WARNING 'function _timescaledb_internal.policy_job_stat_history_retention(integer,jsonb) is deprecated and has been moved to _timescaledb_functions schema. this compatibility function will be removed in a future version.';
END IF;
RETURN _timescaledb_functions.policy_job_error_retention($1,$2);
RETURN _timescaledb_functions.policy_job_stat_history_retention($1,$2);
END$$
SET search_path TO pg_catalog,pg_temp;


CREATE OR REPLACE FUNCTION _timescaledb_internal.policy_job_error_retention_check(config jsonb) RETURNS void LANGUAGE PLPGSQL AS $$
CREATE OR REPLACE FUNCTION _timescaledb_internal.policy_job_stat_history_retention_check(config jsonb) RETURNS void LANGUAGE PLPGSQL AS $$
BEGIN
IF current_setting('timescaledb.enable_deprecation_warnings', true)::bool THEN
RAISE WARNING 'function _timescaledb_internal.policy_job_error_retention_check(jsonb) is deprecated and has been moved to _timescaledb_functions schema. this compatibility function will be removed in a future version.';
RAISE WARNING 'function _timescaledb_internal.policy_job_stat_history_retention_check(jsonb) is deprecated and has been moved to _timescaledb_functions schema. this compatibility function will be removed in a future version.';
END IF;
PERFORM _timescaledb_functions.policy_job_error_retention_check($1);
PERFORM _timescaledb_functions.policy_job_stat_history_retention_check($1);
END$$
SET search_path TO pg_catalog,pg_temp;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,35 +4,34 @@

-- A retention policy is set up for the table _timescaledb_internal.job_errors (Error Log Retention Policy [2])
-- By default, it will run once a month and and drop rows older than a month.
CREATE OR REPLACE FUNCTION _timescaledb_functions.policy_job_error_retention(job_id integer, config JSONB) RETURNS integer
CREATE OR REPLACE FUNCTION _timescaledb_functions.policy_job_stat_history_retention(job_id integer, config JSONB) RETURNS integer
LANGUAGE PLPGSQL AS
$BODY$
DECLARE
drop_after INTERVAL;
numrows INTEGER;
BEGIN
SELECT config->>'drop_after' INTO STRICT drop_after;
WITH deleted AS
(DELETE
FROM _timescaledb_internal.job_errors
WHERE finish_time < (now() - drop_after) RETURNING *)
SELECT count(*)
FROM deleted INTO numrows;
drop_after := config->>'drop_after';

DELETE
FROM _timescaledb_internal.bgw_job_stat_history
WHERE execution_finish < (now() - drop_after);

GET DIAGNOSTICS numrows = ROW_COUNT;

RETURN numrows;
END;
$BODY$ SET search_path TO pg_catalog, pg_temp;

CREATE OR REPLACE FUNCTION _timescaledb_functions.policy_job_error_retention_check(config JSONB) RETURNS VOID
CREATE OR REPLACE FUNCTION _timescaledb_functions.policy_job_stat_history_retention_check(config JSONB) RETURNS VOID
LANGUAGE PLPGSQL AS
$BODY$
DECLARE
drop_after interval;
BEGIN
IF config IS NULL THEN
RAISE EXCEPTION 'config cannot be NULL, and must contain drop_after';
END IF;
SELECT config->>'drop_after' INTO STRICT drop_after;
IF drop_after IS NULL THEN

IF config->>'drop_after' IS NULL THEN
RAISE EXCEPTION 'drop_after interval not provided';
END IF ;
END;
Expand All @@ -58,18 +57,18 @@ INSERT INTO _timescaledb_config.bgw_job (
VALUES
(
2,
'Error Log Retention Policy [2]',
'Job History Log Retention Policy [2]',
INTERVAL '1 month',
INTERVAL '1 hour',
-1,
INTERVAL '1h',
'_timescaledb_functions',
'policy_job_error_retention',
'policy_job_stat_history_retention',
pg_catalog.quote_ident(current_role)::regrole,
true,
'{"drop_after":"1 month"}',
'_timescaledb_functions',
'policy_job_error_retention_check',
'policy_job_stat_history_retention_check',
true,
'2000-01-01 00:00:00+00'::timestamptz
) ON CONFLICT (id) DO NOTHING;
33 changes: 22 additions & 11 deletions sql/pre_install/tables.sql
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,25 @@ CREATE TABLE _timescaledb_internal.bgw_job_stat (
CONSTRAINT bgw_job_stat_job_id_fkey FOREIGN KEY (job_id) REFERENCES _timescaledb_config.bgw_job (id) ON DELETE CASCADE
);

CREATE SEQUENCE _timescaledb_internal.bgw_job_stat_history_id_seq MINVALUE 1;

CREATE TABLE _timescaledb_internal.bgw_job_stat_history (
id BIGINT NOT NULL DEFAULT nextval('_timescaledb_internal.bgw_job_stat_history_id_seq'),
job_id INTEGER NOT NULL,
pid INTEGER,
execution_start TIMESTAMPTZ NOT NULL DEFAULT NOW(),
execution_finish TIMESTAMPTZ,
succeeded boolean NOT NULL DEFAULT FALSE,
config jsonb,
error_data jsonb,
-- table constraints
CONSTRAINT bgw_job_stat_history_pkey PRIMARY KEY (id)
);

ALTER SEQUENCE _timescaledb_internal.bgw_job_stat_history_id_seq OWNED BY _timescaledb_internal.bgw_job_stat_history.id;

CREATE INDEX bgw_job_stat_history_job_id_idx ON _timescaledb_internal.bgw_job_stat_history (job_id);

--The job_stat table is not dumped by pg_dump on purpose because
--the statistics probably aren't very meaningful across instances.
-- Now we define a special stats table for each job/chunk pair. This will be used by the scheduler
Expand Down Expand Up @@ -475,14 +494,6 @@ SELECT pg_catalog.pg_extension_config_dump('_timescaledb_catalog.continuous_agg_

SELECT pg_catalog.pg_extension_config_dump(pg_get_serial_sequence('_timescaledb_catalog.continuous_agg_migrate_plan_step', 'step_id'), '');

CREATE TABLE _timescaledb_internal.job_errors (
job_id integer not null,
pid integer,
start_time timestamptz,
finish_time timestamptz,
error_data jsonb
);

-- Set table permissions
-- We need to grant SELECT to PUBLIC for all tables even those not
-- marked as being dumped because pg_dump will try to access all
Expand All @@ -500,6 +511,6 @@ GRANT SELECT ON ALL SEQUENCES IN SCHEMA _timescaledb_config TO PUBLIC;

GRANT SELECT ON ALL SEQUENCES IN SCHEMA _timescaledb_internal TO PUBLIC;

-- We want to restrict access to the job errors to only work through
-- the job_errors view.
REVOKE ALL ON _timescaledb_internal.job_errors FROM PUBLIC;
-- We want to restrict access to the bgw_job_stat_history to only work through
-- the job_errors and job_history views.
REVOKE ALL ON _timescaledb_internal.bgw_job_stat_history FROM PUBLIC;
63 changes: 63 additions & 0 deletions sql/updates/latest-dev.sql
Original file line number Diff line number Diff line change
Expand Up @@ -329,3 +329,66 @@ ANALYZE _timescaledb_catalog.continuous_agg;
--
-- END Rebuild the catalog table `_timescaledb_catalog.continuous_agg`
--

--
-- START bgw_job_stat_history
--
DROP VIEW IF EXISTS timescaledb_information.job_errors;

CREATE SEQUENCE _timescaledb_internal.bgw_job_stat_history_id_seq MINVALUE 1;

CREATE TABLE _timescaledb_internal.bgw_job_stat_history (
id INTEGER NOT NULL DEFAULT nextval('_timescaledb_internal.bgw_job_stat_history_id_seq'),
job_id INTEGER NOT NULL,
pid INTEGER,
execution_start TIMESTAMPTZ NOT NULL DEFAULT NOW(),
execution_finish TIMESTAMPTZ,
succeeded boolean NOT NULL DEFAULT FALSE,
config jsonb,
error_data jsonb,
-- table constraints
CONSTRAINT bgw_job_stat_history_pkey PRIMARY KEY (id)
);

ALTER SEQUENCE _timescaledb_internal.bgw_job_stat_history_id_seq OWNED BY _timescaledb_internal.bgw_job_stat_history.id;

CREATE INDEX bgw_job_stat_history_job_id_idx ON _timescaledb_internal.bgw_job_stat_history (job_id);

REVOKE ALL ON _timescaledb_internal.bgw_job_stat_history FROM PUBLIC;

INSERT INTO _timescaledb_internal.bgw_job_stat_history (job_id, pid, execution_start, execution_finish, error_data)
SELECT
job_id,
pid,
start_time,
finish_time,
error_data
FROM
_timescaledb_internal.job_errors
ORDER BY
job_id, start_time;

ALTER EXTENSION timescaledb
DROP TABLE _timescaledb_internal.job_errors;

DROP TABLE _timescaledb_internal.job_errors;

UPDATE
_timescaledb_config.bgw_job
SET
application_name = 'Job History Log Retention Policy [2]',
proc_schema = '_timescaledb_functions',
proc_name = 'policy_job_stat_history_retention',
check_schema = '_timescaledb_functions',
check_name = 'policy_job_stat_history_retention_check'
WHERE
id = 2;

DROP FUNCTION IF EXISTS _timescaledb_internal.policy_job_error_retention(job_id integer,config jsonb);
DROP FUNCTION IF EXISTS _timescaledb_internal.policy_job_error_retention_check(config jsonb);
DROP FUNCTION IF EXISTS _timescaledb_functions.policy_job_error_retention(job_id integer,config jsonb);
DROP FUNCTION IF EXISTS _timescaledb_functions.policy_job_error_retention_check(config jsonb);

--
-- END bgw_job_stat_history
--
87 changes: 87 additions & 0 deletions sql/updates/reverse-dev.sql
Original file line number Diff line number Diff line change
Expand Up @@ -206,4 +206,91 @@ CREATE FUNCTION _timescaledb_functions.get_chunk_colstats(relid REGCLASS)
RETURNS TABLE(chunk_id INTEGER, hypertable_id INTEGER, att_num INTEGER, nullfrac REAL, width INTEGER, distinctval REAL, slotkind INTEGER[], slotopstrings CSTRING[], slotcollations OID[], slot1numbers FLOAT4[], slot2numbers FLOAT4[], slot3numbers FLOAT4[], slot4numbers FLOAT4[], slot5numbers FLOAT4[], slotvaluetypetrings CSTRING[], slot1values CSTRING[], slot2values CSTRING[], slot3values CSTRING[], slot4values CSTRING[], slot5values CSTRING[])
AS $$BEGIN END$$ LANGUAGE plpgsql SET search_path = pg_catalog, pg_temp;

--
-- START bgw_job_stat_history
--
DROP VIEW IF EXISTS timescaledb_information.job_errors;

ALTER EXTENSION timescaledb
DROP VIEW timescaledb_information.job_history;

DROP VIEW IF EXISTS timescaledb_information.job_history;

CREATE TABLE _timescaledb_internal.job_errors (
job_id integer not null,
pid integer,
start_time timestamptz,
finish_time timestamptz,
error_data jsonb
);

INSERT INTO _timescaledb_internal.job_errors (job_id, pid, start_time, finish_time, error_data)
SELECT
job_id,
pid,
execution_start,
execution_finish,
error_data
FROM
_timescaledb_internal.bgw_job_stat_history
WHERE
succeeded IS FALSE
ORDER BY
job_id, execution_start;

ALTER EXTENSION timescaledb
DROP TABLE _timescaledb_internal.bgw_job_stat_history;

DROP TABLE IF EXISTS _timescaledb_internal.bgw_job_stat_history;

REVOKE ALL ON _timescaledb_internal.job_errors FROM PUBLIC;

DROP FUNCTION IF EXISTS _timescaledb_internal.policy_job_stat_history_retention(job_id integer,config jsonb);
DROP FUNCTION IF EXISTS _timescaledb_internal.policy_job_stat_history_retention_check(config jsonb);
DROP FUNCTION IF EXISTS _timescaledb_functions.policy_job_stat_history_retention(job_id integer,config jsonb);
DROP FUNCTION IF EXISTS _timescaledb_functions.policy_job_stat_history_retention_check(config jsonb);

CREATE OR REPLACE FUNCTION _timescaledb_functions.policy_job_error_retention(job_id integer, config JSONB) RETURNS integer
LANGUAGE PLPGSQL AS
$BODY$
DECLARE
drop_after INTERVAL;
numrows INTEGER;
BEGIN
SELECT config->>'drop_after' INTO STRICT drop_after;
WITH deleted AS
(DELETE
FROM _timescaledb_internal.job_errors
WHERE finish_time < (now() - drop_after) RETURNING *)
SELECT count(*)
FROM deleted INTO numrows;
RETURN numrows;
END;
$BODY$ SET search_path TO pg_catalog, pg_temp;

CREATE OR REPLACE FUNCTION _timescaledb_functions.policy_job_error_retention_check(config JSONB) RETURNS VOID
LANGUAGE PLPGSQL AS
$BODY$
DECLARE
drop_after interval;
BEGIN
IF config IS NULL THEN
RAISE EXCEPTION 'config cannot be NULL, and must contain drop_after';
END IF;
SELECT config->>'drop_after' INTO STRICT drop_after;
IF drop_after IS NULL THEN
RAISE EXCEPTION 'drop_after interval not provided';
END IF ;
END;
$BODY$ SET search_path TO pg_catalog, pg_temp;

UPDATE
_timescaledb_config.bgw_job
SET
application_name = 'Error Log Retention Policy [2]',
proc_schema = '_timescaledb_functions',
proc_name = 'policy_job_error_retention',
check_schema = '_timescaledb_functions',
check_name = 'policy_job_error_retention_check'
WHERE
id = 2;
Loading

0 comments on commit 52094a3

Please sign in to comment.