From bb7cd505629e6fbd0846ff5cda787c0c486563c5 Mon Sep 17 00:00:00 2001 From: Shubham Dhama Date: Tue, 22 Nov 2022 18:58:06 +0530 Subject: [PATCH] Add option to output numeric data types as string. (#255) Add option to output numeric data types as string. Data types like `numeric`, `real`, `double precision` support `Infinity`, `-Infinity` and `NaN` values. Currently these values are output as `null` because the JSON specification does not recognize them as valid numeric values. This will create problems for the users of wal2json who need these values to maintain data integrity. --- Makefile | 2 +- README.md | 1 + expected/numeric_data_types_as_string.out | 187 ++++++++++++++++++++++ sql/numeric_data_types_as_string.sql | 33 ++++ wal2json.c | 57 ++++++- 5 files changed, 274 insertions(+), 6 deletions(-) create mode 100644 expected/numeric_data_types_as_string.out create mode 100644 sql/numeric_data_types_as_string.sql diff --git a/Makefile b/Makefile index 6b7187111905..c0effef25d5e 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ REGRESS = cmdline insert1 update1 update2 update3 update4 delete1 delete2 \ delete3 delete4 savepoint specialvalue toast bytea message typmod \ filtertable selecttable include_timestamp include_lsn include_xids \ include_domain_data_type truncate type_oid actions position default \ - pk rename_column + pk rename_column numeric_data_types_as_string PG_CONFIG = pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) diff --git a/README.md b/README.md index 7d7570492f1c..3fd55ed3e91f 100644 --- a/README.md +++ b/README.md @@ -109,6 +109,7 @@ Parameters * `include-not-null`: add _not null_ information as _columnoptionals_. Default is _false_. * `include-default`: add default expression. Default is _false_. * `include-pk`: add _primary key_ information as _pk_. Column name and data type is included. Default is _false_. +* `numeric-data-types-as-string`: use string for numeric data types. 
JSON specification does not recognize `Infinity` and `NaN` as valid numeric values. There might be [potential interoperability problems](https://datatracker.ietf.org/doc/html/rfc7159#section-6) for double precision numbers. Default is _false_. * `pretty-print`: add spaces and indentation to JSON structures. Default is _false_. * `write-in-chunks`: write after every change instead of every changeset. Only used when `format-version` is `1`. Default is _false_. * `include-lsn`: add _nextlsn_ to each changeset. Default is _false_. diff --git a/expected/numeric_data_types_as_string.out b/expected/numeric_data_types_as_string.out new file mode 100644 index 000000000000..cad032e79009 --- /dev/null +++ b/expected/numeric_data_types_as_string.out @@ -0,0 +1,187 @@ +\set VERBOSITY terse +-- predictability +SET synchronous_commit = on; +SET extra_float_digits = 0; +CREATE TABLE table_integer (a smallserial, b smallint, c int, d bigint); +CREATE TABLE table_decimal (a real, b double precision, c numeric); +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'wal2json'); + ?column? 
+---------- + init +(1 row) + +BEGIN; +INSERT INTO table_integer (b, c, d) VALUES(32767, 2147483647, 9223372036854775807); +INSERT INTO table_integer (b, c, d) VALUES(-32768, -2147483648, -9223372036854775808); +INSERT INTO table_decimal (a, b) VALUES('Infinity', 'Infinity'); +INSERT INTO table_decimal (a, b) VALUES('-Infinity', '-Infinity'); +INSERT INTO table_decimal (a, b, c) VALUES('NaN', 'NaN', 'NaN'); +INSERT INTO table_decimal (a, b, c) VALUES(123.456, 123456789.012345, 1234567890987654321.1234567890987654321); +INSERT INTO table_decimal (a, b, c) VALUES(-123.456, -123456789.012345, -1234567890987654321.1234567890987654321); +COMMIT; +SELECT data FROM pg_logical_slot_peek_changes('regression_slot', NULL, NULL, 'format-version', '1', 'pretty-print', '1', 'numeric-data-types-as-string', '1'); + data +----------------------------------------------------------------------------------------------------------------------- + { + + "change": [ + + { + + "kind": "insert", + + "schema": "public", + + "table": "table_integer", + + "columnnames": ["a", "b", "c", "d"], + + "columntypes": ["smallint", "smallint", "integer", "bigint"], + + "columnvalues": ["1", "32767", "2147483647", "9223372036854775807"] + + } + + ,{ + + "kind": "insert", + + "schema": "public", + + "table": "table_integer", + + "columnnames": ["a", "b", "c", "d"], + + "columntypes": ["smallint", "smallint", "integer", "bigint"], + + "columnvalues": ["2", "-32768", "-2147483648", "-9223372036854775808"] + + } + + ,{ + + "kind": "insert", + + "schema": "public", + + "table": "table_decimal", + + "columnnames": ["a", "b", "c"], + + "columntypes": ["real", "double precision", "numeric"], + + "columnvalues": ["Infinity", "Infinity", null] + + } + + ,{ + + "kind": "insert", + + "schema": "public", + + "table": "table_decimal", + + "columnnames": ["a", "b", "c"], + + "columntypes": ["real", "double precision", "numeric"], + + "columnvalues": ["-Infinity", "-Infinity", null] + + } + + ,{ + + "kind": "insert", + 
+ "schema": "public", + + "table": "table_decimal", + + "columnnames": ["a", "b", "c"], + + "columntypes": ["real", "double precision", "numeric"], + + "columnvalues": ["NaN", "NaN", "NaN"] + + } + + ,{ + + "kind": "insert", + + "schema": "public", + + "table": "table_decimal", + + "columnnames": ["a", "b", "c"], + + "columntypes": ["real", "double precision", "numeric"], + + "columnvalues": ["123.456", "123456789.012345", "1234567890987654321.1234567890987654321"] + + } + + ,{ + + "kind": "insert", + + "schema": "public", + + "table": "table_decimal", + + "columnnames": ["a", "b", "c"], + + "columntypes": ["real", "double precision", "numeric"], + + "columnvalues": ["-123.456", "-123456789.012345", "-1234567890987654321.1234567890987654321"]+ + } + + ] + + } +(1 row) + +SELECT data FROM pg_logical_slot_peek_changes('regression_slot', NULL, NULL, 'format-version', '1', 'pretty-print', '1'); + data +----------------------------------------------------------------------------------------------------------------- + { + + "change": [ + + { + + "kind": "insert", + + "schema": "public", + + "table": "table_integer", + + "columnnames": ["a", "b", "c", "d"], + + "columntypes": ["smallint", "smallint", "integer", "bigint"], + + "columnvalues": [1, 32767, 2147483647, 9223372036854775807] + + } + + ,{ + + "kind": "insert", + + "schema": "public", + + "table": "table_integer", + + "columnnames": ["a", "b", "c", "d"], + + "columntypes": ["smallint", "smallint", "integer", "bigint"], + + "columnvalues": [2, -32768, -2147483648, -9223372036854775808] + + } + + ,{ + + "kind": "insert", + + "schema": "public", + + "table": "table_decimal", + + "columnnames": ["a", "b", "c"], + + "columntypes": ["real", "double precision", "numeric"], + + "columnvalues": [null, null, null] + + } + + ,{ + + "kind": "insert", + + "schema": "public", + + "table": "table_decimal", + + "columnnames": ["a", "b", "c"], + + "columntypes": ["real", "double precision", "numeric"], + + "columnvalues": [null, 
null, null] + + } + + ,{ + + "kind": "insert", + + "schema": "public", + + "table": "table_decimal", + + "columnnames": ["a", "b", "c"], + + "columntypes": ["real", "double precision", "numeric"], + + "columnvalues": [null, null, null] + + } + + ,{ + + "kind": "insert", + + "schema": "public", + + "table": "table_decimal", + + "columnnames": ["a", "b", "c"], + + "columntypes": ["real", "double precision", "numeric"], + + "columnvalues": [123.456, 123456789.012345, 1234567890987654321.1234567890987654321] + + } + + ,{ + + "kind": "insert", + + "schema": "public", + + "table": "table_decimal", + + "columnnames": ["a", "b", "c"], + + "columntypes": ["real", "double precision", "numeric"], + + "columnvalues": [-123.456, -123456789.012345, -1234567890987654321.1234567890987654321]+ + } + + ] + + } +(1 row) + +SELECT data FROM pg_logical_slot_peek_changes('regression_slot', NULL, NULL, 'format-version', '2', 'numeric-data-types-as-string', '1'); + data +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"action":"B"} + {"action":"I","schema":"public","table":"table_integer","columns":[{"name":"a","type":"smallint","value":"1"},{"name":"b","type":"smallint","value":"32767"},{"name":"c","type":"integer","value":"2147483647"},{"name":"d","type":"bigint","value":"9223372036854775807"}]} + {"action":"I","schema":"public","table":"table_integer","columns":[{"name":"a","type":"smallint","value":"2"},{"name":"b","type":"smallint","value":"-32768"},{"name":"c","type":"integer","value":"-2147483648"},{"name":"d","type":"bigint","value":"-9223372036854775808"}]} + {"action":"I","schema":"public","table":"table_decimal","columns":[{"name":"a","type":"real","value":"Infinity"},{"name":"b","type":"double 
precision","value":"Infinity"},{"name":"c","type":"numeric","value":null}]} + {"action":"I","schema":"public","table":"table_decimal","columns":[{"name":"a","type":"real","value":"-Infinity"},{"name":"b","type":"double precision","value":"-Infinity"},{"name":"c","type":"numeric","value":null}]} + {"action":"I","schema":"public","table":"table_decimal","columns":[{"name":"a","type":"real","value":"NaN"},{"name":"b","type":"double precision","value":"NaN"},{"name":"c","type":"numeric","value":"NaN"}]} + {"action":"I","schema":"public","table":"table_decimal","columns":[{"name":"a","type":"real","value":"123.456"},{"name":"b","type":"double precision","value":"123456789.012345"},{"name":"c","type":"numeric","value":"1234567890987654321.1234567890987654321"}]} + {"action":"I","schema":"public","table":"table_decimal","columns":[{"name":"a","type":"real","value":"-123.456"},{"name":"b","type":"double precision","value":"-123456789.012345"},{"name":"c","type":"numeric","value":"-1234567890987654321.1234567890987654321"}]} + {"action":"C"} +(9 rows) + +SELECT data FROM pg_logical_slot_peek_changes('regression_slot', NULL, NULL, 'format-version', '2'); + data +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"action":"B"} + {"action":"I","schema":"public","table":"table_integer","columns":[{"name":"a","type":"smallint","value":1},{"name":"b","type":"smallint","value":32767},{"name":"c","type":"integer","value":2147483647},{"name":"d","type":"bigint","value":9223372036854775807}]} + {"action":"I","schema":"public","table":"table_integer","columns":[{"name":"a","type":"smallint","value":2},{"name":"b","type":"smallint","value":-32768},{"name":"c","type":"integer","value":-2147483648},{"name":"d","type":"bigint","value":-9223372036854775808}]} + 
{"action":"I","schema":"public","table":"table_decimal","columns":[{"name":"a","type":"real","value":null},{"name":"b","type":"double precision","value":null},{"name":"c","type":"numeric","value":null}]} + {"action":"I","schema":"public","table":"table_decimal","columns":[{"name":"a","type":"real","value":null},{"name":"b","type":"double precision","value":null},{"name":"c","type":"numeric","value":null}]} + {"action":"I","schema":"public","table":"table_decimal","columns":[{"name":"a","type":"real","value":null},{"name":"b","type":"double precision","value":null},{"name":"c","type":"numeric","value":null}]} + {"action":"I","schema":"public","table":"table_decimal","columns":[{"name":"a","type":"real","value":123.456},{"name":"b","type":"double precision","value":123456789.012345},{"name":"c","type":"numeric","value":1234567890987654321.1234567890987654321}]} + {"action":"I","schema":"public","table":"table_decimal","columns":[{"name":"a","type":"real","value":-123.456},{"name":"b","type":"double precision","value":-123456789.012345},{"name":"c","type":"numeric","value":-1234567890987654321.1234567890987654321}]} + {"action":"C"} +(9 rows) + +SELECT 'stop' FROM pg_drop_replication_slot('regression_slot'); + ?column? 
+---------- + stop +(1 row) + +DROP TABLE table_integer; +DROP TABLE table_decimal; diff --git a/sql/numeric_data_types_as_string.sql b/sql/numeric_data_types_as_string.sql new file mode 100644 index 000000000000..9a06cadfaa6d --- /dev/null +++ b/sql/numeric_data_types_as_string.sql @@ -0,0 +1,33 @@ +\set VERBOSITY terse + +-- predictability +SET synchronous_commit = on; +SET extra_float_digits = 0; + + + +CREATE TABLE table_integer (a smallserial, b smallint, c int, d bigint); +CREATE TABLE table_decimal (a real, b double precision, c numeric); + +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'wal2json'); + +BEGIN; +INSERT INTO table_integer (b, c, d) VALUES(32767, 2147483647, 9223372036854775807); +INSERT INTO table_integer (b, c, d) VALUES(-32768, -2147483648, -9223372036854775808); + +INSERT INTO table_decimal (a, b) VALUES('Infinity', 'Infinity'); +INSERT INTO table_decimal (a, b) VALUES('-Infinity', '-Infinity'); +INSERT INTO table_decimal (a, b, c) VALUES('NaN', 'NaN', 'NaN'); +INSERT INTO table_decimal (a, b, c) VALUES(123.456, 123456789.012345, 1234567890987654321.1234567890987654321); +INSERT INTO table_decimal (a, b, c) VALUES(-123.456, -123456789.012345, -1234567890987654321.1234567890987654321); +COMMIT; + +SELECT data FROM pg_logical_slot_peek_changes('regression_slot', NULL, NULL, 'format-version', '1', 'pretty-print', '1', 'numeric-data-types-as-string', '1'); +SELECT data FROM pg_logical_slot_peek_changes('regression_slot', NULL, NULL, 'format-version', '1', 'pretty-print', '1'); +SELECT data FROM pg_logical_slot_peek_changes('regression_slot', NULL, NULL, 'format-version', '2', 'numeric-data-types-as-string', '1'); +SELECT data FROM pg_logical_slot_peek_changes('regression_slot', NULL, NULL, 'format-version', '2'); + +SELECT 'stop' FROM pg_drop_replication_slot('regression_slot'); + +DROP TABLE table_integer; +DROP TABLE table_decimal; diff --git a/wal2json.c b/wal2json.c index 462882fe5d35..98cfd1d1c802 100644 --- 
a/wal2json.c +++ b/wal2json.c @@ -69,6 +69,7 @@ typedef struct bool pretty_print; /* pretty-print JSON? */ bool write_in_chunks; /* write in chunks? (v1) */ + bool numeric_data_types_as_string; /* use strings for numeric data types */ JsonAction actions; /* output only these actions */ @@ -261,6 +262,7 @@ pg_decode_startup(LogicalDecodingContext *ctx, OutputPluginOptions *opt, bool is data->include_typmod = true; data->include_domain_data_type = false; data->include_column_positions = false; + data->numeric_data_types_as_string = false; data->pretty_print = false; data->write_in_chunks = false; data->include_lsn = false; @@ -480,6 +482,19 @@ pg_decode_startup(LogicalDecodingContext *ctx, OutputPluginOptions *opt, bool is errmsg("could not parse value \"%s\" for parameter \"%s\"", strVal(elem->arg), elem->defname))); } + else if (strcmp(elem->defname, "numeric-data-types-as-string") == 0) + { + if (elem->arg == NULL) + { + elog(DEBUG1, "numeric-data-types-as-string argument is null"); + data->numeric_data_types_as_string = true; + } + else if (!parse_bool(strVal(elem->arg), &data->numeric_data_types_as_string)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("could not parse value \"%s\" for parameter \"%s\"", + strVal(elem->arg), elem->defname))); + } else if (strcmp(elem->defname, "pretty-print") == 0) { if (elem->arg == NULL) @@ -1259,6 +1274,10 @@ tuple_to_stringinfo(LogicalDecodingContext *ctx, TupleDesc tupdesc, HeapTuple tu * * The NaN and Infinity are not valid JSON symbols. Hence, * regardless of sign they are represented as the string null. + * + * Exception to this is when data->numeric_data_types_as_string is + * true. In this case, numbers (including NaN and Infinity values) + * are printed with quotes. 
*/ switch (typid) { @@ -1269,7 +1288,18 @@ tuple_to_stringinfo(LogicalDecodingContext *ctx, TupleDesc tupdesc, HeapTuple tu case FLOAT4OID: case FLOAT8OID: case NUMERICOID: - if (pg_strncasecmp(outputstr, "NaN", 3) == 0 || + if (data->numeric_data_types_as_string) { + if (strspn(outputstr, "0123456789+-eE.") == strlen(outputstr) || + pg_strncasecmp(outputstr, "NaN", 3) == 0 || + pg_strncasecmp(outputstr, "Infinity", 8) == 0 || + pg_strncasecmp(outputstr, "-Infinity", 9) == 0) { + appendStringInfo(&colvalues, "%s", comma); + escape_json(&colvalues, outputstr); + } else { + elog(ERROR, "%s is not a number", outputstr); + } + } + else if (pg_strncasecmp(outputstr, "NaN", 3) == 0 || pg_strncasecmp(outputstr, "Infinity", 8) == 0 || pg_strncasecmp(outputstr, "-Infinity", 9) == 0) { @@ -1846,9 +1876,12 @@ pg_decode_change_v1(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, static void pg_decode_write_value(LogicalDecodingContext *ctx, Datum value, bool isnull, Oid typid) { - Oid typoutfunc; - bool isvarlena; - char *outstr; + JsonDecodingData *data; + Oid typoutfunc; + bool isvarlena; + char *outstr; + + data = ctx->output_plugin_private; if (isnull) { @@ -1885,6 +1918,10 @@ pg_decode_write_value(LogicalDecodingContext *ctx, Datum value, bool isnull, Oid * * The NaN an Infinity are not valid JSON symbols. Hence, regardless of * sign they are represented as the string null. + * + * Exception to this is when data->numeric_data_types_as_string is + * true. In this case, numbers (including NaN and Infinity values) + * are printed with quotes. 
*/ switch (typid) { @@ -1895,7 +1932,17 @@ pg_decode_write_value(LogicalDecodingContext *ctx, Datum value, bool isnull, Oid case FLOAT4OID: case FLOAT8OID: case NUMERICOID: - if (pg_strncasecmp(outstr, "NaN", 3) == 0 || + if (data->numeric_data_types_as_string) { + if (strspn(outstr, "0123456789+-eE.") == strlen(outstr) || + pg_strncasecmp(outstr, "NaN", 3) == 0 || + pg_strncasecmp(outstr, "Infinity", 8) == 0 || + pg_strncasecmp(outstr, "-Infinity", 9) == 0) { + escape_json(ctx->out, outstr); + } else { + elog(ERROR, "%s is not a number", outstr); + } + } + else if (pg_strncasecmp(outstr, "NaN", 3) == 0 || pg_strncasecmp(outstr, "Infinity", 8) == 0 || pg_strncasecmp(outstr, "-Infinity", 9) == 0) {