diff --git a/CHANGELOG.md b/CHANGELOG.md index baa6788..ebf0c04 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +#### [v0.11.0](https://github.com/BemiHQ/BemiDB/compare/v0.10.0...v0.11.0) - 2024-11-18 + +- Add support for Postgres `xid` and `xid8` types + #### [v0.10.0](https://github.com/BemiHQ/BemiDB/compare/v0.9.0...v0.10.0) - 2024-11-16 - Add support for Postgres network address types diff --git a/README.md b/README.md index 90c8501..4c24021 100644 --- a/README.md +++ b/README.md @@ -237,42 +237,29 @@ See the [benchmark](/benchmark) directory for more details. Primitive data types are mapped as follows: -| PostgreSQL | Parquet | Iceberg | -|-------------------------|---------------------------------------------------|----------------------------------| -| `bool` | `BOOLEAN` | `boolean` | -| `bpchar` | `BYTE_ARRAY` (`UTF8`) | `string` | -| `varchar` | `BYTE_ARRAY` (`UTF8`) | `string` | -| `text` | `BYTE_ARRAY` (`UTF8`) | `string` | -| `int2` | `INT32` | `int` | -| `int4` | `INT32` | `int` | -| `int8` | `INT64` | `long` | -| `float4` | `FLOAT` | `float` | -| `float8` | `FLOAT` | `float` | -| `numeric` | `FIXED_LEN_BYTE_ARRAY` (`DECIMAL`) | `decimal(P, S)` | -| `date` | `INT32` (`DATE`) | `date` | -| `time` | `INT64` (`TIME_MICROS` / `TIME_MILLIS`) | `time` | -| `timetz` | `INT64` (`TIME_MICROS` / `TIME_MILLIS`) | `time` | -| `timestamp` | `INT64` (`TIMESTAMP_MICROS` / `TIMESTAMP_MILLIS`) | `timestamp` / `timestamp_ns` | -| `timestamptz` | `INT64` (`TIMESTAMP_MICROS` / `TIMESTAMP_MILLIS`) | `timestamptz` / `timestamptz_ns` | -| `uuid` | `FIXED_LEN_BYTE_ARRAY` | `uuid` | -| `bytea` | `BYTE_ARRAY` (`UTF8`) | `binary` | -| `interval` | `BYTE_ARRAY` (`UTF8`) | `string` | -| `point` | `BYTE_ARRAY` (`UTF8`) | `string` | -| `line` | `BYTE_ARRAY` (`UTF8`) | `string` | -| `lseg` | `BYTE_ARRAY` (`UTF8`) | `string` | -| `box` | `BYTE_ARRAY` (`UTF8`) | `string` | -| `path` | `BYTE_ARRAY` (`UTF8`) | `string` | -| `polygon` | `BYTE_ARRAY` (`UTF8`) | `string` | -| `circle` | `BYTE_ARRAY` (`UTF8`) | `string` | -| `cidr` | `BYTE_ARRAY` (`UTF8`) | `string` | -| `inet` | `BYTE_ARRAY` (`UTF8`) | `string` | -| `macaddr` | `BYTE_ARRAY` (`UTF8`) | `string` | -| `macaddr8` | `BYTE_ARRAY` (`UTF8`) | `string` | -| `tsvector` | `BYTE_ARRAY` (`UTF8`) | `string` | -| `json` | `BYTE_ARRAY` (`UTF8`) | `string` (JSON logical type) | -| `jsonb` | `BYTE_ARRAY` (`UTF8`) | `string` (JSON logical type) | -| `_*` (array) | `LIST` `*` | `list` | -| `*` (user-defined type) | `BYTE_ARRAY` (`UTF8`) | `string` | +| PostgreSQL | Parquet | Iceberg | +|-------------------------------------------------------------|---------------------------------------------------|----------------------------------| +| `bool` | `BOOLEAN` | `boolean` | +| `bpchar`, `varchar`, `text` | `BYTE_ARRAY` (`UTF8`) | `string` | +| `int2`, `int4` | `INT32` | `int` | +| `int8` | `INT64` | `long` | +| `xid` | `INT32` (`UINT_32`) | `int` | +| `xid8` | `INT64` (`UINT_64`) | `long` | +| `float4`, `float8` | `FLOAT` | `float` | +| `numeric` | `FIXED_LEN_BYTE_ARRAY` (`DECIMAL`) | `decimal(P, S)` | +| `date` | `INT32` (`DATE`) | `date` | +| `time`, `timetz` | `INT64` (`TIME_MICROS` / `TIME_MILLIS`) | `time` | +| `timestamp` | `INT64` (`TIMESTAMP_MICROS` / `TIMESTAMP_MILLIS`) | `timestamp` / `timestamp_ns` | +| `timestamptz` | `INT64` (`TIMESTAMP_MICROS` / `TIMESTAMP_MILLIS`) | `timestamptz` / `timestamptz_ns` | +| `uuid` | `FIXED_LEN_BYTE_ARRAY` | `uuid` | +| `bytea` | `BYTE_ARRAY` (`UTF8`) | `binary` | +| `interval` | `BYTE_ARRAY` (`UTF8`) | `string` | +| `point`, `line`, `lseg`, `box`, `path`, `polygon`, `circle` | `BYTE_ARRAY` (`UTF8`) | `string` | +| `cidr`, `inet`, `macaddr`, `macaddr8` | `BYTE_ARRAY` (`UTF8`) | `string` | +| `tsvector` | `BYTE_ARRAY` (`UTF8`) | `string` | +| `json`, `jsonb` | `BYTE_ARRAY` (`UTF8`) | `string` (JSON logical type) | +| `_*` (array) | `LIST` `*` | `list` | +| `*` (user-defined type) | `BYTE_ARRAY` (`UTF8`) | `string` | Note that Postgres `json` and `jsonb` types are implemented as JSON logical types and stored as strings (Parquet and Iceberg don't support unstructured data types). You can query JSON columns using standard operators, for example: diff --git a/scripts/install.sh b/scripts/install.sh index f3e51de..0bda5d7 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -1,6 +1,6 @@ #!/bin/bash -VERSION="0.10.0" +VERSION="0.11.0" # Detect OS and architecture OS=$(uname -s | tr '[:upper:]' '[:lower:]') diff --git a/scripts/test-data-types.sql b/scripts/test-data-types.sql index f36346c..88d970b 100644 --- a/scripts/test-data-types.sql +++ b/scripts/test-data-types.sql @@ -1,4 +1,4 @@ --- Usage: psql postgres://127.0.0.1:5432/dbname -P pager=off -f ./scripts/test-data-types.sql +-- Usage: psql postgres://127.0.0.1:5432/dbname -P pager=off -v ON_ERROR_STOP=on -f ./scripts/test-data-types.sql DROP TABLE IF EXISTS test_table; DROP TYPE IF EXISTS address; @@ -16,6 +16,8 @@ CREATE TABLE test_table ( int2_column INT2, int4_column INT4, int8_column INT8, + xid_column XID, + xid8_column XID8, float4_column FLOAT4, float8_column FLOAT8, numeric_column NUMERIC(10, 2), @@ -49,6 +51,8 @@ INSERT INTO test_table ( int2_column, int4_column, int8_column, + xid_column, + xid8_column, float4_column, float8_column, numeric_column, @@ -80,6 +84,8 @@ INSERT INTO test_table ( 32767::INT2, -- int2_column 2147483647::INT4, -- int4_column 9223372036854775807::INT8, -- int8_column + '4294967295'::XID, -- xid_column + '18446744073709551615'::XID8, -- xid8_column 3.14::FLOAT4, -- float4_column 3.141592653589793::FLOAT8, -- float8_column 12345.67::NUMERIC(10, 2), -- numeric_column @@ -111,6 +117,8 @@ INSERT INTO test_table ( -32767::INT2, -- int2_column NULL, -- int4_column -9223372036854775807::INT8, -- int8_column + NULL, -- xid_column + NULL, -- xid8_column NULL, -- float4_column -3.141592653589793::FLOAT8, -- float8_column -12345.00::NUMERIC(10, 2), -- numeric_column diff --git a/scripts/test-json.sql b/scripts/test-json.sql index 245966c..465d531 100644 --- a/scripts/test-json.sql +++ b/scripts/test-json.sql @@ -1,4 +1,4 @@ --- Usage: psql postgres://127.0.0.1:5432/dbname -P pager=off -f ./scripts/test-json.sql +-- Usage: psql postgres://127.0.0.1:5432/dbname -P pager=off -v ON_ERROR_STOP=on -f ./scripts/test-json.sql CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; diff --git a/src/init_test.go b/src/init_test.go index 4ec4a70..26a39f8 100644 --- a/src/init_test.go +++ b/src/init_test.go @@ -49,6 +49,16 @@ var TEST_PG_SCHEMA_COLUMNS = []PgSchemaColumn{ NumericPrecision: "64", NumericScale: "0", }, + { + ColumnName: "xid_column", + DataType: "xid", + UdtName: "xid", + }, + { + ColumnName: "xid8_column", + DataType: "xid8", + UdtName: "xid8", + }, { ColumnName: "float4_column", DataType: "real", @@ -189,6 +199,8 @@ var TEST_LOADED_ROWS = [][]string{ "32767", // int2_column "2147483647", // int4_column "9223372036854775807", // int8_column + "4294967295", // xid_column + "18446744073709551615", // xid8_column "3.14", // float4_column "3.141592653589793", // float8_column "12345.67", // numeric_column @@ -221,6 +233,8 @@ var TEST_LOADED_ROWS = [][]string{ "-32767", // int2_column PG_NULL_STRING, // int4_column "-9223372036854775807", // int8_column + PG_NULL_STRING, // xid_column + PG_NULL_STRING, // xid8_column PG_NULL_STRING, // float4_column "-3.141592653589793", // float8_column "-12345.00", // numeric_column diff --git a/src/main.go b/src/main.go index 13b9de0..82bcf9c 100644 --- a/src/main.go +++ b/src/main.go @@ -6,7 +6,7 @@ import ( "time" ) -const VERSION = "0.10.0" +const VERSION = "0.11.0" func main() { flag.Parse() diff --git a/src/pg_schema_column.go b/src/pg_schema_column.go index 22e020b..652fad6 100644 --- a/src/pg_schema_column.go +++ b/src/pg_schema_column.go @@ -217,6 +217,14 @@ func (pgSchemaColumn *PgSchemaColumn) parquetPrimitiveValue(value string) interf intValue, err := strconv.ParseInt(value, 10, 64) PanicIfError(err) return intValue + case "xid": + intValue, err := strconv.ParseUint(value, 10, 32) + PanicIfError(err) + return intValue + case "xid8": + intValue, err := strconv.ParseUint(value, 10, 64) + PanicIfError(err) + return intValue case "float4": floatValue, err := strconv.ParseFloat(value, 32) PanicIfError(err) @@ -300,6 +308,10 @@ func (pgSchemaColumn *PgSchemaColumn) parquetPrimitiveTypes() (primitiveType str return "DOUBLE", "" case "numeric": return "FIXED_LEN_BYTE_ARRAY", "DECIMAL" + case "xid": + return "INT32", "UINT_32" + case "xid8": + return "INT64", "UINT_64" case "uuid": return "FIXED_LEN_BYTE_ARRAY", "" case "bool": @@ -333,9 +345,9 @@ func (pgSchemaColumn *PgSchemaColumn) icebergPrimitiveType() string { return "string" case "uuid": return "uuid" - case "int2", "int4": + case "int2", "int4", "xid": return "int" - case "int8": + case "int8", "xid8": return "long" case "float4", "float8": return "float" diff --git a/src/proxy.go b/src/proxy.go index 7da766e..fadba61 100644 --- a/src/proxy.go +++ b/src/proxy.go @@ -53,6 +53,56 @@ func (nullDecimal NullDecimal) String() string { //////////////////////////////////////////////////////////////////////////////////////////////////// +type NullUint32 struct { + Present bool + Value uint32 +} + +func (nullUint32 *NullUint32) Scan(value interface{}) error { + if value == nil { + nullUint32.Present = false + return nil + } + + nullUint32.Present = true + nullUint32.Value = value.(uint32) + return nil +} + +func (nullUint32 NullUint32) String() string { + if nullUint32.Present { + return fmt.Sprintf("%v", nullUint32.Value) + } + return "" +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +type NullUint64 struct { + Present bool + Value uint64 +} + +func (nullUint64 *NullUint64) Scan(value interface{}) error { + if value == nil { + nullUint64.Present = false + return nil + } + + nullUint64.Present = true + nullUint64.Value = value.(uint64) + return nil +} + +func (nullUint64 NullUint64) String() string { + if nullUint64.Present { + return fmt.Sprintf("%v", nullUint64.Value) + } + return "" +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + type NullArray struct { Present bool Value []interface{} @@ -213,6 +263,12 @@ func (proxy *Proxy) generateDataRow(rows *sql.Rows, cols []*sql.ColumnType) (*pg case "int64", "*big.Int": var value sql.NullInt64 valuePtrs[i] = &value + case "uint32": // xid + var value NullUint32 + valuePtrs[i] = &value + case "uint64": // xid8 + var value NullUint64 + valuePtrs[i] = &value case "float64", "float32": var value sql.NullFloat64 valuePtrs[i] = &value @@ -256,6 +312,18 @@ func (proxy *Proxy) generateDataRow(rows *sql.Rows, cols []*sql.ColumnType) (*pg } else { values = append(values, nil) } + case *NullUint32: + if value.Present { + values = append(values, []byte(value.String())) + } else { + values = append(values, nil) + } + case *NullUint64: + if value.Present { + values = append(values, []byte(value.String())) + } else { + values = append(values, nil) + } case *sql.NullFloat64: if value.Valid { values = append(values, []byte(fmt.Sprintf("%v", value.Float64))) diff --git a/src/proxy_test.go b/src/proxy_test.go index 7e0c738..3a1264b 100644 --- a/src/proxy_test.go +++ b/src/proxy_test.go @@ -119,6 +119,22 @@ func TestHandleQuery(t *testing.T) { "description": {"int8_column"}, "values": {"-9223372036854775807"}, }, + "SELECT xid_column FROM public.test_table WHERE xid_column IS NOT NULL": { + "description": {"xid_column"}, + "values": {"4294967295"}, + }, + "SELECT xid_column FROM public.test_table WHERE xid_column IS NULL": { + "description": {"xid_column"}, + "values": {""}, + }, + "SELECT xid8_column FROM public.test_table WHERE xid8_column IS NOT NULL": { + "description": {"xid8_column"}, + "values": {"18446744073709551615"}, + }, + "SELECT xid8_column FROM public.test_table WHERE xid8_column IS NULL": { + "description": {"xid8_column"}, + "values": {""}, + }, "SELECT float4_column FROM public.test_table WHERE float4_column IS NOT NULL": { "description": {"float4_column"}, "values": {"3.14"},