Skip to content

Commit

Permalink
Add support for Postgres xid and xid8 types
Browse files Browse the repository at this point in the history
  • Loading branch information
exAspArk committed Nov 18, 2024
1 parent c9a2281 commit 0d5566d
Show file tree
Hide file tree
Showing 10 changed files with 151 additions and 42 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog

#### [v0.11.0](https://github.com/BemiHQ/BemiDB/compare/v0.10.0...v0.11.0) - 2024-11-18

- Add support for Postgres `xid` and `xid8` types

#### [v0.10.0](https://github.com/BemiHQ/BemiDB/compare/v0.9.0...v0.10.0) - 2024-11-16

- Add support for Postgres network address types
Expand Down
59 changes: 23 additions & 36 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -237,42 +237,29 @@ See the [benchmark](/benchmark) directory for more details.

Primitive data types are mapped as follows:

| PostgreSQL | Parquet | Iceberg |
|-------------------------|---------------------------------------------------|----------------------------------|
| `bool` | `BOOLEAN` | `boolean` |
| `bpchar` | `BYTE_ARRAY` (`UTF8`) | `string` |
| `varchar` | `BYTE_ARRAY` (`UTF8`) | `string` |
| `text` | `BYTE_ARRAY` (`UTF8`) | `string` |
| `int2` | `INT32` | `int` |
| `int4` | `INT32` | `int` |
| `int8` | `INT64` | `long` |
| `float4` | `FLOAT` | `float` |
| `float8` | `FLOAT` | `float` |
| `numeric` | `FIXED_LEN_BYTE_ARRAY` (`DECIMAL`) | `decimal(P, S)` |
| `date` | `INT32` (`DATE`) | `date` |
| `time` | `INT64` (`TIME_MICROS` / `TIME_MILLIS`) | `time` |
| `timetz` | `INT64` (`TIME_MICROS` / `TIME_MILLIS`) | `time` |
| `timestamp` | `INT64` (`TIMESTAMP_MICROS` / `TIMESTAMP_MILLIS`) | `timestamp` / `timestamp_ns` |
| `timestamptz` | `INT64` (`TIMESTAMP_MICROS` / `TIMESTAMP_MILLIS`) | `timestamptz` / `timestamptz_ns` |
| `uuid` | `FIXED_LEN_BYTE_ARRAY` | `uuid` |
| `bytea` | `BYTE_ARRAY` (`UTF8`) | `binary` |
| `interval` | `BYTE_ARRAY` (`UTF8`) | `string` |
| `point` | `BYTE_ARRAY` (`UTF8`) | `string` |
| `line` | `BYTE_ARRAY` (`UTF8`) | `string` |
| `lseg` | `BYTE_ARRAY` (`UTF8`) | `string` |
| `box` | `BYTE_ARRAY` (`UTF8`) | `string` |
| `path` | `BYTE_ARRAY` (`UTF8`) | `string` |
| `polygon` | `BYTE_ARRAY` (`UTF8`) | `string` |
| `circle` | `BYTE_ARRAY` (`UTF8`) | `string` |
| `cidr` | `BYTE_ARRAY` (`UTF8`) | `string` |
| `inet` | `BYTE_ARRAY` (`UTF8`) | `string` |
| `macaddr` | `BYTE_ARRAY` (`UTF8`) | `string` |
| `macaddr8` | `BYTE_ARRAY` (`UTF8`) | `string` |
| `tsvector` | `BYTE_ARRAY` (`UTF8`) | `string` |
| `json` | `BYTE_ARRAY` (`UTF8`) | `string` (JSON logical type) |
| `jsonb` | `BYTE_ARRAY` (`UTF8`) | `string` (JSON logical type) |
| `_*` (array) | `LIST` `*` | `list` |
| `*` (user-defined type) | `BYTE_ARRAY` (`UTF8`) | `string` |
| PostgreSQL | Parquet | Iceberg |
|-------------------------------------------------------------|---------------------------------------------------|----------------------------------|
| `bool` | `BOOLEAN` | `boolean` |
| `bpchar`, `varchar`, `text` | `BYTE_ARRAY` (`UTF8`) | `string` |
| `int2`, `int4` | `INT32` | `int` |
| `int8` | `INT64` | `long` |
| `xid` | `INT32` (`UINT_32`) | `int` |
| `xid8` | `INT64` (`UINT_64`) | `long` |
| `float4`, `float8` | `FLOAT` | `float` |
| `numeric` | `FIXED_LEN_BYTE_ARRAY` (`DECIMAL`) | `decimal(P, S)` |
| `date` | `INT32` (`DATE`) | `date` |
| `time`, `timetz` | `INT64` (`TIME_MICROS` / `TIME_MILLIS`) | `time` |
| `timestamp` | `INT64` (`TIMESTAMP_MICROS` / `TIMESTAMP_MILLIS`) | `timestamp` / `timestamp_ns` |
| `timestamptz` | `INT64` (`TIMESTAMP_MICROS` / `TIMESTAMP_MILLIS`) | `timestamptz` / `timestamptz_ns` |
| `uuid` | `FIXED_LEN_BYTE_ARRAY` | `uuid` |
| `bytea` | `BYTE_ARRAY` (`UTF8`) | `binary` |
| `interval` | `BYTE_ARRAY` (`UTF8`) | `string` |
| `point`, `line`, `lseg`, `box`, `path`, `polygon`, `circle` | `BYTE_ARRAY` (`UTF8`) | `string` |
| `cidr`, `inet`, `macaddr`, `macaddr8` | `BYTE_ARRAY` (`UTF8`) | `string` |
| `tsvector` | `BYTE_ARRAY` (`UTF8`) | `string` |
| `json`, `jsonb` | `BYTE_ARRAY` (`UTF8`) | `string` (JSON logical type) |
| `_*` (array) | `LIST` `*` | `list` |
| `*` (user-defined type) | `BYTE_ARRAY` (`UTF8`) | `string` |

Note that Postgres `json` and `jsonb` types are implemented as JSON logical types and stored as strings (Parquet and Iceberg don't support unstructured data types).
You can query JSON columns using standard operators, for example:
Expand Down
2 changes: 1 addition & 1 deletion scripts/install.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash

VERSION="0.10.0"
VERSION="0.11.0"

# Detect OS and architecture
OS=$(uname -s | tr '[:upper:]' '[:lower:]')
Expand Down
10 changes: 9 additions & 1 deletion scripts/test-data-types.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-- Usage: psql postgres://127.0.0.1:5432/dbname -P pager=off -f ./scripts/test-data-types.sql
-- Usage: psql postgres://127.0.0.1:5432/dbname -P pager=off -v ON_ERROR_STOP=on -f ./scripts/test-data-types.sql

DROP TABLE IF EXISTS test_table;
DROP TYPE IF EXISTS address;
Expand All @@ -16,6 +16,8 @@ CREATE TABLE test_table (
int2_column INT2,
int4_column INT4,
int8_column INT8,
xid_column XID,
xid8_column XID8,
float4_column FLOAT4,
float8_column FLOAT8,
numeric_column NUMERIC(10, 2),
Expand Down Expand Up @@ -49,6 +51,8 @@ INSERT INTO test_table (
int2_column,
int4_column,
int8_column,
xid_column,
xid8_column,
float4_column,
float8_column,
numeric_column,
Expand Down Expand Up @@ -80,6 +84,8 @@ INSERT INTO test_table (
32767::INT2, -- int2_column
2147483647::INT4, -- int4_column
9223372036854775807::INT8, -- int8_column
'4294967295'::XID, -- xid_column
'18446744073709551615'::XID8, -- xid8_column
3.14::FLOAT4, -- float4_column
3.141592653589793::FLOAT8, -- float8_column
12345.67::NUMERIC(10, 2), -- numeric_column
Expand Down Expand Up @@ -111,6 +117,8 @@ INSERT INTO test_table (
-32767::INT2, -- int2_column
NULL, -- int4_column
-9223372036854775807::INT8, -- int8_column
NULL, -- xid_column
NULL, -- xid8_column
NULL, -- float4_column
-3.141592653589793::FLOAT8, -- float8_column
-12345.00::NUMERIC(10, 2), -- numeric_column
Expand Down
2 changes: 1 addition & 1 deletion scripts/test-json.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-- Usage: psql postgres://127.0.0.1:5432/dbname -P pager=off -f ./scripts/test-json.sql
-- Usage: psql postgres://127.0.0.1:5432/dbname -P pager=off -v ON_ERROR_STOP=on -f ./scripts/test-json.sql

CREATE EXTENSION IF NOT EXISTS "uuid-ossp";

Expand Down
14 changes: 14 additions & 0 deletions src/init_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,16 @@ var TEST_PG_SCHEMA_COLUMNS = []PgSchemaColumn{
NumericPrecision: "64",
NumericScale: "0",
},
{
ColumnName: "xid_column",
DataType: "xid",
UdtName: "xid",
},
{
ColumnName: "xid8_column",
DataType: "xid8",
UdtName: "xid8",
},
{
ColumnName: "float4_column",
DataType: "real",
Expand Down Expand Up @@ -189,6 +199,8 @@ var TEST_LOADED_ROWS = [][]string{
"32767", // int2_column
"2147483647", // int4_column
"9223372036854775807", // int8_column
"4294967295", // xid_column
"18446744073709551615", // xid8_column
"3.14", // float4_column
"3.141592653589793", // float8_column
"12345.67", // numeric_column
Expand Down Expand Up @@ -221,6 +233,8 @@ var TEST_LOADED_ROWS = [][]string{
"-32767", // int2_column
PG_NULL_STRING, // int4_column
"-9223372036854775807", // int8_column
PG_NULL_STRING, // xid_column
PG_NULL_STRING, // xid8_column
PG_NULL_STRING, // float4_column
"-3.141592653589793", // float8_column
"-12345.00", // numeric_column
Expand Down
2 changes: 1 addition & 1 deletion src/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import (
"time"
)

const VERSION = "0.10.0"
const VERSION = "0.11.0"

func main() {
flag.Parse()
Expand Down
16 changes: 14 additions & 2 deletions src/pg_schema_column.go
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,14 @@ func (pgSchemaColumn *PgSchemaColumn) parquetPrimitiveValue(value string) interf
intValue, err := strconv.ParseInt(value, 10, 64)
PanicIfError(err)
return intValue
case "xid":
intValue, err := strconv.ParseUint(value, 10, 32)
PanicIfError(err)
return intValue
case "xid8":
intValue, err := strconv.ParseUint(value, 10, 64)
PanicIfError(err)
return intValue
case "float4":
floatValue, err := strconv.ParseFloat(value, 32)
PanicIfError(err)
Expand Down Expand Up @@ -300,6 +308,10 @@ func (pgSchemaColumn *PgSchemaColumn) parquetPrimitiveTypes() (primitiveType str
return "DOUBLE", ""
case "numeric":
return "FIXED_LEN_BYTE_ARRAY", "DECIMAL"
case "xid":
return "INT32", "UINT_32"
case "xid8":
return "INT64", "UINT_64"
case "uuid":
return "FIXED_LEN_BYTE_ARRAY", ""
case "bool":
Expand Down Expand Up @@ -333,9 +345,9 @@ func (pgSchemaColumn *PgSchemaColumn) icebergPrimitiveType() string {
return "string"
case "uuid":
return "uuid"
case "int2", "int4":
case "int2", "int4", "xid":
return "int"
case "int8":
case "int8", "xid8":
return "long"
case "float4", "float8":
return "float"
Expand Down
68 changes: 68 additions & 0 deletions src/proxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,56 @@ func (nullDecimal NullDecimal) String() string {

////////////////////////////////////////////////////////////////////////////////////////////////////

// NullUint32 is a nullable uint32 usable as a row-scan destination: its Scan
// method has the shape required by database/sql's Scanner interface.
type NullUint32 struct {
	Present bool   // false when the last scanned value was NULL (or Scan failed)
	Value   uint32 // only meaningful when Present is true
}

// Scan stores value in the receiver.
//
// A nil value marks the receiver as not present. A uint32 value is stored and
// marks the receiver as present. Any other dynamic type yields an error
// instead of panicking on a failed type assertion, so the caller's scan call
// can report the problem through its normal error path.
func (nullUint32 *NullUint32) Scan(value interface{}) error {
	if value == nil {
		// Reset both fields so a reused destination never exposes a stale Value.
		*nullUint32 = NullUint32{}
		return nil
	}

	uintValue, ok := value.(uint32)
	if !ok {
		*nullUint32 = NullUint32{}
		return fmt.Errorf("scanning NullUint32: unsupported source type %T", value)
	}

	nullUint32.Present = true
	nullUint32.Value = uintValue
	return nil
}

// String returns the decimal representation of Value, or an empty string when
// the value is not present.
func (nullUint32 NullUint32) String() string {
	if !nullUint32.Present {
		return ""
	}
	return fmt.Sprintf("%v", nullUint32.Value)
}

////////////////////////////////////////////////////////////////////////////////////////////////////

// NullUint64 is a nullable uint64 usable as a row-scan destination: its Scan
// method has the shape required by database/sql's Scanner interface.
type NullUint64 struct {
	Present bool   // false when the last scanned value was NULL (or Scan failed)
	Value   uint64 // only meaningful when Present is true
}

// Scan stores value in the receiver.
//
// A nil value marks the receiver as not present. A uint64 value is stored and
// marks the receiver as present. Any other dynamic type yields an error
// instead of panicking on a failed type assertion, so the caller's scan call
// can report the problem through its normal error path.
func (nullUint64 *NullUint64) Scan(value interface{}) error {
	if value == nil {
		// Reset both fields so a reused destination never exposes a stale Value.
		*nullUint64 = NullUint64{}
		return nil
	}

	uintValue, ok := value.(uint64)
	if !ok {
		*nullUint64 = NullUint64{}
		return fmt.Errorf("scanning NullUint64: unsupported source type %T", value)
	}

	nullUint64.Present = true
	nullUint64.Value = uintValue
	return nil
}

// String returns the decimal representation of Value, or an empty string when
// the value is not present.
func (nullUint64 NullUint64) String() string {
	if !nullUint64.Present {
		return ""
	}
	return fmt.Sprintf("%v", nullUint64.Value)
}

////////////////////////////////////////////////////////////////////////////////////////////////////

type NullArray struct {
Present bool
Value []interface{}
Expand Down Expand Up @@ -213,6 +263,12 @@ func (proxy *Proxy) generateDataRow(rows *sql.Rows, cols []*sql.ColumnType) (*pg
case "int64", "*big.Int":
var value sql.NullInt64
valuePtrs[i] = &value
case "uint32": // xid
var value NullUint32
valuePtrs[i] = &value
case "uint64": // xid8
var value NullUint64
valuePtrs[i] = &value
case "float64", "float32":
var value sql.NullFloat64
valuePtrs[i] = &value
Expand Down Expand Up @@ -256,6 +312,18 @@ func (proxy *Proxy) generateDataRow(rows *sql.Rows, cols []*sql.ColumnType) (*pg
} else {
values = append(values, nil)
}
case *NullUint32:
if value.Present {
values = append(values, []byte(value.String()))
} else {
values = append(values, nil)
}
case *NullUint64:
if value.Present {
values = append(values, []byte(value.String()))
} else {
values = append(values, nil)
}
case *sql.NullFloat64:
if value.Valid {
values = append(values, []byte(fmt.Sprintf("%v", value.Float64)))
Expand Down
16 changes: 16 additions & 0 deletions src/proxy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,22 @@ func TestHandleQuery(t *testing.T) {
"description": {"int8_column"},
"values": {"-9223372036854775807"},
},
"SELECT xid_column FROM public.test_table WHERE xid_column IS NOT NULL": {
"description": {"xid_column"},
"values": {"4294967295"},
},
"SELECT xid_column FROM public.test_table WHERE xid_column IS NULL": {
"description": {"xid_column"},
"values": {""},
},
"SELECT xid8_column FROM public.test_table WHERE xid8_column IS NOT NULL": {
"description": {"xid8_column"},
"values": {"18446744073709551615"},
},
"SELECT xid8_column FROM public.test_table WHERE xid8_column IS NULL": {
"description": {"xid8_column"},
"values": {""},
},
"SELECT float4_column FROM public.test_table WHERE float4_column IS NOT NULL": {
"description": {"float4_column"},
"values": {"3.14"},
Expand Down

0 comments on commit 0d5566d

Please sign in to comment.