From 80f4ab9908bba36b6ddca10809ef498c967c7401 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Sat, 23 Apr 2022 11:24:42 -0400 Subject: [PATCH] feat(clickhouse): implement proper type serialization --- docker-compose.yml | 2 +- ibis/backends/clickhouse/__init__.py | 26 +-- ibis/backends/clickhouse/client.py | 140 +---------- ibis/backends/clickhouse/datatypes.py | 220 ++++++++++++++++++ ibis/backends/clickhouse/registry.py | 8 +- .../clickhouse/tests/test_functions.py | 53 +++-- .../clickhouse/tests/test_operators.py | 4 +- ibis/backends/clickhouse/tests/test_select.py | 2 +- ibis/backends/clickhouse/tests/test_types.py | 145 +++++++++--- ibis/backends/duckdb/__init__.py | 9 +- ibis/backends/duckdb/datatypes.py | 83 ++++--- ibis/backends/duckdb/tests/test_datatypes.py | 9 +- ibis/backends/postgres/datatypes.py | 2 +- ibis/backends/tests/test_array.py | 9 +- ibis/expr/datatypes/core.py | 148 +++++++----- ibis/tests/benchmarks/test_benchmarks.py | 4 +- ibis/tests/expr/test_datatypes.py | 5 + poetry.lock | 4 +- pyproject.toml | 4 +- setup.py | 4 +- 20 files changed, 557 insertions(+), 324 deletions(-) create mode 100644 ibis/backends/clickhouse/datatypes.py diff --git a/docker-compose.yml b/docker-compose.yml index 8a0041e2413d..670ec3c4e1e6 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,7 +1,7 @@ version: "3.4" services: clickhouse: - image: yandex/clickhouse-server:22-alpine + image: clickhouse/clickhouse-server:22-alpine ports: - 8123:8123 - 9000:9000 diff --git a/ibis/backends/clickhouse/__init__.py b/ibis/backends/clickhouse/__init__.py index 30480388c31a..5e3fa644c7e6 100644 --- a/ibis/backends/clickhouse/__init__.py +++ b/ibis/backends/clickhouse/__init__.py @@ -11,12 +11,9 @@ import ibis.config import ibis.expr.schema as sch from ibis.backends.base.sql import BaseSQLBackend -from ibis.backends.clickhouse.client import ( - ClickhouseDataType, - ClickhouseTable, - fully_qualified_re, -) +from ibis.backends.clickhouse.client import ClickhouseTable, fully_qualified_re from ibis.backends.clickhouse.compiler import ClickhouseCompiler +from ibis.backends.clickhouse.datatypes import parse, serialize from ibis.config import options _default_compression: str | bool @@ -109,12 +106,12 @@ def current_database(self): return self.con.connection.database def list_databases(self, like=None): - data, schema = self.raw_sql('SELECT name FROM system.databases') + data, _ = self.raw_sql('SELECT name FROM system.databases') databases = list(data[0]) return self._filter_with_like(databases, like) def list_tables(self, like=None, database=None): - data, schema = self.raw_sql('SHOW TABLES') + data, _ = self.raw_sql('SHOW TABLES') databases = list(data[0]) return self._filter_with_like(databases, like) @@ -152,13 +149,7 @@ def raw_sql( 'name': name, 'data': df.to_dict('records'), 'structure': list( - zip( - schema.names, - [ - str(ClickhouseDataType.from_ibis(t)) - for t in schema.types - ], - ) + zip(schema.names, map(serialize, schema.types)) ), } ) @@ -216,9 +207,8 @@ def get_schema( (column_names, types, *_), *_ = self.raw_sql( f"DESCRIBE {qualified_name}" ) - return sch.Schema.from_tuples( - zip(column_names, map(ClickhouseDataType.parse, types)) - ) + + return sch.Schema.from_tuples(zip(column_names, map(parse, types))) def set_options(self, options): self.con.set_options(options) @@ -238,7 +228,7 @@ def _get_schema_using_query(self, query: str) -> sch.Schema: ) [plan] = json.loads(raw_plans) fields = [ - (field["Name"], 
ClickhouseDataType.parse(field["Type"]))
+        (field["Name"], parse(field["Type"]))
         for field in plan["Plan"]["Header"]
     ]
     return sch.Schema.from_tuples(fields)
diff --git a/ibis/backends/clickhouse/client.py b/ibis/backends/clickhouse/client.py
index c30de3284074..0bd6a2878742 100644
--- a/ibis/backends/clickhouse/client.py
+++ b/ibis/backends/clickhouse/client.py
@@ -4,111 +4,10 @@
 import numpy as np
 import pandas as pd
 
-import ibis.common.exceptions as com
 import ibis.expr.datatypes as dt
 import ibis.expr.types as ir
 
 fully_qualified_re = re.compile(r"(.*)\.(?:`(.*)`|(.*))")
-base_typename_re = re.compile(r"(\w+)")
-
-
-_clickhouse_dtypes = {
-    'Null': dt.Null,
-    'Nothing': dt.Null,
-    'UInt8': dt.UInt8,
-    'UInt16': dt.UInt16,
-    'UInt32': dt.UInt32,
-    'UInt64': dt.UInt64,
-    'Int8': dt.Int8,
-    'Int16': dt.Int16,
-    'Int32': dt.Int32,
-    'Int64': dt.Int64,
-    'Float32': dt.Float32,
-    'Float64': dt.Float64,
-    'String': dt.String,
-    'FixedString': dt.String,
-    'Date': dt.Date,
-    'DateTime': dt.Timestamp,
-    'DateTime64': dt.Timestamp,
-    'Array': dt.Array,
-}
-_ibis_dtypes = {v: k for k, v in _clickhouse_dtypes.items()}
-_ibis_dtypes[dt.String] = 'String'
-_ibis_dtypes[dt.Timestamp] = 'DateTime'
-
-
-class ClickhouseDataType:
-
-    __slots__ = 'typename', 'base_typename', 'nullable'
-
-    def __init__(self, typename, nullable=False):
-        m = base_typename_re.match(typename)
-        self.base_typename = m.groups()[0]
-        if self.base_typename not in _clickhouse_dtypes:
-            raise com.UnsupportedBackendType(typename)
-        self.typename = self.base_typename
-        self.nullable = nullable
-
-        if self.base_typename == 'Array':
-            self.typename = typename
-
-    def __str__(self):
-        if self.nullable:
-            return f'Nullable({self.typename})'
-        else:
-            return self.typename
-
-    def __repr__(self):
-        return f'<Clickhouse {str(self)}>'
-
-    @classmethod
-    def parse(cls, spec):
-        # TODO(kszucs): spare parsing, depends on clickhouse-driver#22
-        if spec.startswith('Nullable'):
-            return cls(spec[9:-1], nullable=True)
-        else:
-            return cls(spec)
-
-    def to_ibis(self):
-        if self.base_typename != 'Array':
-            return _clickhouse_dtypes[self.typename](nullable=self.nullable)
-
-        sub_type = ClickhouseDataType(
-            self.get_subname(self.typename)
-        ).to_ibis()
-        return dt.Array(value_type=sub_type)
-
-    @staticmethod
-    def get_subname(name: str) -> str:
-        lbracket_pos = name.find('(')
-        rbracket_pos = name.rfind(')')
-
-        if lbracket_pos == -1 or rbracket_pos == -1:
-            return ''
-
-        subname = name[lbracket_pos + 1 : rbracket_pos]
-        return subname
-
-    @staticmethod
-    def get_typename_from_ibis_dtype(dtype):
-        if not isinstance(dtype, dt.Array):
-            return _ibis_dtypes[type(dtype)]
-
-        return 'Array({})'.format(
-            ClickhouseDataType.get_typename_from_ibis_dtype(dtype.value_type)
-        )
-
-    @classmethod
-    def from_ibis(cls, dtype, nullable=None):
-        typename = ClickhouseDataType.get_typename_from_ibis_dtype(dtype)
-        if nullable is None:
-            nullable = dtype.nullable
-        return cls(typename, nullable=nullable)
-
-
-@dt.dtype.register(ClickhouseDataType)
-def clickhouse_to_ibis_dtype(clickhouse_dtype):
-    return clickhouse_dtype.to_ibis()
 
 
 class ClickhouseTable(ir.TableExpr):
 
     @property
     def _qualified_name(self):
-        return self.op().args[0]
-
-    @property
-    def _unqualified_name(self):
-        return self._match_name()[1]
+        return self.op().name
 
     @property
     def _client(self):
-        return self.op().args[2]
-
-    def _match_name(self):
-        m = fully_qualified_re.match(self._qualified_name)
-        if not m:
-            raise com.IbisError(
-                'Cannot determine database name from 
{}'.format( - self._qualified_name - ) - ) - db, quoted, unquoted = m.groups() - return db, quoted or unquoted - - @property - def _database(self): - return self._match_name()[0] + return self.op().source def invalidate_metadata(self): self._client.invalidate_metadata(self._qualified_name) @@ -168,10 +48,8 @@ def insert(self, obj, **kwargs): assert isinstance(obj, pd.DataFrame) assert set(schema.names) >= set(obj.columns) - columns = ', '.join(map(quote_identifier, obj.columns)) - query = 'INSERT INTO {table} ({columns}) VALUES'.format( - table=self._qualified_name, columns=columns - ) + columns = ", ".join(map(quote_identifier, obj.columns)) + query = f"INSERT INTO {self._qualified_name} ({columns}) VALUES" # convert data columns with datetime64 pandas dtype to native date # because clickhouse-driver 0.0.10 does arithmetic operations on it @@ -180,5 +58,11 @@ def insert(self, obj, **kwargs): if isinstance(schema[col], dt.Date): obj[col] = obj[col].dt.date - data = obj.to_dict('records') - return self._client.con.execute(query, data, **kwargs) + settings = kwargs.pop("settings", {}) + settings["use_numpy"] = True + return self._client.con.insert_dataframe( + query, + obj, + settings=settings, + **kwargs, + ) diff --git a/ibis/backends/clickhouse/datatypes.py b/ibis/backends/clickhouse/datatypes.py new file mode 100644 index 000000000000..7f20d98adce4 --- /dev/null +++ b/ibis/backends/clickhouse/datatypes.py @@ -0,0 +1,220 @@ +from __future__ import annotations + +import functools +from typing import TYPE_CHECKING + +import parsy as p + +if TYPE_CHECKING: + from ibis.expr.datatypes import DataType + +import ibis.expr.datatypes as dt + + +def parse(text: str) -> DataType: + @p.generate + def datetime(): + yield dt.spaceless_string("datetime64", "datetime") + timezone = yield parened_string.optional() + return dt.Timestamp(timezone=timezone, nullable=False) + + primitive = ( + datetime + | dt.spaceless_string("null", "nothing").result(dt.null) + | dt.spaceless_string("bigint", "int64").result( + dt.Int64(nullable=False) + ) + | dt.spaceless_string("double", "float64").result( + dt.Float64(nullable=False) + ) + | dt.spaceless_string("float32", "float").result( + dt.Float32(nullable=False) + ) + | dt.spaceless_string("smallint", "int16", "int2").result( + dt.Int16(nullable=False) + ) + | dt.spaceless_string("date32", "date").result(dt.Date(nullable=False)) + | dt.spaceless_string("time").result(dt.Time(nullable=False)) + | dt.spaceless_string( + "tinyint", "int8", "int1", "boolean", "bool" + ).result(dt.Int8(nullable=False)) + | dt.spaceless_string("integer", "int32", "int4", "int").result( + dt.Int32(nullable=False) + ) + | dt.spaceless_string("uint64").result(dt.UInt64(nullable=False)) + | dt.spaceless_string("uint32").result(dt.UInt32(nullable=False)) + | dt.spaceless_string("uint16").result(dt.UInt16(nullable=False)) + | dt.spaceless_string("uint8").result(dt.UInt8(nullable=False)) + | dt.spaceless_string("uuid").result(dt.UUID(nullable=False)) + | dt.spaceless_string( + "longtext", + "mediumtext", + "tinytext", + "text", + "longblob", + "mediumblob", + "tinyblob", + "blob", + "varchar", + "char", + "string", + ).result(dt.String(nullable=False)) + ) + + @p.generate + def parened_string(): + yield dt.LPAREN + s = yield dt.RAW_STRING + yield dt.RPAREN + return s + + @p.generate + def nullable(): + yield dt.spaceless_string("nullable") + yield dt.LPAREN + parsed_ty = yield ty + yield dt.RPAREN + return parsed_ty(nullable=True) + + @p.generate + def fixed_string(): + yield 
dt.spaceless_string("fixedstring") + yield dt.LPAREN + yield dt.NUMBER + yield dt.RPAREN + return dt.String(nullable=False) + + @p.generate + def decimal(): + yield dt.spaceless_string("decimal", "numeric") + precision, scale = yield dt.LPAREN.then( + p.seq(dt.PRECISION.skip(dt.COMMA), dt.SCALE) + ).skip(dt.RPAREN) + return dt.Decimal(precision, scale, nullable=False) + + @p.generate + def paren_type(): + yield dt.LPAREN + value_type = yield ty + yield dt.RPAREN + return value_type + + @p.generate + def array(): + yield dt.spaceless_string("array") + value_type = yield paren_type + return dt.Array(value_type, nullable=False) + + @p.generate + def map(): + yield dt.spaceless_string("map") + yield dt.LPAREN + key_type = yield ty + yield dt.COMMA + value_type = yield ty + yield dt.RPAREN + return dt.Map(key_type, value_type, nullable=False) + + at_least_one_space = p.regex(r"\s+") + + @p.generate + def nested(): + yield dt.spaceless_string("nested") + yield dt.LPAREN + + field_names_types = yield ( + p.seq(dt.SPACES.then(dt.FIELD.skip(at_least_one_space)), ty) + .combine(lambda field, ty: (field, dt.Array(ty, nullable=False))) + .sep_by(dt.COMMA) + ) + yield dt.RPAREN + return dt.Struct.from_tuples(field_names_types, nullable=False) + + @p.generate + def struct(): + yield dt.spaceless_string("tuple") + yield dt.LPAREN + field_names_types = yield ( + p.seq( + dt.SPACES.then(dt.FIELD.skip(at_least_one_space).optional()), + ty, + ) + .combine(lambda field, ty: (field, ty)) + .sep_by(dt.COMMA) + ) + yield dt.RPAREN + return dt.Struct.from_tuples( + [ + (field_name if field_name is not None else f"f{i:d}", typ) + for i, (field_name, typ) in enumerate(field_names_types) + ], + nullable=False, + ) + + ty = ( + nullable + | nested + | primitive + | fixed_string + | decimal + | array + | map + | struct + ) + return ty.parse(text) + + +@functools.singledispatch +def serialize(ty) -> str: + raise NotImplementedError( + f"{ty} not serializable to clickhouse type string" + ) + + +@serialize.register(dt.DataType) +def _(ty: dt.DataType) -> str: + ser_ty = serialize_raw(ty) + if ty.nullable: + return f"Nullable({ser_ty})" + return ser_ty + + +@functools.singledispatch +def serialize_raw(ty: dt.DataType) -> str: + raise NotImplementedError( + f"{ty} not serializable to clickhouse type string" + ) + + +@serialize_raw.register(dt.DataType) +def _(ty: dt.DataType) -> str: + return type(ty).__name__.capitalize() + + +@serialize_raw.register(dt.Array) +def _(ty: dt.Array) -> str: + return f"Array({serialize(ty.value_type)})" + + +@serialize_raw.register(dt.Map) +def _(ty: dt.Map) -> str: + key_type = serialize(ty.key_type) + value_type = serialize(ty.value_type) + return f"Map({key_type}, {value_type})" + + +@serialize_raw.register(dt.Struct) +def _(ty: dt.Struct) -> str: + fields = ", ".join( + f"{name} {serialize(field_ty)}" for name, field_ty in ty.pairs.items() + ) + return f"Tuple({fields})" + + +@serialize_raw.register(dt.Timestamp) +def _(ty: dt.Timestamp) -> str: + return ( + "DateTime64(6)" + if ty.timezone is None + else f"DateTime64(6, {ty.timezone!r})" + ) diff --git a/ibis/backends/clickhouse/registry.py b/ibis/backends/clickhouse/registry.py index 7f3f785b7ec3..7cb1db2abfa6 100644 --- a/ibis/backends/clickhouse/registry.py +++ b/ibis/backends/clickhouse/registry.py @@ -6,6 +6,7 @@ import ibis.expr.operations as ops import ibis.expr.types as ir import ibis.util as util +from ibis.backends.clickhouse.datatypes import serialize from ibis.backends.clickhouse.identifiers import quote_identifier # 
TODO(kszucs): should inherit operation registry from the base compiler
@@ -19,12 +20,11 @@ def _alias(translator, expr):
 
 
 def _cast(translator, expr):
-    from ibis.backends.clickhouse.client import ClickhouseDataType
-
     op = expr.op()
-    arg, target = op.args
+    arg = op.arg
+    target = op.to
     arg_ = translator.translate(arg)
-    type_ = str(ClickhouseDataType.from_ibis(target, nullable=False))
+    type_ = serialize(target)
 
     return f'CAST({arg_!s} AS {type_!s})'
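
As a quick sanity check on the serializer (a sketch, not part of the patch: traced by hand through the `serialize`/`serialize_raw` registrations added in ibis/backends/clickhouse/datatypes.py above, where a nullable type is wrapped in `Nullable(...)` and timestamps always carry microsecond precision):

    >>> import ibis.expr.datatypes as dt
    >>> from ibis.backends.clickhouse.datatypes import serialize
    >>> serialize(dt.int8)  # ibis types are nullable by default
    'Nullable(Int8)'
    >>> serialize(dt.Array(dt.int8, nullable=False))
    'Array(Nullable(Int8))'
    >>> serialize(dt.Timestamp(nullable=False))
    'DateTime64(6)'

This is exactly what the `_cast` translator above now embeds in generated SQL, and what the cast expectations in the tests below encode.
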
diff --git a/ibis/backends/clickhouse/tests/test_functions.py b/ibis/backends/clickhouse/tests/test_functions.py
index 6b91ea318efc..36e9e13bed1c 100644
--- a/ibis/backends/clickhouse/tests/test_functions.py
+++ b/ibis/backends/clickhouse/tests/test_functions.py
@@ -18,9 +18,11 @@
 @pytest.mark.parametrize(
     ('to_type', 'expected'),
     [
-        param('int8', 'CAST(`double_col` AS Int8)', id="int8"),
-        param('int16', 'CAST(`double_col` AS Int16)', id="int16"),
-        param('float32', 'CAST(`double_col` AS Float32)', id="float32"),
+        param('int8', 'CAST(`double_col` AS Nullable(Int8))', id="int8"),
+        param('int16', 'CAST(`double_col` AS Nullable(Int16))', id="int16"),
+        param(
+            'float32', 'CAST(`double_col` AS Nullable(Float32))', id="float32"
+        ),
         param('float', '`double_col`', id="float"),
         # alltypes.double_col is non-nullable
         param(
@@ -38,11 +40,22 @@ def test_cast_double_col(alltypes, translate, to_type, expected):
 @pytest.mark.parametrize(
     ('to_type', 'expected'),
     [
-        ('int8', 'CAST(`string_col` AS Int8)'),
-        ('int16', 'CAST(`string_col` AS Int16)'),
+        ('int8', 'CAST(`string_col` AS Nullable(Int8))'),
+        ('int16', 'CAST(`string_col` AS Nullable(Int16))'),
         (dt.String(nullable=False), 'CAST(`string_col` AS String)'),
-        ('timestamp', 'CAST(`string_col` AS DateTime)'),
-        ('date', 'CAST(`string_col` AS Date)'),
+        ('timestamp', 'CAST(`string_col` AS Nullable(DateTime64(6)))'),
+        ('date', 'CAST(`string_col` AS Nullable(Date))'),
+        (
+            '!map<string, int64>',
+            'CAST(`string_col` AS Map(Nullable(String), Nullable(Int64)))',
+        ),
+        (
+            '!struct<a: string, b: int64>',
+            (
+                'CAST(`string_col` AS '
+                'Tuple(a Nullable(String), b Nullable(Int64)))'
+            ),
+        ),
     ],
 )
 def test_cast_string_col(alltypes, translate, to_type, expected):
@@ -85,15 +98,13 @@ def test_timestamp_cast(alltypes, translate):
     assert isinstance(result1, ir.TimestampColumn)
     assert isinstance(result2, ir.TimestampColumn)
 
-    assert translate(result1) == 'CAST(`timestamp_col` AS DateTime)'
-    assert translate(result2) == 'CAST(`int_col` AS DateTime)'
+    assert translate(result1) == 'CAST(`timestamp_col` AS DateTime64(6))'
+    assert translate(result2) == 'CAST(`int_col` AS DateTime64(6))'
 
 
-def test_timestamp_now(con, translate):
+def test_timestamp_now(translate):
     expr = ibis.now()
-    # now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
     assert translate(expr) == 'now()'
-    # assert con.execute(expr) == now
 
 
 @pytest.mark.parametrize(
@@ -107,7 +118,7 @@
         ('minute', '2009-05-17 12:34:00'),
     ],
 )
-def test_timestamp_truncate(con, translate, unit, expected):
+def test_timestamp_truncate(con, unit, expected):
     stamp = ibis.timestamp('2009-05-17 12:34:56')
     expr = stamp.truncate(unit)
     assert con.execute(expr) == pd.Timestamp(expected)
@@ -268,7 +279,7 @@
 
 
 # TODO: clickhouse-driver escaping bug
-def test_re_replace(con, translate):
+def test_re_replace(con):
     expr1 = L('Hello, World!').re_replace('.', '\\\\0\\\\0')
     expr2 = L('Hello, World!').re_replace('^', 'here: ')
 
@@ -280,7 +291,7 @@
     ('value', 'expected'),
     [(L('a'), 0), (L('b'), 1), (L('d'), -1)], # TODO: what's the expected?
 )
-def test_find_in_set(con, value, expected, translate):
+def test_find_in_set(con, value, expected):
     vals = list('abc')
     expr = value.find_in_set(vals)
     assert con.execute(expr) == expected
@@ -312,12 +323,12 @@ def test_string_column_find_in_set(con, alltypes, translate):
         ),
     ],
 )
-def test_parse_url(con, translate, url, extract, expected):
+def test_parse_url(con, url, extract, expected):
     expr = url.parse_url(extract)
     assert con.execute(expr) == expected
 
 
-def test_parse_url_query_parameter(con, translate):
+def test_parse_url_query_parameter(con):
     url = L('https://www.youtube.com/watch?v=kEuEcWfewf8&t=10')
     expr = url.parse_url('QUERY', 't')
     assert con.execute(expr) == '10'
@@ -427,7 +438,7 @@ def test_translate_math_functions(con, alltypes, translate, call, expected):
         ),
     ],
 )
-def test_math_functions(con, expr, expected, translate):
+def test_math_functions(con, expr, expected):
     assert con.execute(expr) == expected
 
 
@@ -476,7 +487,7 @@ def test_regexp(con, expr, expected):
         # (L('abcd').re_extract('abcd', 3), None),
     ],
 )
-def test_regexp_extract(con, expr, expected, translate):
+def test_regexp_extract(con, expr, expected):
     assert con.execute(expr) == expected
 
 
@@ -496,14 +507,14 @@ def test_column_regexp_replace(con, alltypes, translate):
     assert len(con.execute(expr))
 
 
-def test_numeric_builtins_work(con, alltypes, df, translate):
+def test_numeric_builtins_work(alltypes, df):
     expr = alltypes.double_col
     result = expr.execute()
     expected = df.double_col.fillna(0)
     tm.assert_series_equal(result, expected)
 
 
-def test_null_column(alltypes, translate):
+def test_null_column(alltypes):
     t = alltypes
     nrows = t.count().execute()
     expr = t.mutate(na_column=ibis.NA).na_column
diff --git a/ibis/backends/clickhouse/tests/test_operators.py b/ibis/backends/clickhouse/tests/test_operators.py
index 213452082eae..aa40666fa26e 100644
--- a/ibis/backends/clickhouse/tests/test_operators.py
+++ b/ibis/backends/clickhouse/tests/test_operators.py
@@ -312,10 +312,10 @@ def test_array_index(con, arr, ids):
     ],
 )
 def test_array_concat(con, arrays):
-    expr = L([]).cast(dt.Array(dt.int8))
+    expr = L([]).cast("!array<int8>")
     expected = sum(arrays, [])
     for arr in arrays:
-        expr += L(arr)
+        expr += L(arr, type="!array<int8>")
 
     assert con.execute(expr) == expected
 
diff --git a/ibis/backends/clickhouse/tests/test_select.py b/ibis/backends/clickhouse/tests/test_select.py
index cfa69c0c6953..9e13d44a337f 100644
--- a/ibis/backends/clickhouse/tests/test_select.py
+++ b/ibis/backends/clickhouse/tests/test_select.py
@@ -193,7 +193,7 @@ def test_complex_array_expr_projection(db, alltypes):
     query = ibis.clickhouse.compile(expr2)
     name = expr2.get_name()
 
-    expected = f"""SELECT CAST(`string_col` AS Float64) AS `{name}`
+    expected = f"""SELECT CAST(`string_col` AS Nullable(Float64)) AS `{name}`
 FROM (
   SELECT `string_col`, count(*) AS `count`
   FROM {db.name}.`functional_alltypes`
diff --git a/ibis/backends/clickhouse/tests/test_types.py b/ibis/backends/clickhouse/tests/test_types.py
index d2f63fee600e..7b1785f551b3 100644
--- a/ibis/backends/clickhouse/tests/test_types.py
+++ b/ibis/backends/clickhouse/tests/test_types.py
@@ -1,8 +1,7 @@
 import pytest
-from pkg_resources import parse_version
 
 import ibis.expr.datatypes as dt
-from ibis.backends.clickhouse.client import ClickhouseDataType
+from ibis.backends.clickhouse.datatypes import parse
 
 pytest.importorskip("clickhouse_driver")
 
@@ -19,48 +18,128 @@ def test_column_types(alltypes):
 
 
 def test_columns_types_with_additional_argument(con):
-    sql_types = 
["toFixedString('foo', 8) AS fixedstring_col"] - if parse_version(con.version).base_version >= '1.1.54337': - sql_types.append( - "toDateTime('2018-07-02 00:00:00', 'UTC') AS datetime_col" - ) - sql = 'SELECT {}'.format(', '.join(sql_types)) - df = con.sql(sql).execute() + sql_types = [ + "toFixedString('foo', 8) AS fixedstring_col", + "toDateTime('2018-07-02 00:00:00', 'UTC') AS datetime_col", + ] + df = con.sql(f"SELECT {', '.join(sql_types)}").execute() assert df.fixedstring_col.dtype.name == 'object' - if parse_version(con.version).base_version >= '1.1.54337': - assert df.datetime_col.dtype.name == 'datetime64[ns]' + assert df.datetime_col.dtype.name == 'datetime64[ns, UTC]' @pytest.mark.parametrize( ('ch_type', 'ibis_type'), [ - ('Array(Int8)', dt.Array(dt.Int8(nullable=False))), - ('Array(Int16)', dt.Array(dt.Int16(nullable=False))), - ('Array(Int32)', dt.Array(dt.Int32(nullable=False))), - ('Array(Int64)', dt.Array(dt.Int64(nullable=False))), - ('Array(UInt8)', dt.Array(dt.UInt8(nullable=False))), - ('Array(UInt16)', dt.Array(dt.UInt16(nullable=False))), - ('Array(UInt32)', dt.Array(dt.UInt32(nullable=False))), - ('Array(UInt64)', dt.Array(dt.UInt64(nullable=False))), - ('Array(Float32)', dt.Array(dt.Float32(nullable=False))), - ('Array(Float64)', dt.Array(dt.Float64(nullable=False))), - ('Array(String)', dt.Array(dt.String(nullable=False))), - ('Array(FixedString(32))', dt.Array(dt.String(nullable=False))), - ('Array(Date)', dt.Array(dt.Date(nullable=False))), - ('Array(DateTime)', dt.Array(dt.Timestamp(nullable=False))), - ('Array(DateTime64)', dt.Array(dt.Timestamp(nullable=False))), - ('Array(Nothing)', dt.Array(dt.Null(nullable=False))), - ('Array(Null)', dt.Array(dt.Null(nullable=False))), - ('Array(Array(Int8))', dt.Array(dt.Array(dt.Int8(nullable=False)))), + ('Array(Int8)', dt.Array(dt.Int8(nullable=False), nullable=False)), + ('Array(Int16)', dt.Array(dt.Int16(nullable=False), nullable=False)), + ('Array(Int32)', dt.Array(dt.Int32(nullable=False), nullable=False)), + ('Array(Int64)', dt.Array(dt.Int64(nullable=False), nullable=False)), + ('Array(UInt8)', dt.Array(dt.UInt8(nullable=False), nullable=False)), + ('Array(UInt16)', dt.Array(dt.UInt16(nullable=False), nullable=False)), + ('Array(UInt32)', dt.Array(dt.UInt32(nullable=False), nullable=False)), + ('Array(UInt64)', dt.Array(dt.UInt64(nullable=False), nullable=False)), + ( + 'Array(Float32)', + dt.Array(dt.Float32(nullable=False), nullable=False), + ), + ( + 'Array(Float64)', + dt.Array(dt.Float64(nullable=False), nullable=False), + ), + ('Array(String)', dt.Array(dt.String(nullable=False), nullable=False)), + ( + 'Array(FixedString(32))', + dt.Array(dt.String(nullable=False), nullable=False), + ), + ('Array(Date)', dt.Array(dt.Date(nullable=False), nullable=False)), + ( + 'Array(DateTime)', + dt.Array(dt.Timestamp(nullable=False), nullable=False), + ), + ( + 'Array(DateTime64)', + dt.Array(dt.Timestamp(nullable=False), nullable=False), + ), + ('Array(Nothing)', dt.Array(dt.null, nullable=False)), + ('Array(Null)', dt.Array(dt.null, nullable=False)), + ( + 'Array(Array(Int8))', + dt.Array( + dt.Array(dt.Int8(nullable=False), nullable=False), + nullable=False, + ), + ), ( 'Array(Array(Array(Int8)))', - dt.Array(dt.Array(dt.Array(dt.Int8(nullable=False)))), + dt.Array( + dt.Array( + dt.Array(dt.Int8(nullable=False), nullable=False), + nullable=False, + ), + nullable=False, + ), ), ( 'Array(Array(Array(Array(Int8))))', - dt.Array(dt.Array(dt.Array(dt.Array(dt.Int8(nullable=False))))), + dt.Array( + dt.Array( + dt.Array( 
+ dt.Array(dt.Int8(nullable=False), nullable=False), + nullable=False, + ), + nullable=False, + ), + nullable=False, + ), + ), + ( + "Map(Nullable(String), Nullable(UInt64))", + dt.Map(dt.string, dt.uint64, nullable=False), + ), + ("Decimal(10, 3)", dt.Decimal(10, 3, nullable=False)), + ( + "Tuple(a String, b Array(Nullable(Float64)))", + dt.Struct.from_dict( + dict( + a=dt.String(nullable=False), + b=dt.Array(dt.float64, nullable=False), + ), + nullable=False, + ), + ), + ( + "Tuple(String, Array(Nullable(Float64)))", + dt.Struct.from_dict( + dict( + f0=dt.String(nullable=False), + f1=dt.Array(dt.float64, nullable=False), + ), + nullable=False, + ), + ), + ( + "Tuple(a String, Array(Nullable(Float64)))", + dt.Struct.from_dict( + dict( + a=dt.String(nullable=False), + f1=dt.Array(dt.float64, nullable=False), + ), + nullable=False, + ), + ), + ( + "Nested(a String, b Array(Nullable(Float64)))", + dt.Struct.from_dict( + dict( + a=dt.Array(dt.String(nullable=False), nullable=False), + b=dt.Array( + dt.Array(dt.float64, nullable=False), nullable=False + ), + ), + nullable=False, + ), ), ], ) -def test_array_type(ch_type, ibis_type): - assert ClickhouseDataType(ch_type).to_ibis() == ibis_type +def test_parse_type(ch_type, ibis_type): + assert parse(ch_type) == ibis_type diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index 14155a6f5b74..fe48bf6d2ba9 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -16,7 +16,7 @@ import ibis.expr.schema as sch from ibis.backends.base.sql.alchemy import BaseAlchemyBackend from ibis.backends.duckdb.compiler import DuckDBSQLCompiler -from ibis.backends.duckdb.datatypes import parse_type +from ibis.backends.duckdb.datatypes import parse class Backend(BaseAlchemyBackend): @@ -73,10 +73,7 @@ def _get_schema_using_query(self, query: str) -> sch.Schema: with self.con.connect() as con: rel = con.connection.c.query(query) return sch.Schema.from_dict( - { - name: parse_type(type) - for name, type in zip(rel.columns, rel.types) - } + {name: parse(type) for name, type in zip(rel.columns, rel.types)} ) def _get_sqla_table( @@ -113,7 +110,7 @@ def _get_sqla_table( if colname in nulltype_cols: column = sa.Column( colname, - to_sqla_type(parse_type(type)), + to_sqla_type(parse(type)), nullable=null == "YES", ) # replace null types discovered by sqlite with non null diff --git a/ibis/backends/duckdb/datatypes.py b/ibis/backends/duckdb/datatypes.py index d3ae0c8d0a88..1f04a35801ec 100644 --- a/ibis/backends/duckdb/datatypes.py +++ b/ibis/backends/duckdb/datatypes.py @@ -1,14 +1,26 @@ from __future__ import annotations -import re from typing import TYPE_CHECKING import parsy as p +from ibis import util + if TYPE_CHECKING: from ibis.expr.datatypes import DataType from ibis.expr.datatypes import ( + COLON, + COMMA, + FIELD, + LANGLE, + LBRACKET, + LPAREN, + PRECISION, + RANGLE, + RBRACKET, + RPAREN, + SCALE, Array, Decimal, Interval, @@ -24,6 +36,8 @@ int16, int32, int64, + spaceless, + spaceless_string, string, time, uint8, @@ -33,34 +47,9 @@ uuid, ) -_SPACES = p.regex(r'\s*', re.MULTILINE) - - -def spaceless(parser): - return _SPACES.then(parser).skip(_SPACES) - - -def spaceless_string(*strings: str): - return spaceless( - p.alt(*[p.string(s, transform=str.lower) for s in strings]) - ) - - -def parse_type(text: str, default_decimal_parameters=(18, 3)) -> DataType: - precision = scale = p.digit.at_least(1).concat().map(int) - - lparen = spaceless_string("(") - rparen = spaceless_string(")") - - lbracket = 
spaceless_string("[") - rbracket = spaceless_string("]") - - langle = spaceless_string("<") - rangle = spaceless_string(">") - - comma = spaceless_string(",") - colon = spaceless_string(":") +def parse(text: str, default_decimal_parameters=(18, 3)) -> DataType: + """Parse a DuckDB type into an ibis data type.""" primitive = ( spaceless_string("interval").result(Interval()) | spaceless_string("bigint", "int8", "long").result(int64) @@ -99,21 +88,21 @@ def parse_type(text: str, default_decimal_parameters=(18, 3)) -> DataType: def decimal(): yield spaceless_string("decimal", "numeric") prec_scale = ( - yield lparen.then( - p.seq(precision.skip(comma), scale).combine( + yield LPAREN.then( + p.seq(PRECISION.skip(COMMA), SCALE).combine( lambda prec, scale: (prec, scale) ) ) - .skip(rparen) + .skip(RPAREN) .optional() ) or default_decimal_parameters return Decimal(*prec_scale) @p.generate def angle_type(): - yield langle + yield LANGLE value_type = yield ty - yield rangle + yield RANGLE return value_type @p.generate @@ -125,34 +114,42 @@ def list_array(): @p.generate def pg_array(): value_type = yield non_pg_array_type - yield lbracket - yield rbracket + yield LBRACKET + yield RBRACKET return Array(value_type) @p.generate def map(): yield spaceless_string("map") - yield langle + yield LANGLE key_type = yield primitive - yield comma + yield COMMA value_type = yield ty - yield rangle + yield RANGLE return Map(key_type, value_type) - field = spaceless(p.regex("[a-zA-Z_][a-zA-Z_0-9]*")) + field = spaceless(FIELD) @p.generate def struct(): yield spaceless_string("struct") - yield langle + yield LANGLE field_names_types = yield ( - p.seq(field.skip(colon), ty) + p.seq(field.skip(COLON), ty) .combine(lambda field, ty: (field, ty)) - .sep_by(comma) + .sep_by(COMMA) ) - yield rangle + yield RANGLE return Struct.from_tuples(field_names_types) non_pg_array_type = primitive | decimal | list_array | map | struct ty = pg_array | non_pg_array_type return ty.parse(text) + + +@util.deprecated( + instead=f"use {parse.__module__}.{parse.__name__}", + version="4.0", +) +def parse_type(*args, **kwargs): + return parse(*args, **kwargs) diff --git a/ibis/backends/duckdb/tests/test_datatypes.py b/ibis/backends/duckdb/tests/test_datatypes.py index 21a87fef609d..e08597ea0a46 100644 --- a/ibis/backends/duckdb/tests/test_datatypes.py +++ b/ibis/backends/duckdb/tests/test_datatypes.py @@ -2,7 +2,7 @@ from pytest import param import ibis.expr.datatypes as dt -from ibis.backends.duckdb.datatypes import parse_type +from ibis.backends.duckdb.datatypes import parse, parse_type EXPECTED_SCHEMA = dict( a=dt.int64, @@ -116,5 +116,10 @@ ], ) def test_parser(column, type): - ty = parse_type(type) + ty = parse(type) assert ty == EXPECTED_SCHEMA[column] + + +def test_parse_type_warns(): + with pytest.warns(FutureWarning): + parse_type("BIGINT") diff --git a/ibis/backends/postgres/datatypes.py b/ibis/backends/postgres/datatypes.py index dc02cf516e7c..abb42edb5d39 100644 --- a/ibis/backends/postgres/datatypes.py +++ b/ibis/backends/postgres/datatypes.py @@ -8,7 +8,7 @@ def _get_type(typestr: str) -> dt.DataType: try: return _type_mapping[typestr] except KeyError: - return ddb.parse_type(typestr) + return ddb.parse(typestr) _type_mapping = { diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py index a8b27e34ac9c..83be8b6ef7cd 100644 --- a/ibis/backends/tests/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -80,11 +80,6 @@ def test_np_array_literal(con): builtin_array = toolz.compose( - # the type 
parser needs additional work for this to work - pytest.mark.broken( - "clickhouse", - reason="nullable types can't yet be parsed", - ), # these will almost certainly never be supported pytest.mark.never( ["mysql", "sqlite"], @@ -112,4 +107,6 @@ def test_array_discovery(con): scalar_column=dt.float64, ) ) - assert t.schema() == expected + assert t.columns == list(expected.names) + for ty, expected_ty in zip(t.schema().types, expected.types): + assert isinstance(ty, type(expected_ty)) diff --git a/ibis/expr/datatypes/core.py b/ibis/expr/datatypes/core.py index a586dafb5498..5165602f061d 100644 --- a/ibis/expr/datatypes/core.py +++ b/ibis/expr/datatypes/core.py @@ -26,6 +26,7 @@ from multipledispatch import Dispatcher from public import public +from ibis import util from ibis.common.exceptions import IbisTypeError, InputTypeError from ibis.common.grounds import Annotable, Comparable, Singleton from ibis.common.validators import ( @@ -61,7 +62,7 @@ def default(value, **kwargs) -> DataType: @dtype.register(str) def from_string(value: str) -> DataType: try: - return parse_type(value) + return parse(value) except SyntaxError: raise IbisTypeError(f'{value!r} cannot be parsed as a datatype') @@ -936,20 +937,62 @@ class INET(String): _STRING_REGEX = """('[^\n'\\\\]*(?:\\\\.[^\n'\\\\]*)*'|"[^\n"\\\\"]*(?:\\\\.[^\n"\\\\]*)*")""" # noqa: E501 -_SPACES = p.regex(r'\s*', re.MULTILINE) +SPACES = p.regex(r'\s*', re.MULTILINE) +@public def spaceless(parser): - return _SPACES.then(parser).skip(_SPACES) + return SPACES.then(parser).skip(SPACES) + + +@public +def spaceless_string(*strings: str): + return spaceless( + p.alt(*(p.string(string, transform=str.lower) for string in strings)) + ) + +RAW_NUMBER = p.digit.at_least(1).concat() +PRECISION = SCALE = NUMBER = RAW_NUMBER.map(int) -def spaceless_string(s: str): - return spaceless(p.string(s, transform=str.lower)) +LPAREN = spaceless_string("(") +RPAREN = spaceless_string(")") + +LBRACKET = spaceless_string("[") +RBRACKET = spaceless_string("]") + +LANGLE = spaceless_string("<") +RANGLE = spaceless_string(">") + +COMMA = spaceless_string(",") +COLON = spaceless_string(":") +SEMICOLON = spaceless_string(";") + +RAW_STRING = p.regex(_STRING_REGEX).map(ast.literal_eval) +FIELD = p.regex("[a-zA-Z_][a-zA-Z_0-9]*") + +public( + COLON=COLON, + COMMA=COMMA, + FIELD=FIELD, + LANGLE=LANGLE, + LBRACKET=LBRACKET, + RBRACKET=RBRACKET, + LPAREN=LPAREN, + NUMBER=NUMBER, + PRECISION=PRECISION, + RANGLE=RANGLE, + RAW_STRING=RAW_STRING, + RPAREN=RPAREN, + SCALE=SCALE, + SEMICOLON=SEMICOLON, + SPACES=SPACES, +) @public @functools.lru_cache(maxsize=100) -def parse_type(text: str) -> DataType: +def parse(text: str) -> DataType: """Parse a type from a [`str`][str] `text`. 
The default `maxsize` parameter for caching is chosen to cache the most
@@ -967,50 +1010,38 @@
     >>> import ibis
     >>> import ibis.expr.datatypes as dt
-    >>> dt.parse_type("array<int64>")
+    >>> dt.parse("array<int64>")
     Array(value_type=Int64(nullable=True), nullable=True)
 
     You can avoid parsing altogether by constructing objects directly
 
     >>> import ibis
     >>> import ibis.expr.datatypes as dt
-    >>> ty = dt.parse_type("array<int64>")
+    >>> ty = dt.parse("array<int64>")
     >>> ty == dt.Array(dt.int64)
     True
     """
-    precision = scale = srid = p.digit.at_least(1).concat().map(int)
-
-    lparen = spaceless_string("(")
-    rparen = spaceless_string(")")
-
-    langle = spaceless_string("<")
-    rangle = spaceless_string(">")
-
-    comma = spaceless_string(",")
-    colon = spaceless_string(":")
-    semicolon = spaceless_string(";")
-
-    raw_string = p.regex(_STRING_REGEX).map(ast.literal_eval)
+    srid = NUMBER
 
     geotype = spaceless_string("geography") | spaceless_string("geometry")
 
     @p.generate
     def srid_geotype():
-        yield semicolon
+        yield SEMICOLON
         sr = yield srid
-        yield colon
+        yield COLON
         gt = yield geotype
         return (gt, sr)
 
     @p.generate
     def geotype_part():
-        yield colon
+        yield COLON
         gt = yield geotype
         return (gt, None)
 
     @p.generate
     def srid_part():
-        yield semicolon
+        yield SEMICOLON
         sr = yield srid
         return (None, sr)
 
@@ -1062,32 +1093,28 @@ def parser():
 
     @p.generate
     def varchar_or_char():
-        yield p.alt(
-            spaceless_string("varchar"), spaceless_string("char")
-        ).then(
-            lparen.then(p.digit.at_least(1).concat()).skip(rparen).optional()
+        yield spaceless_string("varchar", "char").then(
+            LPAREN.then(RAW_NUMBER).skip(RPAREN).optional()
         )
         return String()
 
     @p.generate
     def decimal():
         yield spaceless_string("decimal")
-        prec, sc = (
-            yield lparen.then(
-                p.seq(precision.skip(comma), scale).combine(
-                    lambda prec, scale: (prec, scale)
-                )
+        precision, scale = (
+            yield LPAREN.then(
+                p.seq(spaceless(PRECISION).skip(COMMA), spaceless(SCALE))
             )
-            .skip(rparen)
+            .skip(RPAREN)
             .optional()
         ) or (None, None)
-        return Decimal(precision=prec, scale=sc)
+        return Decimal(precision=precision, scale=scale)
 
     @p.generate
     def parened_string():
-        yield lparen
-        s = yield raw_string
-        yield rparen
+        yield LPAREN
+        s = yield RAW_STRING
+        yield RPAREN
         return s
 
     @p.generate
     def timestamp():
@@ -1098,18 +1125,19 @@
 
     @p.generate
     def angle_type():
-        yield langle
+        yield LANGLE
         value_type = yield ty
-        yield rangle
+        yield RANGLE
         return value_type
 
     @p.generate
     def interval():
         yield spaceless_string("interval")
         value_type = yield angle_type.optional()
-        un = yield parened_string.optional()
+        unit = yield parened_string.optional()
         return Interval(
-            value_type=value_type, unit=un if un is not None else 's'
+            value_type=value_type,
+            unit=unit if unit is not None else "s",
         )
 
     @p.generate
@@ -1127,29 +1155,36 @@ def set():
 
     @p.generate
     def map():
         yield spaceless_string("map")
-        yield langle
+        yield LANGLE
         key_type = yield primitive
-        yield comma
+        yield COMMA
         value_type = yield ty
-        yield rangle
+        yield RANGLE
         return Map(key_type, value_type)
 
-    field = spaceless(p.regex("[a-zA-Z_][a-zA-Z_0-9]*"))
+    spaceless_field = spaceless(FIELD)
 
     @p.generate
     def struct():
         yield spaceless_string("struct")
-        yield langle
+        yield LANGLE
         field_names_types = yield (
-            p.seq(field.skip(colon), ty)
+            p.seq(spaceless_field.skip(COLON), ty)
             .combine(lambda field, ty: (field, ty))
-            .sep_by(comma)
+            .sep_by(COMMA)
         )
-        yield rangle
+        yield RANGLE
         return Struct.from_tuples(field_names_types)
 
+    @p.generate
+    def nullable():
+        yield spaceless_string("!")
+        parsed_ty = yield ty
+        return 
parsed_ty(nullable=False) + ty = ( - timestamp + nullable + | timestamp | primitive | decimal | varchar_or_char @@ -1172,6 +1207,15 @@ def struct(): return ty.parse(text) +@util.deprecated( + instead=f"use {parse.__module__}.{parse.__name__}", + version="4.0", +) +@public +def parse_type(*args, **kwargs): + return parse(*args, **kwargs) + + def _get_timedelta_units( timedelta: datetime.timedelta | pd.Timedelta, ) -> list[str]: diff --git a/ibis/tests/benchmarks/test_benchmarks.py b/ibis/tests/benchmarks/test_benchmarks.py index 78c6523df1be..60a88ba4981b 100644 --- a/ibis/tests/benchmarks/test_benchmarks.py +++ b/ibis/tests/benchmarks/test_benchmarks.py @@ -544,8 +544,8 @@ def test_complex_datatype_parse(benchmark): ) ) ) - assert dt.parse_type(type_str) == expected - benchmark(dt.parse_type, type_str) + assert dt.parse(type_str) == expected + benchmark(dt.parse, type_str) @pytest.mark.benchmark(group="datatype") diff --git a/ibis/tests/expr/test_datatypes.py b/ibis/tests/expr/test_datatypes.py index e9d1d62934e5..6b5ea692cec3 100644 --- a/ibis/tests/expr/test_datatypes.py +++ b/ibis/tests/expr/test_datatypes.py @@ -502,3 +502,8 @@ class MyStruct(dt.Struct): dtype = MyStruct.from_tuples([('a', 'int64')]) assert isinstance(dtype, MyStruct) + + +def test_parse_type_warns(): + with pytest.warns(FutureWarning): + dt.parse_type("int64") diff --git a/poetry.lock b/poetry.lock index 5ce59352d5d3..46802d895252 100644 --- a/poetry.lock +++ b/poetry.lock @@ -251,6 +251,8 @@ optional = true python-versions = ">=3.4.*, <4" [package.dependencies] +numpy = {version = ">=1.12.0", optional = true, markers = "extra == \"numpy\""} +pandas = {version = ">=0.24.0", optional = true, markers = "extra == \"numpy\""} pytz = "*" tzlocal = "*" @@ -2323,7 +2325,7 @@ visualization = ["graphviz"] [metadata] lock-version = "1.1" python-versions = ">=3.8,<3.11" -content-hash = "141e837de174d1ddbbc1a7f8d519c0b0320323e4bb38f08de5f80740372ec4e3" +content-hash = "6cdfde1ce4daee23a5c5e195b89ba5a3ef5d436e700899573dbe184344fc17cb" [metadata.files] absolufy-imports = [ diff --git a/pyproject.toml b/pyproject.toml index 12782ca841fa..eb9d87b8f253 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,7 +40,9 @@ regex = ">=2021.7.6" tabulate = ">=0.8.9,<1" toolz = ">=0.11,<0.12" clickhouse-cityhash = { version = ">=1.0.2,<2", optional = true } -clickhouse-driver = { version = ">=0.1,<0.3", optional = true } +clickhouse-driver = { version = ">=0.1,<0.3", optional = true, extras = [ + "numpy" +] } dask = { version = ">=2021.10.0", optional = true, extras = [ "array", "dataframe" diff --git a/setup.py b/setup.py index 9a2ebf828082..e37289964744 100644 --- a/setup.py +++ b/setup.py @@ -68,7 +68,7 @@ ':python_version < "3.10"': ['importlib-metadata>=4,<5'], 'all': [ 'clickhouse-cityhash>=1.0.2,<2', - 'clickhouse-driver>=0.1,<0.3', + 'clickhouse-driver[numpy]>=0.1,<0.3', 'dask[array,dataframe]>=2021.10.0', 'datafusion>=0.4,<0.6', 'duckdb>=0.3.2,<0.4.0', @@ -89,7 +89,7 @@ ], 'clickhouse': [ 'clickhouse-cityhash>=1.0.2,<2', - 'clickhouse-driver>=0.1,<0.3', + 'clickhouse-driver[numpy]>=0.1,<0.3', 'lz4>=3.1.10,<5', ], 'dask': ['dask[array,dataframe]>=2021.10.0', 'pyarrow>=1,<8'],
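
A sketch of the new `!` shorthand that the `nullable` combinator added to ibis/expr/datatypes/core.py enables (hand-traced against the parser above, assuming the standard `DataType` reprs shown in the docstring examples):

    >>> import ibis.expr.datatypes as dt
    >>> dt.parse("int64")    # types parse as nullable by default
    Int64(nullable=True)
    >>> dt.parse("!int64")   # a leading ! yields the non-nullable variant
    Int64(nullable=False)
    >>> dt.parse("!array<!string>")  # the prefix applies per type, so it nests
    Array(value_type=String(nullable=False), nullable=False)

This is the syntax the ClickHouse tests above rely on, e.g. "!array<int8>" and "!struct<a: string, b: int64>".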