Skip to content

Commit

Permalink
feat(druid): add Apache Druid backend
Browse files Browse the repository at this point in the history
  • Loading branch information
cpcloud authored and kszucs committed Feb 28, 2023
1 parent 91bef71 commit c4cc2a6
Show file tree
Hide file tree
Showing 42 changed files with 1,470 additions and 252 deletions.
14 changes: 14 additions & 0 deletions .github/workflows/ibis-backends.yml
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,12 @@ jobs:
- postgres
services:
- trino
- name: druid
title: Druid
extras:
- druid
services:
- druid
include:
- os: ubuntu-latest
python-version: "3.10"
Expand Down Expand Up @@ -221,6 +227,14 @@ jobs:
extras:
- trino
- postgres
- os: windows-latest
backend:
name: druid
title: Druid
extras:
- druid
services:
- druid
steps:
- name: update and install system dependencies
if: matrix.os == 'ubuntu-latest' && matrix.backend.sys-deps != null
Expand Down
11 changes: 6 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,20 +52,21 @@ Ibis aims to be a future-proof solution to interacting with data using Python an

Ibis acts as a universal frontend to the following systems:

- [Apache Impala](https://ibis-project.org/backends/Impala/)
- [BigQuery](https://ibis-project.org/backends/BigQuery/)
- [ClickHouse](https://ibis-project.org/backends/ClickHouse/)
- [Dask](https://ibis-project.org/backends/Dask/)
- [Druid](https://ibis-project.org/backends/Druid/) (experimental)

This comment has been minimized.

Copy link
@krzysztof-kwitt

krzysztof-kwitt Feb 28, 2023

Contributor

The ASF prefers you use the full Apache ProjectName name for all our projects.

- [DuckDB](https://ibis-project.org/backends/DuckDB/)
- [Google BigQuery](https://ibis-project.org/backends/BigQuery/)
- [HeavyAI](https://github.com/heavyai/ibis-heavyai)
- [Impala](https://ibis-project.org/backends/Impala/)

This comment has been minimized.

Copy link
@krzysztof-kwitt

krzysztof-kwitt Feb 28, 2023

Contributor

Impala is also maintained by the Apache Software Foundation.

- [MySQL](https://ibis-project.org/backends/MySQL/)
- [Microsoft SQL Server](https://ibis-project.org/backends/MSSQL/)
- [Pandas](https://ibis-project.org/backends/Pandas/)
- [Polars](https://ibis-project.org/backends/Polars/)
- [Polars](https://ibis-project.org/backends/Polars/) (experimental)
- [PostgreSQL](https://ibis-project.org/backends/PostgreSQL/)
- [PySpark](https://ibis-project.org/backends/PySpark/)
- [Snowflake](https://ibis-project.org/backends/Snowflake) (experimental)
- [SQL Server](https://ibis-project.org/backends/MSSQL/)
- [SQLite](https://ibis-project.org/backends/SQLite/)
- [Snowflake](https://ibis-project.org/backends/Snowflake) (experimental)
- [Trino](https://ibis-project.org/backends/Trino/) (experimental)

The list of supported backends is continuously growing. Anyone can get involved
Expand Down
47 changes: 47 additions & 0 deletions ci/schema/druid.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
-- Ingest /opt/shared/diamonds.csv into the "diamonds" datasource.
-- REPLACE ... OVERWRITE ALL replaces whatever the datasource already holds.
-- EXTERN takes, in order: the input source, the input format, and the row
-- signature (column names and types) used to read the CSV.
REPLACE INTO "diamonds" OVERWRITE ALL
SELECT *
FROM TABLE(EXTERN(
    '{"type":"local","files":["/opt/shared/diamonds.csv"]}',
    '{"type":"csv", "findColumnsFromHeader":true}',
    '[{"name":"carat","type":"double"},{"name":"cut","type":"string"},{"name":"color","type":"string"},{"name":"clarity","type":"string"},{"name":"depth","type":"double"},{"name":"table","type":"double"},{"name":"price","type":"long"},{"name":"x","type":"double"},{"name":"y","type":"double"},{"name":"z","type":"double"}]'
))
PARTITIONED BY ALL TIME;

-- Ingest /opt/shared/batting.csv into the "batting" datasource.
-- REPLACE ... OVERWRITE ALL replaces whatever the datasource already holds.
-- playerID, teamID and lgID are read as strings. Every other column in the
-- signature is read as a long.
REPLACE INTO "batting" OVERWRITE ALL
SELECT *
FROM TABLE(EXTERN(
    '{"type":"local","files":["/opt/shared/batting.csv"]}',
    '{"type":"csv", "findColumnsFromHeader":true}',
    '[{"name":"playerID","type":"string"},{"name":"yearID","type":"long"},{"name":"stint","type":"long"},{"name":"teamID","type":"string"},{"name":"lgID","type":"string"},{"name":"G","type":"long"},{"name":"AB","type":"long"},{"name":"R","type":"long"},{"name":"H","type":"long"},{"name":"X2B","type":"long"},{"name":"X3B","type":"long"},{"name":"HR","type":"long"},{"name":"RBI","type":"long"},{"name":"SB","type":"long"},{"name":"CS","type":"long"},{"name":"BB","type":"long"},{"name":"SO","type":"long"},{"name":"IBB","type":"long"},{"name":"HBP","type":"long"},{"name":"SH","type":"long"},{"name":"SF","type":"long"},{"name":"GIDP","type":"long"}]'
))
PARTITIONED BY ALL TIME;

-- Ingest /opt/shared/awards_players.csv into the "awards_players" datasource.
-- REPLACE ... OVERWRITE ALL replaces whatever the datasource already holds.
-- Only yearID is read as a long. The remaining columns are read as strings.
REPLACE INTO "awards_players" OVERWRITE ALL
SELECT *
FROM TABLE(EXTERN(
    '{"type":"local","files":["/opt/shared/awards_players.csv"]}',
    '{"type":"csv", "findColumnsFromHeader":true}',
    '[{"name":"playerID","type":"string"},{"name":"awardID","type":"string"},{"name":"yearID","type":"long"},{"name":"lgID","type":"string"},{"name":"tie","type":"string"},{"name":"notes","type":"string"}]'
))
PARTITIONED BY ALL TIME;

-- Ingest /opt/shared/functional_alltypes.csv into the "functional_alltypes"
-- datasource, replacing whatever the datasource already holds.
-- Unlike the other fixture tables, this one sets __time explicitly:
-- timestamp_col is read from the CSV as a string and parsed with TIME_PARSE.
-- The original timestamp_col is still kept, because the select list also
-- includes every source column via the star.
REPLACE INTO "functional_alltypes" OVERWRITE ALL
SELECT
    TIME_PARSE(timestamp_col) AS __time,
    *
FROM TABLE(EXTERN(
    '{"type":"local","files":["/opt/shared/functional_alltypes.csv"]}',
    '{"type":"csv", "findColumnsFromHeader":true}',
    '[{"name":"index","type":"long"},{"name":"Unnamed: 0","type":"long"},{"name":"id","type":"long"},{"name":"bool_col","type":"long"},{"name":"tinyint_col","type":"long"},{"name":"smallint_col","type":"long"},{"name":"int_col","type":"long"},{"name":"bigint_col","type":"long"},{"name":"float_col","type":"double"},{"name":"double_col","type":"double"},{"name":"date_string_col","type":"string"},{"name":"string_col","type":"string"},{"name":"timestamp_col","type":"string"},{"name":"year","type":"long"},{"name":"month","type":"long"}]'
))
PARTITIONED BY ALL TIME;
175 changes: 175 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -174,10 +174,185 @@ services:
- $PWD/docker/trino/catalog/memory.properties:/etc/trino/catalog/memory.properties:ro
- $PWD/docker/trino/jvm.config:/etc/trino/jvm.config:ro

# PostgreSQL instance on the `druid` network. The database name, user and
# password set here match the druid_metadata_storage_connector_* values in
# docker/druid/environment.
druid-postgres:
  container_name: druid-postgres
  image: postgres:15.2-alpine
  networks:
    - druid
  volumes:
    - metadata_data:/var/lib/postgresql/data
  environment:
    POSTGRES_DB: druid
    POSTGRES_USER: druid
    POSTGRES_PASSWORD: FoolishPassword
  healthcheck:
    # Healthy once something accepts TCP connections on port 5432.
    test: ["CMD-SHELL", "nc -z 127.0.0.1 5432"]
    interval: 10s
    timeout: 90s
    retries: 9

# Need 3.5 or later for container nodes
# The hostname `zookeeper` is the value docker/druid/environment uses for
# druid_zk_service_host.
druid-zookeeper:
  image: zookeeper:3.5
  container_name: zookeeper
  hostname: zookeeper
  networks:
    - druid
  environment:
    ZOO_MY_ID: "1"
  healthcheck:
    # Healthy once something accepts TCP connections on port 2181.
    test: ["CMD-SHELL", "nc -z 127.0.0.1 2181"]
    interval: 10s
    timeout: 90s
    retries: 9

# Druid process started with the `coordinator` command.
# Mounts the shared `druid` volume at /opt/shared, which is where
# docker/druid/environment places segments and indexing logs.
druid-coordinator:
  image: apache/druid:25.0.0
  hostname: coordinator
  container_name: coordinator
  command:
    - coordinator
  env_file:
    - ./docker/druid/environment
  volumes:
    - druid:/opt/shared
    - coordinator_var:/opt/druid/var
  depends_on:
    # Long-form depends_on: wait until each dependency's healthcheck passes.
    # The short list form only waits for the dependency container to start,
    # which leaves the healthchecks defined on those services unused.
    druid-zookeeper:
      condition: service_healthy
    druid-postgres:
      condition: service_healthy
  healthcheck:
    # Healthy once something accepts TCP connections on port 8081.
    interval: 10s
    retries: 9
    timeout: 90s
    test:
      - CMD-SHELL
      - nc -z 127.0.0.1 8081
  networks:
    - druid

# Druid process started with the `broker` command.
# Port 8082 is published to the host. It is the port used by DRUID_URL in
# flake.nix and by the connection example in docs/backends/Druid.md.
druid-broker:
  image: apache/druid:25.0.0
  hostname: broker
  container_name: broker
  command:
    - broker
  env_file:
    - ./docker/druid/environment
  ports:
    - "8082:8082"
  volumes:
    - broker_var:/opt/druid/var
  depends_on:
    # Long-form depends_on: wait until each dependency's healthcheck passes.
    # The short list form only waits for the dependency container to start,
    # which leaves the healthchecks defined on those services unused.
    druid-zookeeper:
      condition: service_healthy
    druid-postgres:
      condition: service_healthy
    druid-coordinator:
      condition: service_healthy
  healthcheck:
    # Healthy once something accepts TCP connections on port 8082.
    interval: 10s
    retries: 9
    timeout: 90s
    test:
      - CMD-SHELL
      - nc -z 127.0.0.1 8082
  networks:
    - druid

# Druid process started with the `historical` command.
# Mounts the shared `druid` volume at /opt/shared, which is where
# docker/druid/environment places segments and indexing logs.
druid-historical:
  image: apache/druid:25.0.0
  hostname: historical
  container_name: historical
  command:
    - historical
  env_file:
    - ./docker/druid/environment
  volumes:
    - druid:/opt/shared
    - historical_var:/opt/druid/var
  depends_on:
    # Long-form depends_on: wait until each dependency's healthcheck passes.
    # The short list form only waits for the dependency container to start,
    # which leaves the healthchecks defined on those services unused.
    druid-zookeeper:
      condition: service_healthy
    druid-postgres:
      condition: service_healthy
    druid-coordinator:
      condition: service_healthy
  healthcheck:
    # Healthy once something accepts TCP connections on port 8083.
    interval: 10s
    retries: 9
    timeout: 90s
    test:
      - CMD-SHELL
      - nc -z 127.0.0.1 8083
  networks:
    - druid

# Druid process started with the `middleManager` command.
# Mounts the shared `druid` volume at /opt/shared, the directory the
# ci/schema/druid.sql ingestion statements read their CSV files from.
druid-middlemanager:
  image: apache/druid:25.0.0
  hostname: middlemanager
  container_name: middlemanager
  command:
    - middleManager
  env_file:
    - ./docker/druid/environment
  volumes:
    - druid:/opt/shared
    - middle_var:/opt/druid/var
  depends_on:
    # Long-form depends_on: wait until each dependency's healthcheck passes.
    # The short list form only waits for the dependency container to start,
    # which leaves the healthchecks defined on those services unused.
    druid-zookeeper:
      condition: service_healthy
    druid-postgres:
      condition: service_healthy
    druid-coordinator:
      condition: service_healthy
  healthcheck:
    # Healthy once something accepts TCP connections on port 8091.
    interval: 10s
    retries: 9
    timeout: 90s
    test:
      - CMD-SHELL
      - nc -z 127.0.0.1 8091
  networks:
    - druid

# Druid process started with the `router` command. Port 8888 is published to
# the host. The service is named plain `druid`, which is the name the CI
# workflow lists under `services` for the Druid backend.
druid:
  image: apache/druid:25.0.0
  hostname: router
  container_name: router
  command:
    - router
  env_file:
    - ./docker/druid/environment
  ports:
    - "8888:8888"
  volumes:
    - router_var:/opt/druid/var
  depends_on:
    # Long-form depends_on: wait until every other Druid service reports
    # healthy. The short list form only waits for the dependency containers
    # to start, which leaves their healthchecks unused.
    druid-zookeeper:
      condition: service_healthy
    druid-postgres:
      condition: service_healthy
    druid-coordinator:
      condition: service_healthy
    druid-middlemanager:
      condition: service_healthy
    druid-historical:
      condition: service_healthy
    druid-broker:
      condition: service_healthy
  healthcheck:
    # Healthy once something accepts TCP connections on port 8888.
    interval: 10s
    retries: 9
    timeout: 90s
    test:
      - CMD-SHELL
      - nc -z 127.0.0.1 8888
  networks:
    - druid

# Named networks, all with default settings (presumably one per backend
# stack). `druid` is the network every druid-* service and the `druid` router
# service join.
networks:
  impala:
  mysql:
  mssql:
  clickhouse:
  postgres:
  trino:
  druid:

# Named volumes used by the Druid services, all with default settings.
volumes:
  # Shared at /opt/shared by the coordinator, historical and middlemanager.
  druid:
  # PostgreSQL data directory of druid-postgres.
  metadata_data:
  # Per-service /opt/druid/var directories.
  coordinator_var:
  broker_var:
  historical_var:
  middle_var:
  router_var:
55 changes: 55 additions & 0 deletions docker/druid/environment
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

# Settings loaded through `env_file` by every apache/druid service in
# docker-compose.yml (coordinator, broker, historical, middlemanager, router).

# Java tuning
# NOTE(review): these presumably size the JVM of each Druid process via the
# image's startup script; confirm against the apache/druid image docs.
DRUID_XMX=2g
DRUID_XMS=64m
DRUID_MAXNEWSIZE=128m
DRUID_NEWSIZE=64m
DRUID_MAXDIRECTMEMORYSIZE=2g

druid_emitter_logging_logLevel=debug

# postgresql-metadata-storage goes with the PostgreSQL metadata store
# configured below. druid-multi-stage-query is presumably what enables the
# REPLACE INTO ... EXTERN statements in ci/schema/druid.sql (TODO confirm).
druid_extensions_loadList=["postgresql-metadata-storage", "druid-multi-stage-query"]

# `zookeeper` is the hostname given to the druid-zookeeper compose service.
druid_zk_service_host=zookeeper

druid_worker_capacity=6
druid_generic_useDefaultValueForNull=true

# Metadata store: the druid-postgres compose service. The database, user and
# password match that service's POSTGRES_DB / POSTGRES_USER / POSTGRES_PASSWORD.
# NOTE(review): the host is left empty; the JDBC URI below names host and port.
druid_metadata_storage_host=
druid_metadata_storage_type=postgresql
druid_metadata_storage_connector_connectURI=jdbc:postgresql://druid-postgres:5432/druid
druid_metadata_storage_connector_user=druid
druid_metadata_storage_connector_password=FoolishPassword

druid_coordinator_balancer_strategy=cachingCost

# JVM options and buffer size set through the druid_indexer_* properties.
druid_indexer_runner_javaOptsArray=["-server", "-Xmx1g", "-Xms64m", "-XX:MaxDirectMemorySize=2g", "-Duser.timezone=UTC", "-Dfile.encoding=UTF-8", "-Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager"]
druid_indexer_fork_property_druid_processing_buffer_sizeBytes=64MiB

# Segments and indexing logs are kept on the local filesystem under
# /opt/shared, the mount point of the `druid` volume shared by the
# coordinator, historical and middlemanager services.
druid_storage_type=local
druid_storage_storageDirectory=/opt/shared/segments
druid_indexer_logs_type=file
druid_indexer_logs_directory=/opt/shared/indexing-logs

druid_processing_numThreads=1
druid_processing_numMergeBuffers=1

# Inline log4j2 configuration: a console appender, root logger at info, and
# org.apache.druid.jetty.RequestLog at DEBUG. Must stay on a single line.
DRUID_LOG4J=<?xml version="1.0" encoding="UTF-8" ?><Configuration status="WARN"><Appenders><Console name="Console" target="SYSTEM_OUT"><PatternLayout pattern="%d{ISO8601} %p [%t] %c - %m%n"/></Console></Appenders><Loggers><Root level="info"><AppenderRef ref="Console"/></Root><Logger name="org.apache.druid.jetty.RequestLog" additivity="false" level="DEBUG"><AppenderRef ref="Console"/></Logger></Loggers></Configuration>
11 changes: 11 additions & 0 deletions docs/backends/Druid.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
---
backend_name: Druid
backend_url: https://druid.apache.org/
backend_module: druid
backend_param_style: a SQLAlchemy connection string
backend_connection_example: ibis.connect("druid://localhost:8082/druid/v2/sql")
is_experimental: true
version_added: "5.0"
---

{% include 'backends/template.md' %}
1 change: 1 addition & 0 deletions flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@
PGPASSWORD = "postgres";
MYSQL_PWD = "ibis";
MSSQL_SA_PASSWORD = "1bis_Testing!";
DRUID_URL = "druid://localhost:8082/druid/v2/sql";
};
in
rec {
Expand Down
Loading

0 comments on commit c4cc2a6

Please sign in to comment.