Skip to content

Commit

Permalink
🎉 New Destination: Apache Doris (#17884)
Browse files Browse the repository at this point in the history
* first commit

* normalized code and integration test

* add bootstrap.md

* add doris to destination def

* auto-bump connector version

* format files

Co-authored-by: marcosmarxm <marcosmarxm@gmail.com>
Co-authored-by: Octavia Squidington III <octavia-squidington-iii@users.noreply.github.com>
  • Loading branch information
3 people authored Nov 15, 2022
1 parent b52caa0 commit 241cf4a
Show file tree
Hide file tree
Showing 24 changed files with 1,647 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@
documentationUrl: https://docs.airbyte.com/integrations/destinations/amazon-sqs
icon: amazonsqs.svg
releaseStage: alpha
- name: Apache Doris
destinationDefinitionId: 05c161bf-ca73-4d48-b524-d392be417002
dockerRepository: airbyte/destination-doris
dockerImageTag: 0.1.0
documentationUrl: https://docs.airbyte.com/integrations/destinations/doris
releaseStage: alpha
- name: AWS Datalake
destinationDefinitionId: 99878c90-0fbd-46d3-9d98-ffde879d17fc
dockerRepository: airbyte/destination-aws-datalake
Expand Down
61 changes: 61 additions & 0 deletions airbyte-config/init/src/main/resources/seed/destination_specs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,67 @@
supportsDBT: false
supported_destination_sync_modes:
- "append"
- dockerImage: "airbyte/destination-doris:0.1.0"
spec:
documentationUrl: "https://docs.airbyte.io/integrations/destinations/doris"
connectionSpecification:
$schema: "http://json-schema.org/draft-07/schema#"
title: "Doris Destination Spec"
type: "object"
required:
- "host"
- "httpport"
- "queryport"
- "username"
- "database"
properties:
host:
title: "Host"
description: "Hostname of the database"
type: "string"
order: 0
httpport:
title: "HttpPort"
description: "Http Port of the database."
type: "integer"
minimum: 0
maximum: 65536
default: 8030
examples:
- "8030"
order: 1
queryport:
title: "QueryPort"
description: "Query(SQL) Port of the database."
type: "integer"
minimum: 0
maximum: 65536
default: 9030
examples:
- "9030"
order: 2
database:
title: "DataBase Name"
description: "Name of the database."
type: "string"
order: 3
username:
title: "UserName"
description: "Username to use to access the database."
type: "string"
order: 4
password:
title: "Password"
description: "Password associated with the username."
type: "string"
airbyte_secret: true
order: 5
supportsIncremental: false
supportsNormalization: false
supportsDBT: false
supported_destination_sync_modes:
- "append"
- "overwrite"
- dockerImage: "airbyte/destination-aws-datalake:0.1.1"
spec:
documentationUrl: "https://docs.airbyte.com/integrations/destinations/aws-datalake"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
*
!Dockerfile
!build
18 changes: 18 additions & 0 deletions airbyte-integrations/connectors/destination-doris/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Stage 1 ("build"): unpack the connector distribution tarball.
FROM airbyte/integration-base-java:dev AS build

WORKDIR /airbyte
ENV APPLICATION destination-doris

# The tarball is expected under build/distributions/ in the build context.
COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar

# Extract into /airbyte (dropping the archive's top-level directory), then delete the archive.
RUN tar xf ${APPLICATION}.tar --strip-components=1 && rm -rf ${APPLICATION}.tar

# Stage 2: final image, which receives only the unpacked /airbyte directory from stage 1.
FROM airbyte/integration-base-java:dev

WORKDIR /airbyte
ENV APPLICATION destination-doris

COPY --from=build /airbyte /airbyte

# Image metadata: connector version and image name.
LABEL io.airbyte.version=0.1.0
LABEL io.airbyte.name=airbyte/destination-doris
68 changes: 68 additions & 0 deletions airbyte-integrations/connectors/destination-doris/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Destination Doris

This is the repository for the Doris destination connector in Java.
For information about how to use this connector within Airbyte, see [the User Documentation](https://docs.airbyte.io/integrations/destinations/doris).

## Local development

#### Building via Gradle
From the Airbyte repository root, run:
```
./gradlew :airbyte-integrations:connectors:destination-doris:build
```

#### Create credentials
**If you are a community contributor**, generate the necessary credentials and place them in `secrets/config.json` conforming to the spec file in `src/main/resources/spec.json`.
Note that the `secrets` directory is git-ignored by default, so there is no danger of accidentally checking in sensitive information.

**If you are an Airbyte core member**, follow the [instructions](https://docs.airbyte.io/connector-development#using-credentials-in-ci) to set up the credentials.

### Locally running the connector docker image

#### Build
Build the connector image via Gradle:
```
./gradlew :airbyte-integrations:connectors:destination-doris:airbyteDocker
```
When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in
the Dockerfile.

#### Run
Then run any of the connector commands as follows:
```
docker run --rm airbyte/destination-doris:dev spec
docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-doris:dev check --config /secrets/config.json
docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-doris:dev discover --config /secrets/config.json
docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-doris:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json
```

## Testing
We use `JUnit` for Java tests.

### Unit and Integration Tests
Place unit tests under `src/test/java/io/airbyte/integrations/destination/doris`.

#### Acceptance Tests
Airbyte has a standard test suite that all destination connectors must pass. Implement the `TODO`s in
`src/test-integration/java/io/airbyte/integrations/destination/doris/DorisDestinationAcceptanceTest.java`.

### Using gradle to run tests
All commands should be run from airbyte project root.
To run unit tests:
```
./gradlew :airbyte-integrations:connectors:destination-doris:unitTest
```
To run acceptance and custom integration tests:
```
./gradlew :airbyte-integrations:connectors:destination-doris:integrationTest
```

## Dependency Management

### Publishing a new version of the connector
You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what?
1. Make sure your changes are passing unit and integration tests.
1. Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)).
1. Create a Pull Request.
1. Pat yourself on the back for being an awesome contributor.
1. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master.
29 changes: 29 additions & 0 deletions airbyte-integrations/connectors/destination-doris/bootstrap.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Doris destination


The Doris destination uses the MySQL protocol (JDBC) and Doris Stream Load to exchange data.

1. JDBC is used to manipulate the table structure and to execute the `CREATE TABLE` statement before data is imported.
2. Stream Load is a synchronous import method based on HTTP/HTTPS. The Doris destination first writes the records to a CSV file and then loads that file into Doris as a single Stream Load transaction.

## Introduction to Apache Doris

Apache Doris is a high-performance, real-time analytical database based on MPP architecture, known for its extreme speed and ease of use. It only requires a sub-second response time to return query results under massive data and can support not only high-concurrent point query scenarios but also high-throughput complex analysis scenarios. Based on this, Apache Doris can better meet the scenarios of report analysis, ad-hoc query, unified data warehouse, Data Lake Query Acceleration, etc. Users can build user behavior analysis, AB test platform, log retrieval analysis, user portrait analysis, order analysis, and other applications on top of this.
[https://doris.apache.org/docs/summary/basic-summary](https://doris.apache.org/docs/summary/basic-summary)


## Technical Overview
The overall architecture of Apache Doris is shown in the following figure. The Doris architecture is very simple, with only two types of processes.

#### Frontend(FE):
##### It is mainly responsible for user request access, query parsing and planning, management of metadata, and node management-related work.
#### Backend(BE):
##### It is mainly responsible for data storage and query plan execution.

Both types of processes are horizontally scalable, and a single cluster can support up to hundreds of machines and tens of petabytes of storage capacity. And these two types of processes guarantee high availability of services and high reliability of data through consistency protocols. This highly integrated architecture design greatly reduces the operation and maintenance cost of a distributed system.

Apache Doris adopts the MySQL protocol, is highly compatible with the MySQL dialect, and supports standard SQL. Users can access Doris through various client tools, and Doris connects seamlessly with BI tools.

[Stream load](https://doris.apache.org/docs/data-operate/import/import-way/stream-load-manual/) is a synchronous way of importing. Users import local files or data streams into Doris by sending HTTP protocol requests. Stream load synchronously executes the import and returns the import result. Users can directly determine whether the import is successful by the return body of the request. Stream load is mainly suitable for importing local files or data from data streams through procedures.

Each import job of Doris, whether it is batch import using Stream Load or single import using INSERT statement, is a complete transaction operation. The import transaction can ensure that the data in a batch takes effect atomically, and there will be no partial data writing.
21 changes: 21 additions & 0 deletions airbyte-integrations/connectors/destination-doris/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Gradle build for the Doris destination connector.
plugins {
id 'application'
id 'airbyte-docker'
id 'airbyte-integration-test-java'
}

application {
// Entry-point class of the connector application.
mainClass = 'io.airbyte.integrations.destination.doris.DorisDestination'
}

dependencies {
// Third-party libraries: Apache Commons CSV and the MySQL JDBC driver.
implementation 'org.apache.commons:commons-csv:1.4'
implementation group: 'mysql', name: 'mysql-connector-java', version: '8.0.16'
// Airbyte project modules.
implementation project(':airbyte-config:config-models')
implementation project(':airbyte-protocol:protocol-models')
implementation project(':airbyte-integrations:bases:base-java')
// NOTE(review): presumably ties this build to the base-java docker image outputs - confirm.
implementation files(project(':airbyte-integrations:bases:base-java').airbyteDocker.outputs)

// Integration tests use the standard destination test module plus this connector itself.
integrationTestJavaImplementation project(':airbyte-integrations:bases:standard-destination-test')
integrationTestJavaImplementation project(':airbyte-integrations:connectors:destination-doris')
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
/*
* Copyright (c) 2022 Airbyte, Inc., all rights reserved.
*/

package io.airbyte.integrations.destination.doris;

import com.fasterxml.jackson.databind.JsonNode;

/**
 * Immutable value object holding the settings used to connect to a Doris frontend (FE): database,
 * target table, credentials, FE host, and the FE HTTP and query ports.
 *
 * <p>Instances are normally created from the connector configuration via
 * {@link #getDorisConnection(JsonNode, String)}.
 */
public class DorisConnectionOptions {

  // Keys looked up in the connector configuration JSON.
  private static final String DB_KEY = "database";
  private static final String TABLE_KEY = "table";
  private static final String USER_KEY = "username";
  private static final String PWD_KEY = "password";
  private static final String FE_HOST_KEY = "host";
  private static final String FE_HTTP_PORT_KEY = "httpport";
  private static final String FE_QUERY_PORT_KEY = "queryport";

  // Fallback ports used when the configuration does not supply a usable value.
  private static final int DEFAULT_FE_HTTP_PORT = 8030;
  private static final int DEFAULT_FE_QUERY_PORT = 9030;

  private final String db;
  private final String table;
  private final String user;
  private final String pwd;
  private final String feHost;
  private final Integer feHttpPort;
  private final Integer feQueryPort;

  /**
   * Builds connection options from the connector configuration.
   *
   * <p>The {@code database}, {@code username} and {@code host} keys must be present. A missing
   * {@code password} is treated as the empty string, and a missing port key falls back to its
   * default (8030 for HTTP, 9030 for query).
   *
   * @param config connector configuration JSON
   * @param table name of the table these options refer to
   * @return the populated connection options
   * @throws NullPointerException if a mandatory key is absent from {@code config}
   */
  public static DorisConnectionOptions getDorisConnection(final JsonNode config, String table) {
    final JsonNode pwdNode = config.get(PWD_KEY);
    return new DorisConnectionOptions(
        config.get(DB_KEY).asText(),
        table,
        config.get(USER_KEY).asText(),
        pwdNode == null ? "" : pwdNode.asText(),
        config.get(FE_HOST_KEY).asText(),
        readPort(config, FE_HTTP_PORT_KEY, DEFAULT_FE_HTTP_PORT),
        readPort(config, FE_QUERY_PORT_KEY, DEFAULT_FE_QUERY_PORT));
  }

  /**
   * Reads a port from {@code config}; returns {@code defaultPort} when the key is absent,
   * otherwise delegates to {@code asInt(defaultPort)} on the node.
   */
  private static int readPort(final JsonNode config, final String key, final int defaultPort) {
    final JsonNode node = config.get(key);
    return node == null ? defaultPort : node.asInt(defaultPort);
  }

  /**
   * @param db database name
   * @param table table name
   * @param user user name
   * @param pwd password
   * @param feHost frontend host name
   * @param feHttpPort frontend HTTP port
   * @param feQueryPort frontend query (SQL) port
   */
  public DorisConnectionOptions(String db, String table, String user, String pwd, String feHost, Integer feHttpPort, Integer feQueryPort) {
    this.db = db;
    this.table = table;
    this.user = user;
    this.pwd = pwd;
    this.feHost = feHost;
    this.feHttpPort = feHttpPort;
    this.feQueryPort = feQueryPort;
  }

  public String getDb() {
    return db;
  }

  public String getTable() {
    return table;
  }

  public String getUser() {
    return user;
  }

  public String getPwd() {
    return pwd;
  }

  public String getFeHost() {
    return feHost;
  }

  public Integer getFeHttpPort() {
    return feHttpPort;
  }

  /** @return {@code host:httpPort} of the frontend. */
  public String getHttpHostPort() {
    return feHost + ":" + feHttpPort;
  }

  /** @return {@code host:queryPort} of the frontend. */
  public String getQueryHostPort() {
    // Previously this returned the HTTP port, duplicating getHttpHostPort().
    return feHost + ":" + feQueryPort;
  }

  public Integer getFeQueryPort() {
    return feQueryPort;
  }

  /** The password is masked so that logging this object never exposes the secret. */
  @Override
  public String toString() {
    return "DorisConnectionOptions{" +
        "db='" + db + '\'' +
        ", table='" + table + '\'' +
        ", user='" + user + '\'' +
        ", pwd='******'" +
        ", feHost='" + feHost + '\'' +
        ", feHttpPort=" + feHttpPort +
        ", feQueryPort=" + feQueryPort +
        '}';
  }

}
Loading

0 comments on commit 241cf4a

Please sign in to comment.