Skip to content

Commit

Permalink
🐛 Normalization: Decrease event buffer size (#11267)
Browse files Browse the repository at this point in the history
  • Loading branch information
edgao authored Mar 25, 2022
1 parent 6aa77bd commit 0464a10
Show file tree
Hide file tree
Showing 8 changed files with 55 additions and 27 deletions.
2 changes: 1 addition & 1 deletion airbyte-integrations/bases/base-normalization/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,5 @@ WORKDIR /airbyte
ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh"
ENTRYPOINT ["/airbyte/entrypoint.sh"]

LABEL io.airbyte.version=0.1.72
LABEL io.airbyte.version=0.1.73
LABEL io.airbyte.name=airbyte/normalization
13 changes: 12 additions & 1 deletion airbyte-integrations/bases/base-normalization/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -113,9 +113,20 @@ function main() {
. /airbyte/sshtunneling.sh
openssh "${PROJECT_DIR}/ssh.json"
trap 'closessh' EXIT

# We don't run dbt 1.0.x on all destinations (because their plugins don't support it yet),
# so we must pass `--event-buffer-size` only when the installed dbt supports it.
dbt --help | grep -E -- '--event-buffer-size'
if [ $? -eq 0 ]; then
echo -e "\nDBT >=1.0.0 detected; using 10K event buffer size\n"
dbt_additional_args="--event-buffer-size=10000"
else
dbt_additional_args=""
fi

set +e # allow script to continue running even if next commands fail to run properly
# Run dbt to compile and execute the generated normalization models
dbt run --profiles-dir "${PROJECT_DIR}" --project-dir "${PROJECT_DIR}"
dbt ${dbt_additional_args} run --profiles-dir "${PROJECT_DIR}" --project-dir "${PROJECT_DIR}"
DBT_EXIT_CODE=$?
if [ ${DBT_EXIT_CODE} -ne 0 ]; then
echo -e "\nDiagnosing dbt debug to check if destination is available for dbt and well configured (${DBT_EXIT_CODE}):\n"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -401,30 +401,40 @@ def run_check_dbt_command(normalization_image: str, command: str, cwd: str, forc
"""
Run a dbt subprocess, checking for and counting any "ERROR", "FAIL" or "WARNING" printed in its output
"""
if normalization_image.startswith("airbyte/normalization-oracle") or normalization_image.startswith("airbyte/normalization-mysql"):
dbtAdditionalArgs = []
else:
dbtAdditionalArgs = ["--event-buffer-size=10000"]

error_count = 0
commands = [
"docker",
"run",
"--rm",
"--init",
"-v",
f"{cwd}:/workspace",
"-v",
f"{cwd}/build:/build",
"-v",
f"{cwd}/logs:/logs",
"-v",
"/tmp:/tmp",
"--network",
"host",
"--entrypoint",
"/usr/local/bin/dbt",
"-i",
normalization_image,
command,
"--profiles-dir=/workspace",
"--project-dir=/workspace",
]
commands = (
[
"docker",
"run",
"--rm",
"--init",
"-v",
f"{cwd}:/workspace",
"-v",
f"{cwd}/build:/build",
"-v",
f"{cwd}/logs:/logs",
"-v",
"/tmp:/tmp",
"--network",
"host",
"--entrypoint",
"/usr/local/bin/dbt",
"-i",
normalization_image,
]
+ dbtAdditionalArgs
+ [
command,
"--profiles-dir=/workspace",
"--project-dir=/workspace",
]
)
if force_full_refresh:
commands.append("--full-refresh")
command = f"{command} --full-refresh"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
# As of today, dbt-mysql doesn't support 1.0.0
# IF YOU UPGRADE DBT, make sure to also edit these files:
# 1. Remove the "normalization-mysql" entry here https://github.com/airbytehq/airbyte/pull/11267/files#diff-9a3bcae8cb5c56aa30c00548e06eade6ad771f3d4f098f6867ae9a183049dfd8R404
# 2. Check if oracle.Dockerfile is on DBT 1.0.0 yet; if it is, then revert this entire edit https://github.com/airbytehq/airbyte/pull/11267/files#diff-8880e85b2b5690accc6f15f9292a8589a6eb83564803d57c4ee74e2ee8ede09eR117-R130
FROM fishtownanalytics/dbt:0.19.0
COPY --from=airbyte/base-airbyte-protocol-python:0.1.1 /airbyte /airbyte

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
# As of today, dbt-oracle doesn't support 1.0.0
# IF YOU UPGRADE DBT, make sure to also edit these files:
# 1. Remove the "normalization-oracle" entry here https://github.com/airbytehq/airbyte/pull/11267/files#diff-9a3bcae8cb5c56aa30c00548e06eade6ad771f3d4f098f6867ae9a183049dfd8R404
# 2. Check if mysql.Dockerfile is on DBT 1.0.0 yet; if it is, then revert this entire edit https://github.com/airbytehq/airbyte/pull/11267/files#diff-8880e85b2b5690accc6f15f9292a8589a6eb83564803d57c4ee74e2ee8ede09eR117-R130
FROM fishtownanalytics/dbt:0.19.1

USER root
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,5 @@ WORKDIR /airbyte
ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh"
ENTRYPOINT ["/airbyte/entrypoint.sh"]

LABEL io.airbyte.version=0.1.72
LABEL io.airbyte.version=0.1.73
LABEL io.airbyte.name=airbyte/normalization-snowflake
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
public class NormalizationRunnerFactory {

public static final String BASE_NORMALIZATION_IMAGE_NAME = "airbyte/normalization";
public static final String NORMALIZATION_VERSION = "0.1.72";
public static final String NORMALIZATION_VERSION = "0.1.73";

static final Map<String, ImmutablePair<String, DefaultNormalizationRunner.DestinationType>> NORMALIZATION_MAPPING =
ImmutableMap.<String, ImmutablePair<String, DefaultNormalizationRunner.DestinationType>>builder()
Expand Down
1 change: 1 addition & 0 deletions docs/understanding-airbyte/basic-normalization.md
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,7 @@ Therefore, in order to "upgrade" to the desired normalization version, you need

| Airbyte Version | Normalization Version | Date | Pull Request | Subject |
|:----------------| :--- | :--- | :--- | :--- |
| 0.35.60-alpha | 0.1.73 | 2022-03-25 | [\#11267](https://github.com/airbytehq/airbyte/pull/11267) | Set `--event-buffer-size` to reduce memory usage |
| 0.35.59-alpha | 0.1.72 | 2022-03-24 | [\#11093](https://github.com/airbytehq/airbyte/pull/11093) | Added Snowflake OAuth2.0 support |
| 0.35.53-alpha | 0.1.71 | 2022-03-14 | [\#11077](https://github.com/airbytehq/airbyte/pull/11077) | Enable BigQuery to handle project ID embedded inside dataset ID |
| 0.35.49-alpha | 0.1.70 | 2022-03-11 | [\#11051](https://github.com/airbytehq/airbyte/pull/11051) | Upgrade dbt to 1.0.0 (except for MySQL and Oracle) |
Expand Down

0 comments on commit 0464a10

Please sign in to comment.