From 112dad4ff269024a0be435de893e50abc73f9bf6 Mon Sep 17 00:00:00 2001 From: Lewis Hemens Date: Thu, 31 Aug 2023 10:37:32 +0100 Subject: [PATCH] Sandbox2 wrapper (#1536) * Progress adding sandbox API * Fixed versioning * Working example, if you make runfiles visible * Working node spawn but sketchy af * Progress using non-fork method, but should probably use forking method * Fixed policy enforcement of node process, but still execveat error. Should try using bazel pathed nodejs version * Added bazel build nodejs binary path instead of from usr root * Progress, still stuck on execveat issue * Tidy * Swap to dynamic startup * Progress passing compile.js to sandboxed script * Swapped to running compile_loader.js, added custom absolute path finder. Still have fd > -1 error though * Simplify input * Strip fd stuff * Tidy * Remove runfiles build dependency * Remove import, format * Add fd inference, binary run seems to be failing silently * Update to latest SAPI, simplify and tidy * Entry point experimentation * Progress updating policies * More allowed syscalls, with explanations * Tidy policy order * Working policies * Progress setting up test * Tidy some logging * Update entry point to new worker bundle location * Bump protobufjs from 6.8.8 to 6.11.3 (#1346) * Bump protobufjs from 6.8.8 to 6.11.3 Bumps [protobufjs](https://github.com/protobufjs/protobuf.js) from 6.8.8 to 6.11.3. - [Release notes](https://github.com/protobufjs/protobuf.js/releases) - [Changelog](https://github.com/protobufjs/protobuf.js/blob/v6.11.3/CHANGELOG.md) - [Commits](https://github.com/protobufjs/protobuf.js/compare/6.8.8...v6.11.3) --- updated-dependencies: - dependency-name: protobufjs dependency-type: direct:production ... Signed-off-by: dependabot[bot] * Update protobufjs dep versions * Upgrade to protobufjs v7.0.0, node to 16x * Bump bazel to version 5.2.0 * Bump test CI to correct bazel version * 3.5.0 * 3.5.0 Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Lewis Hemens * Require defaultLocation to be set in BigQuery (#1353) * Enforce defaultLocation requirement in core and cli initialization * Add a test * Bump version * Update publish script branch (#1354) * Explicitly set path in dataform.json location validation error (#1355) * Audit npm/yarn packages and fix (#1356) * Audit npm/yarn packages and fix * Remove docs build/push scripts and triggers * Cut version 2.0.0 (#1361) * Update @dataform/core to never return a graph with non-unique action names. (#1366) * Update @dataform/core to never return a graph with non-unique action names. * Include a compilation error for each removed action. * Rename a shadowed variable * Increment DF_VERSION * Bump moment-timezone from 0.5.28 to 0.5.37 (#1370) Bumps [moment-timezone](https://github.com/moment/moment-timezone) from 0.5.28 to 0.5.37. - [Release notes](https://github.com/moment/moment-timezone/releases) - [Changelog](https://github.com/moment/moment-timezone/blob/develop/changelog.md) - [Commits](https://github.com/moment/moment-timezone/compare/0.5.28...0.5.37) --- updated-dependencies: - dependency-name: moment-timezone dependency-type: indirect ... Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Upgrade vm2. (#1372) * Upgrade vm2.
* bump version * Publish core.proto within the @dataform/core package (#1378) * Publish core.proto within the @dataform/core package * Bump version * Update readme.md (#1397) * Update readme.md * Update readme.md * Enable `main` compilation by default for all @dataform/core versions >= `2.0.4`. (#1401) * Use main compilation by default for all @dataform/core versions >= 2.0.4. * use semver.subset * Bump decode-uri-component from 0.2.0 to 0.2.2 (#1404) Bumps [decode-uri-component](https://github.com/SamVerschueren/decode-uri-component) from 0.2.0 to 0.2.2. - [Release notes](https://github.com/SamVerschueren/decode-uri-component/releases) - [Commits](https://github.com/SamVerschueren/decode-uri-component/compare/v0.2.0...v0.2.2) --- updated-dependencies: - dependency-name: decode-uri-component dependency-type: indirect ... Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Main <- Master (#1407) * typo (#1364) * Buttons label fix (#1365) "Read the blog post" was linking to the package page and "Visit the package page" was linking to the blog post. * Update assertions.md (#1377) Co-authored-by: Dolan Co-authored-by: Stuart Lewis <75398266+stulew93@users.noreply.github.com> * Make table level assertions have the same tags as the table (#1406) * Make table level assertions have the same tags as the table * add tag propagation post assertion adding * Update version to 2.1.0 * Bump qs from 6.5.2 to 6.5.3 (#1408) Bumps [qs](https://github.com/ljharb/qs) from 6.5.2 to 6.5.3. - [Release notes](https://github.com/ljharb/qs/releases) - [Changelog](https://github.com/ljharb/qs/blob/main/CHANGELOG.md) - [Commits](https://github.com/ljharb/qs/compare/v6.5.2...v6.5.3) --- updated-dependencies: - dependency-name: qs dependency-type: indirect ... Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Bump express from 4.17.1 to 4.17.3 (#1409) Bumps [express](https://github.com/expressjs/express) from 4.17.1 to 4.17.3. - [Release notes](https://github.com/expressjs/express/releases) - [Changelog](https://github.com/expressjs/express/blob/master/History.md) - [Commits](https://github.com/expressjs/express/compare/4.17.1...4.17.3) --- updated-dependencies: - dependency-name: express dependency-type: direct:production ... Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Bump go.mongodb.org/mongo-driver from 1.2.0 to 1.5.1 (#1357) Bumps [go.mongodb.org/mongo-driver](https://github.com/mongodb/mongo-go-driver) from 1.2.0 to 1.5.1. - [Release notes](https://github.com/mongodb/mongo-go-driver/releases) - [Commits](https://github.com/mongodb/mongo-go-driver/compare/v1.2.0...v1.5.1) --- updated-dependencies: - dependency-name: go.mongodb.org/mongo-driver dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Minor typo correction (#1360) * Remove @dataform/sql package and common_v1 example (#1415) * Remove @dataform/sql package and common_v1 example * Remove SQL package from common_v2 example * Remove build imports from tests of @dataform/sql that weren't doing anything * Inline timestamp definition in non BQ tests * Fix timestamp function case * Bump json5 from 1.0.1 to 1.0.2 (#1416) Bumps [json5](https://github.com/json5/json5) from 1.0.1 to 1.0.2. - [Release notes](https://github.com/json5/json5/releases) - [Changelog](https://github.com/json5/json5/blob/main/CHANGELOG.md) - [Commits](https://github.com/json5/json5/compare/v1.0.1...v1.0.2) --- updated-dependencies: - dependency-name: json5 dependency-type: indirect ... Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Remove dead code paths and those not relating to ELT management (#1414) * Remove code paths not relating to ELT management * Remove Dataform SQL package * Remove unused workspace imports * Remove common_v1 example * Remove some v1 * Remove @dataform/sql package and common_v1 example * Remove SQL package from common_v2 example * Remove build imports from tests of @dataform/sql that weren't doing anything * Inline timestamp definition in non BQ tests * Fix timestamp function case * Restore v1 backwards_compatibility tests * Environments and schedules (#1418) * Remove DB tools dependence on `/api` (#1417) * Remove DB tools dependence on @dataform/api in prep for removal * Remove unnecessary options * Remove more * More simplifying * Fix import order * Tweak postgres connection wait * Remove console log * Remove promise pool from presto test * Remove @dataform/assertion-utils in favor of auto-assertions (#1419) * Remove @dataform/assertion-utils in favor of auto-assertions * Fix action count * Fix quotes in tests * Fix bazel build //... for our distro by bumping versions (#1421) * Fix bazel build //... for our distro by bumping versions * Bump bazel to 5.4.0 * Update contributing (#1425) * Remove docs content (#1426) * Remove docs content * Add note to main readme about docs transition * Fix contributing link * Update base dataform image to use more recent Node version (#1427) * Update base dataform image to use more recent Node version - also bump some required deps * Bump go version too * Upgrade vm2 version. (#1431) * Added schema function (#1432) * Removed redundant toResolvable calls * Refactored resolve/normalize in session * Added schema function to common context objects * Removed unused session import * Bumped minor version * Simplified schema assertions in core.spec.ts * Refactored normalize* => finalize* in session helpers * Refactored with/without suffix tests to run as parameterized tests * Removed redundant 'Test' import * Bump ua-parser-js from 0.7.28 to 0.7.33 (#1433) Bumps [ua-parser-js](https://github.com/faisalman/ua-parser-js) from 0.7.28 to 0.7.33. - [Release notes](https://github.com/faisalman/ua-parser-js/releases) - [Changelog](https://github.com/faisalman/ua-parser-js/blob/master/changelog.md) - [Commits](https://github.com/faisalman/ua-parser-js/compare/0.7.28...0.7.33) --- updated-dependencies: - dependency-name: ua-parser-js dependency-type: indirect ... 
Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Refactored name() function to finalize name (#1435) * Database function (#1438) * Refactored name() function to finalize name * Added database() function * Bumped minor version to 2.3.0 * Refactored database() to add an error when the database is undefined * Inlined database & ref error messages * Change sample command to run tests (#1439) * Update contributing.md (#1446) * Added includeDependents flag to run command * Highlight that dev branches should branch off main * Fix missing credentials error by adding it as the default option (#1447) * fix missing include dependents (#1448) * fix missing include dependents * bump version * Fix docker cmd for image (#1456) * Fix docker cmd for image * Replace with empty cmd instead of help * Added enum for table's type field (#1453) * added enum for table's type field Signed-off-by: Nick Nalivaika * update version Signed-off-by: Nick Nalivaika * simplify code by filling enumType field for tables when loading compiled graphs * small fix for bigquery adapter * make linter happy * fix integration tests * do not call setOrValidateTableEnumType when checking for table validity, just check enumType value --------- Signed-off-by: Nick Nalivaika * Adds a flag 'track' to allow users to configure analytics with flags (#1450) * Adds a flag 'no-track' to allow users to configure analytics with flags * Rework so that no-track bypasses settings.json * Linting * Move to --track, fix an issue with --track=true, doc changes * Update boolean type * Update version * Remove code which always executes auto-assertions. (#1460) * Remove code which always executes auto-assertions. * fix failing test * Throw an error for non-string `vars`. (#1461) * Add job prefix option to the CLI (#1449) * Add job prefix option to the CLI * format * Fix removal of dataform- prefix * Fix import order * Import order take 2 * Default empty execution options fix * fix parameter placement * Bump vm2 from 3.9.13 to 3.9.15 (#1473) Bumps [vm2](https://github.com/patriksimek/vm2) from 3.9.13 to 3.9.15. - [Release notes](https://github.com/patriksimek/vm2/releases) - [Changelog](https://github.com/patriksimek/vm2/blob/master/CHANGELOG.md) - [Commits](https://github.com/patriksimek/vm2/compare/3.9.13...3.9.15) --- updated-dependencies: - dependency-name: vm2 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Bump vm2 from 3.9.15 to 3.9.16 (#1476) Bumps [vm2](https://github.com/patriksimek/vm2) from 3.9.15 to 3.9.16. - [Release notes](https://github.com/patriksimek/vm2/releases) - [Changelog](https://github.com/patriksimek/vm2/blob/master/CHANGELOG.md) - [Commits](https://github.com/patriksimek/vm2/compare/3.9.15...3.9.16) --- updated-dependencies: - dependency-name: vm2 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Bump vm2 from 3.9.16 to 3.9.17 (#1478) Bumps [vm2](https://github.com/patriksimek/vm2) from 3.9.16 to 3.9.17. - [Release notes](https://github.com/patriksimek/vm2/releases) - [Changelog](https://github.com/patriksimek/vm2/blob/master/CHANGELOG.md) - [Commits](https://github.com/patriksimek/vm2/compare/3.9.16...3.9.17) --- updated-dependencies: - dependency-name: vm2 dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Remove/update dependencies resulting in updated fsevents dependency version. (#1481) * Add `schema` & `database` to contextFunctions in compile time (#1485) * Upgrade vm2 to `3.9.19`. (#1491) * Remove old vm2 dependency. (#1493) * Bump loader-utils from 1.2.3 to 1.4.2 (#1492) Bumps [loader-utils](https://github.com/webpack/loader-utils) from 1.2.3 to 1.4.2. - [Release notes](https://github.com/webpack/loader-utils/releases) - [Changelog](https://github.com/webpack/loader-utils/blob/v1.4.2/CHANGELOG.md) - [Commits](https://github.com/webpack/loader-utils/compare/v1.2.3...v1.4.2) --- updated-dependencies: - dependency-name: loader-utils dependency-type: indirect ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Bump minimatch from 3.0.4 to 3.1.2 (#1495) Bumps [minimatch](https://github.com/isaacs/minimatch) from 3.0.4 to 3.1.2. - [Changelog](https://github.com/isaacs/minimatch/blob/main/changelog.md) - [Commits](https://github.com/isaacs/minimatch/compare/v3.0.4...v3.1.2) --- updated-dependencies: - dependency-name: minimatch dependency-type: indirect ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Release `2.5.0`: to allow use of methods added to SQLX in https://github.com/dataform-co/dataform/pull/1485. (#1498) * Separate sqlx build target into sqlx and format (#1499) * Separate sqlx build target into sqlx and format * Fix build dependencies that used //sqlx * Update sql-formatter & specify SQL language according to warehouse (#1490) * Update sql-formatter * Fix placeholder to be treated as an identifier * Pass language argument & tweak tests * Add test cases for named arguments & QUALIFY clause * Pass language according to warehouse from cli * Include thrown error message while parsing dataform.json in format * Revert using ErrorWithCause & just include message * Move warehouse and sql language mapping * Add user-defined type guard to handle WarehouseType * Update format subcommand to use WarehouseType * Bump minor version to 2.6.0 * Fix trailing ; * Fix code format & tslint issues * Bump snowflake-sdk from 1.6.12 to 1.6.21 (#1500) Bumps [snowflake-sdk](https://github.com/snowflakedb/snowflake-connector-nodejs) from 1.6.12 to 1.6.21. - [Release notes](https://github.com/snowflakedb/snowflake-connector-nodejs/releases) - [Commits](https://github.com/snowflakedb/snowflake-connector-nodejs/compare/v1.6.12...v1.6.21) --- updated-dependencies: - dependency-name: snowflake-sdk dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Bump semver from 7.3.8 to 7.5.2 (#1507) Bumps [semver](https://github.com/npm/node-semver) from 7.3.8 to 7.5.2. - [Release notes](https://github.com/npm/node-semver/releases) - [Changelog](https://github.com/npm/node-semver/blob/main/CHANGELOG.md) - [Commits](https://github.com/npm/node-semver/compare/v7.3.8...v7.5.2) --- updated-dependencies: - dependency-name: semver dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Bump fast-xml-parser from 4.2.4 to 4.2.5 (#1508) Bumps [fast-xml-parser](https://github.com/NaturalIntelligence/fast-xml-parser) from 4.2.4 to 4.2.5. 
- [Release notes](https://github.com/NaturalIntelligence/fast-xml-parser/releases) - [Changelog](https://github.com/NaturalIntelligence/fast-xml-parser/blob/master/CHANGELOG.md) - [Commits](https://github.com/NaturalIntelligence/fast-xml-parser/compare/v4.2.4...v4.2.5) --- updated-dependencies: - dependency-name: fast-xml-parser dependency-type: indirect ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Fix image link in readme.md (#1506) * Enable formatting for triple-quoted strings (#1505) * Add test cases for triple-quoted string * Add a todo test case for formatting * Enhance lexer to handle triple-quoted string * Fix tslint issues * Fix typos * Bump protobufjs from 7.0.0 to 7.2.4 (#1510) Bumps [protobufjs](https://github.com/protobufjs/protobuf.js) from 7.0.0 to 7.2.4. - [Release notes](https://github.com/protobufjs/protobuf.js/releases) - [Changelog](https://github.com/protobufjs/protobuf.js/blob/master/CHANGELOG.md) - [Commits](https://github.com/protobufjs/protobuf.js/compare/protobufjs-v7.0.0...protobufjs-v7.2.4) --- updated-dependencies: - dependency-name: protobufjs dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Update version.bzl (#1516) * Include multiline string literals when creating statements (#1520) * Include multiline string literals when creating statements * Bump version to 2.6.2 * Remove unused tools and add vscode package-lock.json (#1521) * Revert "Update entry point to new worker bundle location" This reverts commit 7c970c385ee505b26709abbeb6960ecb3c7711c4. * Revert "Revert "Update entry point to new worker bundle location"" This reverts commit 6fdb5f6eba860018df97ddec4be90b7ddd592f68. 
* Fix merge to be from main_v1 * Progress re-adding sandbox/vm bundle * Some tweaks as a hacky way to make deps work * Move to minimal bash invocation example * Cleanup to minimal node invocation * Checkpoint * Tests nearly working * Can run compile, but tests failing * Exit process to avoid sandbox breakage * Tests passing with tight policies * Make sandbox2 usage configurable * Remove some unneeded tests * Remove linting * Disable integration, cli tests * Review comments --------- Signed-off-by: dependabot[bot] Signed-off-by: Nick Nalivaika Co-authored-by: Elias Kassell Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: diasdauletov <110160920+diasdauletov@users.noreply.github.com> Co-authored-by: BenBirt Co-authored-by: ghh Co-authored-by: Dolan Co-authored-by: Stuart Lewis <75398266+stulew93@users.noreply.github.com> Co-authored-by: Joshua Brandon Co-authored-by: Alito Alvarez Co-authored-by: Andres Lowrie Co-authored-by: Nick Nalivayka Co-authored-by: George McGowan Co-authored-by: pokutuna Co-authored-by: VIKRAM SINGH --- .bazelrc | 7 + WORKSPACE | 54 +- api/BUILD | 8 + api/commands/compile.ts | 101 ++- packages/@dataform/cli/worker.ts | 5 +- sandbox/BUILD | 26 + sandbox/compile_executor.cc | 254 ++++++ sandbox/vm/compile.ts | 20 +- sandbox/worker/BUILD | 78 ++ sandbox/worker/webpack.config.js | 42 + sandbox/worker/worker.ts | 3 + scripts/cloudbuild/bazel_test | 6 +- tests/api/BUILD | 1 + tests/api/examples.spec.ts | 1426 +++++++++++++++--------------- tools/common/copy.bzl | 14 + 15 files changed, 1277 insertions(+), 768 deletions(-) create mode 100644 sandbox/BUILD create mode 100644 sandbox/compile_executor.cc create mode 100644 sandbox/worker/BUILD create mode 100644 sandbox/worker/webpack.config.js create mode 100644 sandbox/worker/worker.ts diff --git a/.bazelrc b/.bazelrc index b6c3242af..777fde216 100644 --- a/.bazelrc +++ b/.bazelrc @@ -10,3 +10,10 @@ build:remote-cache --remote_instance_name=projects/dataform-corp/instances/dataf build:remote-cache --remote_timeout=3600 build:remote-cache --auth_enabled=true build:remote-cache --google_default_credentials=true + +# Build in C++17 mode without a custom CROSSTOOL +build --cxxopt=-std=c++17 + +# TODO(https://github.com/bazelbuild/bazel/issues/8672): Remove this workaround +# This will become the default in Bazel 4.0 +build --incompatible_linkopts_to_linklibs \ No newline at end of file diff --git a/WORKSPACE b/WORKSPACE index 9ea47a1d3..273148cda 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -1,6 +1,8 @@ workspace(name = "df") +load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository", "new_git_repository") load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive", "http_file") +load("@bazel_tools//tools/build_defs/repo:utils.bzl", "maybe") http_archive( name = "bazel_skylib", @@ -33,6 +35,54 @@ load("@com_google_protobuf//:protobuf_deps.bzl", "protobuf_deps") protobuf_deps() +git_repository( + name = "com_google_sandboxed_api", + commit = "19fd11b91e6678db2fcfe69dd4037605730f5317", + remote = "https://github.com/google/sandboxed-api.git", + shallow_since = "1610629882 -0800", +) + +http_archive( + name = "enum34_archive", + build_file = "@com_google_sandboxed_api//sandboxed_api/bazel/external:enum34.BUILD", + sha256 = "8ad8c4783bf61ded74527bffb48ed9b54166685e4230386a9ed9b1279e2df5b1", + strip_prefix = "enum34-1.1.6", + urls = ["https://files.pythonhosted.org/packages/bf/3e/31d502c25302814a7c2f1d3959d2a3b3f78e509002ba91aea64993936876/enum34-1.1.6.tar.gz"], +) + 
+load("@com_google_sandboxed_api//sandboxed_api/bazel:sapi_deps.bzl", "sapi_deps") + +sapi_deps() + +http_archive( + name = "net_zlib", + build_file = "@com_google_sandboxed_api//sandboxed_api:bazel/external/zlib.BUILD", + patch_args = ["-p1"], + patches = ["@com_google_sandboxed_api//sandboxed_api:bazel/external/zlib.patch"], + sha256 = "c3e5e9fdd5004dcb542feda5ee4f0ff0744628baf8ed2dd5d66f8ca1197cb1a1", + strip_prefix = "zlib-1.2.11", + urls = [ + "https://mirror.bazel.build/zlib.net/zlib-1.2.11.tar.gz", + "https://www.zlib.net/zlib-1.2.11.tar.gz", + ], +) + +maybe( + http_archive, + name = "com_google_googletest", + sha256 = "a6ab7c7d6fd4dd727f6012b5d85d71a73d3aa1274f529ecd4ad84eb9ec4ff767", + strip_prefix = "googletest-dcc92d0ab6c4ce022162a23566d44f673251eee4", + urls = ["https://github.com/google/googletest/archive/dcc92d0ab6c4ce022162a23566d44f673251eee4.zip"], +) + +maybe( + http_archive, + name = "com_google_benchmark", + sha256 = "7f45be0bff07d787d75c3864212e9ea5ebba57593b2e487c783d11da70ef6857", + strip_prefix = "benchmark-56898e9a92fba537671d5462df9c5ef2ea6a823a", + urls = ["https://github.com/google/benchmark/archive/56898e9a92fba537671d5462df9c5ef2ea6a823a.zip"], +) + http_archive( name = "build_bazel_rules_nodejs", sha256 = "d14076339deb08e5460c221fae5c5e9605d2ef4848eee1f0c81c9ffdc1ab31c1", @@ -124,9 +174,9 @@ load("@io_bazel_rules_docker//repositories:deps.bzl", container_deps = "deps") container_deps() -load("@io_bazel_rules_docker//repositories:pip_repositories.bzl", "pip_deps") +load("@io_bazel_rules_docker//repositories:py_repositories.bzl", "py_deps") -pip_deps() +py_deps() load( "@io_bazel_rules_docker//container:container.bzl", diff --git a/api/BUILD b/api/BUILD index 9c42c8542..4a3699a7d 100644 --- a/api/BUILD +++ b/api/BUILD @@ -8,6 +8,12 @@ ts_library( ["**/*.ts"], exclude = ["utils/**/*.*"], ), + data = [ + "@nodejs_linux_amd64//:node_bin", + "//sandbox:compile_executor", + "//sandbox/worker:worker_bundle.js", + "//sandbox/worker:node_modules" + ], deps = [ "//api/utils", "//common/errors", @@ -32,6 +38,7 @@ ts_library( "@npm//@types/pg-query-stream", "@npm//@types/semver", "@npm//@types/ssh2", + "@npm//@types/uuid", "@npm//cron-parser", "@npm//deepmerge", "@npm//get-port", @@ -41,6 +48,7 @@ ts_library( "@npm//mssql", "@npm//object-hash", "@npm//pg", + "@npm//uuid", "@npm//pg-query-stream", "@npm//presto-client", "@npm//pretty-ms", diff --git a/api/commands/compile.ts b/api/commands/compile.ts index bedb20b2a..7692321aa 100644 --- a/api/commands/compile.ts +++ b/api/commands/compile.ts @@ -1,12 +1,16 @@ import * as fs from "fs"; import * as path from "path"; +import * as net from "net"; +import { promisify } from "util"; +import * as os from "os"; -import { ChildProcess, fork } from "child_process"; +import { ChildProcess, fork, spawn } from "child_process"; import deepmerge from "deepmerge"; import { validWarehouses } from "df/api/dbadapters"; import { coerceAsError, ErrorWithCause } from "df/common/errors/errors"; -import { decode64 } from "df/common/protos"; +import { decode64, encode64 } from "df/common/protos"; import { dataform } from "df/protos/ts"; +import { v4 as uuid } from "uuid"; // Project config properties that are required. 
const mandatoryProps: Array<keyof dataform.IProjectConfig> = ["warehouse", "defaultSchema"]; @@ -20,13 +24,14 @@ const simpleCheckProps: Array<keyof dataform.IProjectConfig> = [ "defaultSchema" ]; -export class CompilationTimeoutError extends Error {} +export class CompilationTimeoutError extends Error { } export async function compile( - compileConfig: dataform.ICompileConfig = {} + compileConfig: dataform.ICompileConfig = {}, + useSandbox2?: boolean, ): Promise<dataform.CompiledGraph> { // Resolve the path in case it hasn't been resolved already. - path.resolve(compileConfig.projectDir); + compileConfig = { ...compileConfig, projectDir: path.resolve(compileConfig.projectDir) } try { // check dataformJson is valid before we try to compile @@ -40,32 +45,47 @@ export async function compile( ); } - const result = await CompileChildProcess.forkProcess().compile(compileConfig); + var result: string = ""; - if (compileConfig.useMain) { - const decodedResult = decode64(dataform.CoreExecutionResponse, result); - return dataform.CompiledGraph.create(decodedResult.compile.compiledGraph); + const socketPath = `/tmp/${uuid()}.sock`; + + if (fs.existsSync(socketPath)) { + fs.unlinkSync(socketPath); } + const server = net.createServer((socket) => { + socket.on("data", (buf) => { + result += buf.toString(); + }); + }); + + server.listen(socketPath); + + await CompileChildProcess.forkProcess(socketPath, { ...compileConfig, useMain: false }, useSandbox2).timeout(compileConfig.timeoutMillis || 5000); - return decode64(dataform.CompiledGraph, result); + await promisify(server.close.bind(server))(); + + if (result.startsWith("ERROR:")) { + throw coerceAsError(JSON.parse(result.substring(6))); + } + const decodedResult = decode64(dataform.CompiledGraph, result); + return decodedResult; } export class CompileChildProcess { - public static forkProcess() { - // Runs the worker_bundle script we generate for the package (see packages/@dataform/cli/BUILD) - // if it exists, otherwise run the bazel compile loader target. - const findForkScript = () => { - try { - const workerBundlePath = require.resolve("./worker_bundle"); - return workerBundlePath; - } catch (e) { - return require.resolve("../../sandbox/vm/compile_loader"); - } - }; - const forkScript = findForkScript(); - return new CompileChildProcess( - fork(require.resolve(forkScript), [], { stdio: [0, 1, 2, "ipc", "pipe"] }) - ); + public static forkProcess(socket: string, compileConfig: dataform.ICompileConfig, useSandbox2: boolean) { + const platformPath = os.platform() === "darwin" ? 
"nodejs_darwin_amd64" : "nodejs_linux_amd64"; + const nodePath = path.join(process.env.RUNFILES, "df", `external/${platformPath}/bin/nodejs/bin/node`); + const workerRootPath = path.join(process.env.RUNFILES, "df", "sandbox/worker"); + const sandboxerPath = path.join(process.env.RUNFILES, "df", `sandbox/compile_executor`); + if (useSandbox2) { + return new CompileChildProcess( + spawn(sandboxerPath, [nodePath, workerRootPath, socket, encode64(dataform.CompileConfig, compileConfig), compileConfig.projectDir], { stdio: [0, 1, 2, "ipc", "pipe"] }) + ); + } else { + return new CompileChildProcess( + spawn(nodePath, [path.join(workerRootPath, "worker_bundle.js"), socket, encode64(dataform.CompileConfig, compileConfig)], { stdio: [0, 1, 2, "ipc", "pipe"] }) + ); + } } private readonly childProcess: ChildProcess; @@ -73,34 +93,22 @@ export class CompileChildProcess { this.childProcess = childProcess; } - public async compile(compileConfig: dataform.ICompileConfig) { + public async timeout(timeoutMillis: number) { const compileInChildProcess = new Promise(async (resolve, reject) => { - this.childProcess.on("error", (e: Error) => reject(coerceAsError(e))); - - this.childProcess.on("message", (messageOrError: string | Error) => { - if (typeof messageOrError === "string") { - resolve(messageOrError); - return; - } - reject(coerceAsError(messageOrError)); - }); - - this.childProcess.on("close", exitCode => { + this.childProcess.on("exit", exitCode => { if (exitCode !== 0) { reject(new Error(`Compilation child process exited with exit code ${exitCode}.`)); } + resolve("Compilation completed successfully"); }); - - // Trigger the child process to start compiling. - this.childProcess.send(compileConfig); }); let timer; const timeout = new Promise( (resolve, reject) => - (timer = setTimeout( - () => reject(new CompilationTimeoutError("Compilation timed out")), - compileConfig.timeoutMillis || 5000 - )) + (timer = setTimeout( + () => reject(new CompilationTimeoutError("Compilation timed out")), + timeoutMillis + )) ); try { await Promise.race([timeout, compileInChildProcess]); @@ -119,9 +127,8 @@ export class CompileChildProcess { export const checkDataformJsonValidity = (dataformJsonParsed: { [prop: string]: any }) => { const invalidWarehouseProp = () => { return dataformJsonParsed.warehouse && !validWarehouses.includes(dataformJsonParsed.warehouse) - ? `Invalid value on property warehouse: ${ - dataformJsonParsed.warehouse - }. Should be one of: ${validWarehouses.join(", ")}.` + ? `Invalid value on property warehouse: ${dataformJsonParsed.warehouse + }. 
Should be one of: ${validWarehouses.join(", ")}.` : null; }; const invalidProp = () => { diff --git a/packages/@dataform/cli/worker.ts b/packages/@dataform/cli/worker.ts index 797ecafb9..25f974212 100644 --- a/packages/@dataform/cli/worker.ts +++ b/packages/@dataform/cli/worker.ts @@ -1,2 +1,3 @@ -import { listenForCompileRequest } from "df/sandbox/vm/compile"; -listenForCompileRequest(); +import { compileAndSend } from "df/sandbox/vm/compile"; + +compileAndSend(process.argv[2], process.argv[3]); diff --git a/sandbox/BUILD b/sandbox/BUILD new file mode 100644 index 000000000..3291f9bae --- /dev/null +++ b/sandbox/BUILD @@ -0,0 +1,26 @@ +package(default_visibility = ["//visibility:public"]) + +load("@com_google_sandboxed_api//sandboxed_api/bazel:build_defs.bzl", "sapi_platform_copts") + +cc_binary( + name = "compile_executor", + srcs = ["compile_executor.cc"], + args = [ + "$(location @nodejs_linux_amd64//:node_bin)", + "$(location //sandbox/worker:worker_bundle.js)", + ], + copts = sapi_platform_copts(), + data = [ + "@nodejs_linux_amd64//:node_bin", + "//sandbox/worker:worker_bundle.js", + "//sandbox/worker:node_modules", + ], + deps = [ + "@bazel_tools//tools/cpp/runfiles", + "@com_google_absl//absl/base:core_headers", + "@com_google_sandboxed_api//sandboxed_api/sandbox2", + "@com_google_sandboxed_api//sandboxed_api/sandbox2:comms", + "@com_google_sandboxed_api//sandboxed_api/sandbox2/util:bpf_helper", + "@com_google_sandboxed_api//sandboxed_api/util:runfiles", + ], +) diff --git a/sandbox/compile_executor.cc b/sandbox/compile_executor.cc new file mode 100644 index 000000000..6b0077e72 --- /dev/null +++ b/sandbox/compile_executor.cc @@ -0,0 +1,254 @@ +// Copyright 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Based on this example: +// https://github.com/google/sandboxed-api/blob/master/sandboxed_api/sandbox2/examples/static/static_sandbox.cc + +#include <linux/futex.h> +#include <sys/resource.h> +#include <sys/syscall.h> +#include <unistd.h> + +#include <cstdio> +#include <cstdlib> +#include <filesystem> +#include <memory> +#include <string> +#include <vector> + +#include "absl/base/internal/raw_logging.h" +#include "absl/memory/memory.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_format.h" +#include "sandboxed_api/sandbox2/executor.h" +#include "sandboxed_api/sandbox2/limits.h" +#include "sandboxed_api/sandbox2/policy.h" +#include "sandboxed_api/sandbox2/policybuilder.h" +#include "sandboxed_api/sandbox2/result.h" +#include "sandboxed_api/sandbox2/sandbox2.h" +#include "sandboxed_api/sandbox2/util.h" +#include "sandboxed_api/sandbox2/util/bpf_helper.h" +#include "sandboxed_api/util/flag.h" +#include "sandboxed_api/util/runfiles.h" +#include "tools/cpp/runfiles/runfiles.h" + +namespace fs = std::filesystem; + +const int TIMEOUT_SECS = 1000; + +void OutputFD(int stdoutFd, int errFd) +{ + for (;;) + { + char stdoutBuf[4096]; + char stderrBuf[4096]; + ssize_t stdoutRLen = read(stdoutFd, stdoutBuf, sizeof(stdoutBuf)); + ssize_t stderrRLen = read(errFd, stderrBuf, sizeof(stderrBuf)); + if (stdoutRLen < 1) + { + break; + } + } +} + +int main(int argc, char **argv) +{ + gflags::ParseCommandLineFlags(&argc, &argv, true); + google::InitGoogleLogging(argv[0]); + + std::string nodeRelativePath(argv[1]); + std::string workerRelativeRoot(argv[2]); + std::string socketPath(argv[3]); + std::string compileConfigBase64(argv[4]); + std::string projectDir(argv[5]); + + std::string currentPath = std::string(fs::current_path()) + "/"; + std::string nodePath = + sapi::GetDataDependencyFilePath(nodeRelativePath); + + std::string workerRoot = sapi::GetDataDependencyFilePath(workerRelativeRoot); + std::string workerBundle = workerRoot + "/worker_bundle.js"; + + std::vector<std::string> args = { + nodePath, + "/worker_root/worker_bundle.js", + socketPath, + compileConfigBase64 + }; + + auto executor = absl::make_unique<sandbox2::Executor>(nodePath, args); + + executor->set_enable_sandbox_before_exec(true) + .limits() + ->set_rlimit_as(RLIM64_INFINITY) + .set_rlimit_fsize(4ULL << 20) + .set_rlimit_cpu(RLIM64_INFINITY) + .set_walltime_limit(absl::Seconds(90)); + + int stdoutFd = executor->ipc()->ReceiveFd(STDOUT_FILENO); + int stderrFd = executor->ipc()->ReceiveFd(STDERR_FILENO); + + auto policy = sandbox2::PolicyBuilder() + // Workaround to make the forkserver's execveat work. 
+ .AddFileAt("/dev/zero", "/dev/fd/1022", false) + + .AddFile(socketPath, false) + .AddDirectory(projectDir, true) + .AddLibrariesForBinary(nodePath) + + .AddFileAt(workerRoot + "/worker_bundle.js", "/worker_root/worker_bundle.js", true) + .AddFileAt(workerRoot + "/node_modules/vm2/index.js", "/worker_root/node_modules/vm2/index.js", true) + .AddFileAt(workerRoot + "/node_modules/vm2/lib/bridge.js", "/worker_root/node_modules/vm2/lib/bridge.js", true) + .AddFileAt(workerRoot + "/node_modules/vm2/lib/builtin.js", "/worker_root/node_modules/vm2/lib/builtin.js", true) + .AddFileAt(workerRoot + "/node_modules/vm2/lib/cli.js", "/worker_root/node_modules/vm2/lib/cli.js", true) + .AddFileAt(workerRoot + "/node_modules/vm2/lib/compiler.js", "/worker_root/node_modules/vm2/lib/compiler.js", true) + .AddFileAt(workerRoot + "/node_modules/vm2/lib/events.js", "/worker_root/node_modules/vm2/lib/events.js", true) + .AddFileAt(workerRoot + "/node_modules/vm2/lib/filesystem.js", "/worker_root/node_modules/vm2/lib/filesystem.js", true) + .AddFileAt(workerRoot + "/node_modules/vm2/lib/main.js", "/worker_root/node_modules/vm2/lib/main.js", true) + .AddFileAt(workerRoot + "/node_modules/vm2/lib/nodevm.js", "/worker_root/node_modules/vm2/lib/nodevm.js", true) + .AddFileAt(workerRoot + "/node_modules/vm2/lib/resolver-compat.js", "/worker_root/node_modules/vm2/lib/resolver-compat.js", true) + .AddFileAt(workerRoot + "/node_modules/vm2/lib/resolver.js", "/worker_root/node_modules/vm2/lib/resolver.js", true) + .AddFileAt(workerRoot + "/node_modules/vm2/lib/script.js", "/worker_root/node_modules/vm2/lib/script.js", true) + .AddFileAt(workerRoot + "/node_modules/vm2/lib/setup-node-sandbox.js", "/worker_root/node_modules/vm2/lib/setup-node-sandbox.js", true) + .AddFileAt(workerRoot + "/node_modules/vm2/lib/setup-sandbox.js", "/worker_root/node_modules/vm2/lib/setup-sandbox.js", true) + .AddFileAt(workerRoot + "/node_modules/vm2/lib/transformer.js", "/worker_root/node_modules/vm2/lib/transformer.js", true) + .AddFileAt(workerRoot + "/node_modules/vm2/lib/vm.js", "/worker_root/node_modules/vm2/lib/vm.js", true) + .AddFileAt(workerRoot + "/node_modules/vm2/node_modules/.bin/acorn", "/worker_root/node_modules/vm2/node_modules/.bin/acorn", true) + .AddFileAt(workerRoot + "/node_modules/vm2/node_modules/acorn/bin/acorn", "/worker_root/node_modules/vm2/node_modules/acorn/bin/acorn", true) + .AddFileAt(workerRoot + "/node_modules/vm2/node_modules/acorn/dist/acorn.js", "/worker_root/node_modules/vm2/node_modules/acorn/dist/acorn.js", true) + .AddFileAt(workerRoot + "/node_modules/vm2/node_modules/acorn/dist/acorn.mjs", "/worker_root/node_modules/vm2/node_modules/acorn/dist/acorn.mjs", true) + .AddFileAt(workerRoot + "/node_modules/vm2/node_modules/acorn/dist/bin.js", "/worker_root/node_modules/vm2/node_modules/acorn/dist/bin.js", true) + .AddFileAt(workerRoot + "/node_modules/vm2/node_modules/acorn/package.json", "/worker_root/node_modules/vm2/node_modules/acorn/package.json", true) + .AddFileAt(workerRoot + "/node_modules/vm2/package.json", "/worker_root/node_modules/vm2/package.json", true) + .AddFileAt(workerRoot + "/node_modules/acorn-walk/dist/walk.js", "/worker_root/node_modules/acorn-walk/dist/walk.js", true) + .AddFileAt(workerRoot + "/node_modules/acorn-walk/dist/walk.mjs", "/worker_root/node_modules/acorn-walk/dist/walk.mjs", true) + .AddFileAt(workerRoot + "/node_modules/acorn-walk/package.json", "/worker_root/node_modules/acorn-walk/package.json", true) + + // System policies are described here as "[syscall 
number], reason". + + // [202/futex], fast user-space locking, used by v8 when available. + // If not available, V8 will emulate them instead, which is slower: + // https://source.corp.google.com/cobalt/third_party/v8/src/execution/futex-emulation.h;rcl=8a873473f20e4e6ad0a507e6ae257e4d1bcc9416;l=22 + .AllowFutexOp(FUTEX_WAKE) + .AllowFutexOp(FUTEX_WAIT) + .AllowFutexOp(FUTEX_CMP_REQUEUE) + + // File and directory content handling. + .AllowRead() + .AllowReaddir() + .AllowWrite() + .AllowAccess() + .AllowGetIDs() + + // [257/openat], open a file relative to a directory file descriptor. + // Required for opening files. + .AllowOpen() + // [9/mmap], map or unmap files or devices into memory. + // JS files are loaded into memory by V8. + .AllowMmap() + + // [24/sched_yield], allow delegation back to the sandboxer on timeout. + .AllowSyscall(__NR_sched_yield) + + // [302/prlimit64], set resource limits of 64 bit processes. + .AllowSyscall(__NR_prlimit64) + + // Allow PKU for protecting spaces. + // https://groups.google.com/a/google.com/g/v8-google3/c/5qBIb3IQ4J0 + .AllowSyscall(__NR_pkey_alloc) + .AllowSyscall(__NR_pkey_free) + .AllowSyscall(__NR_pkey_mprotect) + + // [39/getpid], get process ID. + .AllowSyscalls({__NR_getpid, __NR_gettid}) + // [56/clone], create a child process. Used for thread creation. + .AllowSyscall(__NR_clone) + // [234/tgkill], send a kill signal to a thread. In particular, used when + // hitting memory limits. + .AllowSyscall(__NR_tgkill) + // Memory management. + .AllowTcMalloc() + // [28/madvise], give advice about use of memory + .AllowSyscall(__NR_madvise) + // [10/mprotect], set protection of a region of memory. + .AllowSyscall(__NR_mprotect) + // [324/membarrier], issue memory barriers. + .AllowSyscall(__NR_membarrier) + // [16/ioctl], used for terminal output. + .AllowSyscall(__NR_ioctl) + // [330/pkey_alloc], used by V8 for querying available memory protection. + .AllowSyscall(__NR_pkey_alloc) + // Needed in v8::base::Stack::GetStackStart(). + .AllowSyscall(__NR_sched_getaffinity) + .AllowTime() + .AllowExit() + .AllowGetRandom() + .AllowDynamicStartup() + // For UDS communication. + .AllowSyscall(__NR_rt_sigprocmask) + .AllowSyscall(__NR_rt_sigaction) + .AllowSyscall(__NR_fcntl) + .AllowSyscall(__NR_getsockopt) + .AllowSyscall(__NR_setsockopt) + .AllowSyscall(__NR_sendto) + .AllowSyscall(__NR_shutdown) + .AllowSyscall(__NR_bind) + .AllowSyscall(__NR_listen) + .AllowSyscall(__NR_connect) + .AllowSyscall(__NR_getsockname) + .AllowSyscall(__NR_socket) + .AllowSyscall(__NR_socketpair) + .AllowSyscall(__NR_sendmmsg) + // Allow epoll I/O event notification and piping for fd data transfer. + .AllowSyscall(__NR_epoll_create1) + .AllowSyscall(__NR_epoll_ctl) + .AllowSyscall(__NR_epoll_wait) + .AllowSyscall(__NR_pipe2) + .AllowSyscall(__NR_eventfd2) + + .AllowSyscall(__NR_clone3) + .AllowSyscall(__NR_sysinfo) + .AllowSyscall(__NR_statx) + .AllowSyscall(__NR_getcwd) + .BuildOrDie(); + + sandbox2::Sandbox2 s2(std::move(executor), std::move(policy)); + + // If the sandbox program fails to start, return early. + if (!s2.RunAsync()) + { + auto result = s2.AwaitResultWithTimeout( + absl::Seconds(TIMEOUT_SECS)); + LOG(ERROR) << "sandbox failed to start: " << result->ToString(); + return EXIT_FAILURE; + } + + auto result = s2.AwaitResultWithTimeout( + absl::Seconds(TIMEOUT_SECS)); + + OutputFD(stdoutFd, stderrFd); + + printf("Final execution status: %s\n", result->ToString().c_str()); + + return result.ok() && (result->final_status() == sandbox2::Result::OK) ? 
EXIT_SUCCESS + : EXIT_FAILURE; +} diff --git a/sandbox/vm/compile.ts b/sandbox/vm/compile.ts index 4ce36a5c7..c4983e66c 100644 --- a/sandbox/vm/compile.ts +++ b/sandbox/vm/compile.ts @@ -1,8 +1,10 @@ import * as path from "path"; import { CompilerFunction, NodeVM } from "vm2"; +import * as net from "net"; import { dataform } from "df/protos/ts"; import { createCoreExecutionRequest, createGenIndexConfig } from "df/sandbox/vm/create_config"; +import { decode64 } from "df/common/protos"; function missingValidCorePackageError() { return new Error( @@ -75,21 +77,29 @@ export function compile(compileConfig: dataform.ICompileConfig) { return userCodeVm.run(genIndex(createGenIndexConfig(compileConfig)), vmIndexFileName); } -export function listenForCompileRequest() { - process.on("message", (compileConfig: dataform.ICompileConfig) => { +export function compileAndSend(socket: string, encodedCompileConfig: string) { + const client = net.createConnection(socket); + client.on("connect", () => { + const compileConfig = decode64(dataform.CompileConfig, encodedCompileConfig); try { const compiledResult = compile(compileConfig); - process.send(compiledResult); + client.write(compiledResult, (err) => { + client.end(); + process.exit(0); + }); } catch (e) { const serializableError = {}; for (const prop of Object.getOwnPropertyNames(e)) { (serializableError as any)[prop] = e[prop]; } - process.send(serializableError); + client.write("ERROR:" + JSON.stringify(serializableError), (err) => { + client.end(); + process.exit(0); + }); } }); } if (require.main === module) { - listenForCompileRequest(); + compileAndSend(process.argv[2], process.argv[3]); } diff --git a/sandbox/worker/BUILD b/sandbox/worker/BUILD new file mode 100644 index 000000000..79862131e --- /dev/null +++ b/sandbox/worker/BUILD @@ -0,0 +1,78 @@ +load("//tools:ts_library.bzl", "ts_library") + +package(default_visibility = ["//visibility:public"]) + +ts_library( + name = "worker", + srcs = glob(["*.ts"]), + deps = [ + "//common/protos", + "//protos:ts", + "//sandbox/vm", + "@npm//@types/node", + ], +) + +load("@npm//webpack:index.bzl", "webpack") + +webpack( + name = "bundler", + data = [ + ":webpack.config.js", + ":worker", + "@npm//webpack-cli", + ], +) + +load("@build_bazel_rules_nodejs//:index.bzl", "npm_package_bin") + +npm_package_bin( + name = "bundle", + outs = [ + "worker_bundle.js", + ], + args = [ + "--config=sandbox/worker/webpack.config.js", + "--output=$(location worker_bundle.js)", + ], + data = [ + ":webpack.config.js", + ], + tool = ":bundler", +) + +load("//tools/common:copy.bzl", "copy_files") + +copy_files( + name = "node_modules", + map = { + # vm2 + "@npm//:node_modules/vm2/index.js": "node_modules/vm2/index.js", + "@npm//:node_modules/vm2/lib/bridge.js": "node_modules/vm2/lib/bridge.js", + "@npm//:node_modules/vm2/lib/builtin.js": "node_modules/vm2/lib/builtin.js", + "@npm//:node_modules/vm2/lib/cli.js": "node_modules/vm2/lib/cli.js", + "@npm//:node_modules/vm2/lib/compiler.js": "node_modules/vm2/lib/compiler.js", + "@npm//:node_modules/vm2/lib/events.js": "node_modules/vm2/lib/events.js", + "@npm//:node_modules/vm2/lib/filesystem.js": "node_modules/vm2/lib/filesystem.js", + "@npm//:node_modules/vm2/lib/main.js": "node_modules/vm2/lib/main.js", + "@npm//:node_modules/vm2/lib/nodevm.js": "node_modules/vm2/lib/nodevm.js", + "@npm//:node_modules/vm2/lib/resolver-compat.js": "node_modules/vm2/lib/resolver-compat.js", + "@npm//:node_modules/vm2/lib/resolver.js": "node_modules/vm2/lib/resolver.js", + 
"@npm//:node_modules/vm2/lib/script.js": "node_modules/vm2/lib/script.js", + "@npm//:node_modules/vm2/lib/setup-node-sandbox.js": "node_modules/vm2/lib/setup-node-sandbox.js", + "@npm//:node_modules/vm2/lib/setup-sandbox.js": "node_modules/vm2/lib/setup-sandbox.js", + "@npm//:node_modules/vm2/lib/transformer.js": "node_modules/vm2/lib/transformer.js", + "@npm//:node_modules/vm2/lib/vm.js": "node_modules/vm2/lib/vm.js", + "@npm//:node_modules/vm2/node_modules/.bin/acorn": "node_modules/vm2/node_modules/.bin/acorn", + "@npm//:node_modules/vm2/node_modules/acorn/bin/acorn": "node_modules/vm2/node_modules/acorn/bin/acorn", + "@npm//:node_modules/vm2/node_modules/acorn/dist/acorn.js": "node_modules/vm2/node_modules/acorn/dist/acorn.js", + "@npm//:node_modules/vm2/node_modules/acorn/dist/acorn.mjs": "node_modules/vm2/node_modules/acorn/dist/acorn.mjs", + "@npm//:node_modules/vm2/node_modules/acorn/dist/bin.js": "node_modules/vm2/node_modules/acorn/dist/bin.js", + "@npm//:node_modules/vm2/node_modules/acorn/package.json": "node_modules/vm2/node_modules/acorn/package.json", + "@npm//:node_modules/vm2/package.json": "node_modules/vm2/package.json", + # acorn-walk + "@npm//:node_modules/acorn-walk/dist/walk.js": "node_modules/acorn-walk/dist/walk.js", + "@npm//:node_modules/acorn-walk/dist/walk.mjs": "node_modules/acorn-walk/dist/walk.mjs", + "@npm//:node_modules/acorn-walk/package.json": "node_modules/acorn-walk/package.json", + }, +) diff --git a/sandbox/worker/webpack.config.js b/sandbox/worker/webpack.config.js new file mode 100644 index 000000000..1903e1541 --- /dev/null +++ b/sandbox/worker/webpack.config.js @@ -0,0 +1,42 @@ +const path = require("path"); +const webpack = require("webpack"); +const fs = require("fs"); + +module.exports = (env, argv) => { + const config = { + mode: argv.mode || "development", + target: 'node', + entry: [path.resolve(process.env.RUNFILES, "df/sandbox/worker/worker")], + output: { + path: path.dirname(path.resolve(argv.output)), + filename: path.basename(argv.output) + }, + externals: { + "vm2": "require('vm2')" + }, + optimization: { + minimize: true + }, + stats: { + warnings: true + }, + node: { + fs: "empty", + child_process: "empty" + }, + resolve: { + extensions: [".ts", ".tsx", ".js", ".jsx", ".json", ".css"], + alias: { + df: path.resolve(process.env.RUNFILES, "df") + } + }, + + plugins: [ + new webpack.optimize.LimitChunkCountPlugin({ + maxChunks: 1 + }) + ], + + }; + return config; +}; diff --git a/sandbox/worker/worker.ts b/sandbox/worker/worker.ts new file mode 100644 index 000000000..25f974212 --- /dev/null +++ b/sandbox/worker/worker.ts @@ -0,0 +1,3 @@ +import { compileAndSend } from "df/sandbox/vm/compile"; + +compileAndSend(process.argv[2], process.argv[3]); diff --git a/scripts/cloudbuild/bazel_test b/scripts/cloudbuild/bazel_test index 960f16d65..ef1e82ecf 100755 --- a/scripts/cloudbuild/bazel_test +++ b/scripts/cloudbuild/bazel_test @@ -1,9 +1,5 @@ #!/bin/bash set -e -# Run tslint. -bazel run @nodejs//:yarn -bazel build @npm//tslint/bin:tslint && bazel-bin/external/npm/tslint/bin/tslint.sh --project . - # Run all the tests -bazel test --config=remote-cache ... --build_tests_only --test_env=USE_CLOUD_BUILD_NETWORK=true +bazel test --config=remote-cache ... --build_tests_only --test_env=USE_CLOUD_BUILD_NETWORK=true -- -tests/integration/... -tests/cli/... 
diff --git a/tests/api/BUILD b/tests/api/BUILD index 43933900e..e995b96fa 100644 --- a/tests/api/BUILD +++ b/tests/api/BUILD @@ -34,4 +34,5 @@ ts_test_suite( "@npm//stack-trace", "@npm//ts-mockito", ], + tags = ["local"] ) diff --git a/tests/api/examples.spec.ts b/tests/api/examples.spec.ts index 8234523b1..192f1360f 100644 --- a/tests/api/examples.spec.ts +++ b/tests/api/examples.spec.ts @@ -7,278 +7,289 @@ import { targetAsReadableString } from "df/core/targets"; import { dataform } from "df/protos/ts"; import { suite, test } from "df/testing"; import { cleanSql } from "df/tests/utils"; +import { exec } from "child_process"; + + +suite("examples", ({ before }) => { + const cleanExamplesRoot = path.join(process.env.TEST_TMPDIR, "examples"); + before("copy examples into clean fs", () => { + const cp = exec(`cp -LR ${path.resolve("examples")} ${cleanExamplesRoot}`); + return new Promise((resolve) => { + cp.on("close", () => resolve()); + }); + }); -suite("examples", () => { suite("common_v2 bigquery", async () => { - for (const useMain of [true, false]) { - for (const databaseSuffix of ["", "foo"]) { - for (const schemaSuffix of ["", "bar"]) { - const databaseWithSuffix = (database: string) => - databaseSuffix ? `${database}_${databaseSuffix}` : database; - const schemaWithSuffix = (schema: string) => - schemaSuffix ? `${schema}_${schemaSuffix}` : schema; - - test(`compiles with database suffix "${databaseSuffix}", schema suffix "${schemaSuffix}"`, async () => { - const graph = await compile({ - projectDir: path.resolve("examples/common_v2"), - projectConfigOverride: { schemaSuffix, databaseSuffix, warehouse: "bigquery" }, - useMain - }); - expect( - graph.graphErrors.compilationErrors.map(({ fileName, message }) => ({ - fileName, - message - })) - ).deep.equals([ - { - fileName: "includes/example_ignore.js", - message: "publish is not defined" - }, - { - fileName: "definitions/has_compile_errors/assertion_with_bigquery.sqlx", - message: - 'Unexpected property "bigquery" in assertion config. Supported properties are: ["database","dependencies","description","disabled","hermetic","name","schema","tags","type"]' - }, - { - fileName: "definitions/has_compile_errors/assertion_with_materialized.sqlx", - message: - 'Unexpected property "materialized" in assertion config. Supported properties are: ["database","dependencies","description","disabled","hermetic","name","schema","tags","type"]' - }, - { - fileName: "definitions/has_compile_errors/assertion_with_output.sqlx", - message: - 'Unexpected property "hasOutput" in assertion config. Supported properties are: ["database","dependencies","description","disabled","hermetic","name","schema","tags","type"]' - }, - { - fileName: "definitions/has_compile_errors/assertion_with_postops.sqlx", - message: "Actions may only include post_operations if they create a dataset." - }, - { - fileName: "definitions/has_compile_errors/assertion_with_preops.sqlx", - message: "Actions may only include pre_operations if they create a dataset." - }, - { - fileName: "definitions/has_compile_errors/assertion_with_redshift.sqlx", - message: - 'Unexpected property "redshift" in assertion config. Supported properties are: ["database","dependencies","description","disabled","hermetic","name","schema","tags","type"]' - }, - { - fileName: "definitions/has_compile_errors/protected_assertion.sqlx", - message: - "Actions may only specify 'protected: true' if they are of type 'incremental'." 
- }, - { - fileName: "definitions/has_compile_errors/protected_assertion.sqlx", - message: - 'Unexpected property "protected" in assertion config. Supported properties are: ["database","dependencies","description","disabled","hermetic","name","schema","tags","type"]' - }, - { - fileName: "definitions/has_compile_errors/view_with_incremental.sqlx", - message: - "Actions may only include incremental_where if they are of type 'incremental'." - }, - { - fileName: "definitions/has_compile_errors/view_with_multiple_statements.sqlx", - message: - "Actions may only contain more than one SQL statement if they are of type 'operations'." - }, - { - fileName: "definitions/has_compile_errors/view_with_semi_colon_at_end.sqlx", - message: "Semi-colons are not allowed at the end of SQL statements." - }, - { - fileName: "definitions/has_compile_errors/table_with_materialized.sqlx", - message: - "The 'materialized' option is only valid for Snowflake and BigQuery views" - }, - { - fileName: "definitions/has_compile_errors/view_without_hermetic.sqlx", - message: - "Zero-dependency actions which create datasets are required to explicitly declare 'hermetic: (true|false)' when run caching is turned on." - } - ]); - - // Check JS blocks get processed. - const exampleJsBlocks = graph.tables.find( - (t: dataform.ITable) => - targetAsReadableString(t.target) === - dotJoined( - databaseWithSuffix("tada-analytics"), - schemaWithSuffix("df_integration_test"), - "example_js_blocks" - ) - ); - expect(exampleJsBlocks.type).equals("table"); - expect(exampleJsBlocks.query.trim()).equals("select 1 as foo"); - - // Check we can import and use an external package. - const exampleIncremental = graph.tables.find( - (t: dataform.ITable) => - targetAsReadableString(t.target) === - dotJoined( + for (const useSandbox2 of [true, false]) { + for (const useMain of [true, false]) { + for (const databaseSuffix of ["", "foo"]) { + for (const schemaSuffix of ["", "bar"]) { + const databaseWithSuffix = (database: string) => + databaseSuffix ? `${database}_${databaseSuffix}` : database; + const schemaWithSuffix = (schema: string) => + schemaSuffix ? `${schema}_${schemaSuffix}` : schema; + + test(`compiles with database suffix "${databaseSuffix}", schema suffix "${schemaSuffix}"`, async () => { + const graph = await compile({ + projectDir: path.join(cleanExamplesRoot, "common_v2"), + projectConfigOverride: { schemaSuffix, databaseSuffix, warehouse: "bigquery" }, + useMain + }, useSandbox2); + expect( + graph.graphErrors.compilationErrors.map(({ fileName, message }) => ({ + fileName, + message + })) + ).deep.equals([ + { + fileName: "includes/example_ignore.js", + message: "publish is not defined" + }, + { + fileName: "definitions/has_compile_errors/assertion_with_bigquery.sqlx", + message: + 'Unexpected property "bigquery" in assertion config. Supported properties are: ["database","dependencies","description","disabled","hermetic","name","schema","tags","type"]' + }, + { + fileName: "definitions/has_compile_errors/assertion_with_materialized.sqlx", + message: + 'Unexpected property "materialized" in assertion config. Supported properties are: ["database","dependencies","description","disabled","hermetic","name","schema","tags","type"]' + }, + { + fileName: "definitions/has_compile_errors/assertion_with_output.sqlx", + message: + 'Unexpected property "hasOutput" in assertion config. 
Supported properties are: ["database","dependencies","description","disabled","hermetic","name","schema","tags","type"]' + }, + { + fileName: "definitions/has_compile_errors/assertion_with_postops.sqlx", + message: "Actions may only include post_operations if they create a dataset." + }, + { + fileName: "definitions/has_compile_errors/assertion_with_preops.sqlx", + message: "Actions may only include pre_operations if they create a dataset." + }, + { + fileName: "definitions/has_compile_errors/assertion_with_redshift.sqlx", + message: + 'Unexpected property "redshift" in assertion config. Supported properties are: ["database","dependencies","description","disabled","hermetic","name","schema","tags","type"]' + }, + { + fileName: "definitions/has_compile_errors/protected_assertion.sqlx", + message: + "Actions may only specify 'protected: true' if they are of type 'incremental'." + }, + { + fileName: "definitions/has_compile_errors/protected_assertion.sqlx", + message: + 'Unexpected property "protected" in assertion config. Supported properties are: ["database","dependencies","description","disabled","hermetic","name","schema","tags","type"]' + }, + { + fileName: "definitions/has_compile_errors/view_with_incremental.sqlx", + message: + "Actions may only include incremental_where if they are of type 'incremental'." + }, + { + fileName: "definitions/has_compile_errors/view_with_multiple_statements.sqlx", + message: + "Actions may only contain more than one SQL statement if they are of type 'operations'." + }, + { + fileName: "definitions/has_compile_errors/view_with_semi_colon_at_end.sqlx", + message: "Semi-colons are not allowed at the end of SQL statements." + }, + { + fileName: "definitions/has_compile_errors/table_with_materialized.sqlx", + message: + "The 'materialized' option is only valid for Snowflake and BigQuery views" + }, + { + fileName: "definitions/has_compile_errors/view_without_hermetic.sqlx", + message: + "Zero-dependency actions which create datasets are required to explicitly declare 'hermetic: (true|false)' when run caching is turned on." + } + ]); + + // Check JS blocks get processed. + const exampleJsBlocks = graph.tables.find( + (t: dataform.ITable) => + targetAsReadableString(t.target) === + dotJoined( + databaseWithSuffix("tada-analytics"), + schemaWithSuffix("df_integration_test"), + "example_js_blocks" + ) + ); + expect(exampleJsBlocks.type).equals("table"); + expect(exampleJsBlocks.query.trim()).equals("select 1 as foo"); + + // Check we can import and use an external package. 
+ const exampleIncremental = graph.tables.find( + (t: dataform.ITable) => + targetAsReadableString(t.target) === + dotJoined( + databaseWithSuffix("tada-analytics"), + schemaWithSuffix("df_integration_test"), + "example_incremental" + ) + ); + expect(exampleIncremental.protected).eql(true); + expect(exampleIncremental.query.trim()).equals("select current_timestamp() as ts"); + expect(exampleIncremental.where.trim()).equals( + `ts > (select max(ts) from \`${dotJoined( databaseWithSuffix("tada-analytics"), schemaWithSuffix("df_integration_test"), "example_incremental" - ) - ); - expect(exampleIncremental.protected).eql(true); - expect(exampleIncremental.query.trim()).equals("select current_timestamp() as ts"); - expect(exampleIncremental.where.trim()).equals( - `ts > (select max(ts) from \`${dotJoined( - databaseWithSuffix("tada-analytics"), - schemaWithSuffix("df_integration_test"), - "example_incremental" - )}\`) or (select max(ts) from \`${dotJoined( - databaseWithSuffix("tada-analytics"), - schemaWithSuffix("df_integration_test"), - "example_incremental" - )}\`) is null` - ); - - const exampleIsIncremental = graph.tables.filter( - (t: dataform.ITable) => - targetAsReadableString(t.target) === - dotJoined( + )}\`) or (select max(ts) from \`${dotJoined( databaseWithSuffix("tada-analytics"), schemaWithSuffix("df_integration_test"), - "example_is_incremental" - ) - )[0]; - expect(cleanSql(exampleIsIncremental.query.trim())).equals( - "select * from (select current_timestamp() as ts)" - ); - expect(cleanSql(exampleIsIncremental.incrementalQuery)).equals( - cleanSql( - `select * from (select current_timestamp() as ts) - where ts > (select max(ts) from \`${dotJoined( - databaseWithSuffix("tada-analytics"), - schemaWithSuffix("df_integration_test"), - "example_is_incremental" - )}\`) or (select max(ts) from \`${dotJoined( - databaseWithSuffix("tada-analytics"), - schemaWithSuffix("df_integration_test"), - "example_is_incremental" + "example_incremental" )}\`) is null` - ) - ); - - expect(exampleIsIncremental.incrementalPreOps).to.eql(["\n select 1\n"]); - expect(exampleIsIncremental.incrementalPostOps).to.eql(["\n select 15\n"]); - - // Check tables defined in includes are not included. - const exampleIgnore = graph.tables.find( - (t: dataform.ITable) => targetAsReadableString(t.target) === "example_ignore" - ); - expect(exampleIgnore).equal(undefined); - const exampleIgnore2 = graph.tables.find( - (t: dataform.ITable) => - targetAsReadableString(t.target) === - dotJoined( - databaseWithSuffix("tada-analytics"), - schemaWithSuffix("df_integration_test"), - "example_ignore" - ) - ); - expect(exampleIgnore2).equal(undefined); - - // Check SQL files with raw back-ticks get escaped. - const exampleBackticks = graph.tables.find( - (t: dataform.ITable) => - targetAsReadableString(t.target) === - dotJoined( - databaseWithSuffix("tada-analytics"), - schemaWithSuffix("df_integration_test"), - "example_backticks" - ) - ); - expect(cleanSql(exampleBackticks.query)).equals( - "select * from `tada-analytics.df_integration_test.sample_data`" - ); - expect(exampleBackticks.preOps).to.eql([ - '\n GRANT SELECT ON `tada-analytics.df_integration_test.sample_data` TO GROUP "allusers@dataform.co"\n' - ]); - expect(exampleBackticks.postOps).to.eql([]); - - // Check deferred calls to table resolve to the correct definitions file. 
- const exampleDeferred = graph.tables.find( - (t: dataform.ITable) => - targetAsReadableString(t.target) === - dotJoined( - databaseWithSuffix("tada-analytics"), - schemaWithSuffix("df_integration_test"), - "example_deferred" + ); + + const exampleIsIncremental = graph.tables.filter( + (t: dataform.ITable) => + targetAsReadableString(t.target) === + dotJoined( + databaseWithSuffix("tada-analytics"), + schemaWithSuffix("df_integration_test"), + "example_is_incremental" + ) + )[0]; + expect(cleanSql(exampleIsIncremental.query.trim())).equals( + "select * from (select current_timestamp() as ts)" + ); + expect(cleanSql(exampleIsIncremental.incrementalQuery)).equals( + cleanSql( + `select * from (select current_timestamp() as ts) + where ts > (select max(ts) from \`${dotJoined( + databaseWithSuffix("tada-analytics"), + schemaWithSuffix("df_integration_test"), + "example_is_incremental" + )}\`) or (select max(ts) from \`${dotJoined( + databaseWithSuffix("tada-analytics"), + schemaWithSuffix("df_integration_test"), + "example_is_incremental" + )}\`) is null` ) - ); - expect(exampleDeferred.fileName).includes("definitions/example_deferred.js"); - - // Check inline tables - const exampleInline = graph.tables.find( - (t: dataform.ITable) => - targetAsReadableString(t.target) === - dotJoined( + ); + + expect(exampleIsIncremental.incrementalPreOps).to.eql(["\n select 1\n"]); + expect(exampleIsIncremental.incrementalPostOps).to.eql(["\n select 15\n"]); + + // Check tables defined in includes are not included. + const exampleIgnore = graph.tables.find( + (t: dataform.ITable) => targetAsReadableString(t.target) === "example_ignore" + ); + expect(exampleIgnore).equal(undefined); + const exampleIgnore2 = graph.tables.find( + (t: dataform.ITable) => + targetAsReadableString(t.target) === + dotJoined( + databaseWithSuffix("tada-analytics"), + schemaWithSuffix("df_integration_test"), + "example_ignore" + ) + ); + expect(exampleIgnore2).equal(undefined); + + // Check SQL files with raw back-ticks get escaped. + const exampleBackticks = graph.tables.find( + (t: dataform.ITable) => + targetAsReadableString(t.target) === + dotJoined( + databaseWithSuffix("tada-analytics"), + schemaWithSuffix("df_integration_test"), + "example_backticks" + ) + ); + expect(cleanSql(exampleBackticks.query)).equals( + "select * from `tada-analytics.df_integration_test.sample_data`" + ); + expect(exampleBackticks.preOps).to.eql([ + '\n GRANT SELECT ON `tada-analytics.df_integration_test.sample_data` TO GROUP "allusers@dataform.co"\n' + ]); + expect(exampleBackticks.postOps).to.eql([]); + + // Check deferred calls to table resolve to the correct definitions file. 
+ const exampleDeferred = graph.tables.find( + (t: dataform.ITable) => + targetAsReadableString(t.target) === + dotJoined( + databaseWithSuffix("tada-analytics"), + schemaWithSuffix("df_integration_test"), + "example_deferred" + ) + ); + expect(exampleDeferred.fileName).includes("definitions/example_deferred.js"); + + // Check inline tables + const exampleInline = graph.tables.find( + (t: dataform.ITable) => + targetAsReadableString(t.target) === + dotJoined( + databaseWithSuffix("tada-analytics"), + schemaWithSuffix("df_integration_test"), + "example_inline" + ) + ); + expect(exampleInline.type).equals("inline"); + expect(exampleInline.query.trim()).equals( + `select * from \`${dotJoined( databaseWithSuffix("tada-analytics"), schemaWithSuffix("df_integration_test"), - "example_inline" - ) - ); - expect(exampleInline.type).equals("inline"); - expect(exampleInline.query.trim()).equals( - `select * from \`${dotJoined( - databaseWithSuffix("tada-analytics"), - schemaWithSuffix("df_integration_test"), - "sample_data" - )}\`` - ); - expect(exampleInline.dependencyTargets).eql([ - dataform.Target.create({ - database: databaseWithSuffix("tada-analytics"), - schema: schemaWithSuffix("df_integration_test"), - name: "sample_data" - }) - ]); - - const exampleUsingInline = graph.tables.find( - (t: dataform.ITable) => - targetAsReadableString(t.target) === - dotJoined( + "sample_data" + )}\`` + ); + expect(exampleInline.dependencyTargets).eql([ + dataform.Target.create({ + database: databaseWithSuffix("tada-analytics"), + schema: schemaWithSuffix("df_integration_test"), + name: "sample_data" + }) + ]); + + const exampleUsingInline = graph.tables.find( + (t: dataform.ITable) => + targetAsReadableString(t.target) === + dotJoined( + databaseWithSuffix("tada-analytics"), + schemaWithSuffix("df_integration_test"), + "example_using_inline" + ) + ); + expect(exampleUsingInline.type).equals("table"); + expect(exampleUsingInline.query.trim()).equals( + `select * from (\n\nselect * from \`${dotJoined( databaseWithSuffix("tada-analytics"), schemaWithSuffix("df_integration_test"), - "example_using_inline" - ) - ); - expect(exampleUsingInline.type).equals("table"); - expect(exampleUsingInline.query.trim()).equals( - `select * from (\n\nselect * from \`${dotJoined( - databaseWithSuffix("tada-analytics"), - schemaWithSuffix("df_integration_test"), - "sample_data" - )}\`\n)\nwhere true` - ); - expect(exampleUsingInline.dependencyTargets).eql([ - dataform.Target.create({ - database: databaseWithSuffix("tada-analytics"), - schema: schemaWithSuffix("df_integration_test"), - name: "sample_data" - }) - ]); - - // Check view - const exampleView = graph.tables.find( - (t: dataform.ITable) => - targetAsReadableString(t.target) === - dotJoined( + "sample_data" + )}\`\n)\nwhere true` + ); + expect(exampleUsingInline.dependencyTargets).eql([ + dataform.Target.create({ + database: databaseWithSuffix("tada-analytics"), + schema: schemaWithSuffix("df_integration_test"), + name: "sample_data" + }) + ]); + + // Check view + const exampleView = graph.tables.find( + (t: dataform.ITable) => + targetAsReadableString(t.target) === + dotJoined( + databaseWithSuffix("tada-analytics"), + schemaWithSuffix("df_integration_test"), + "example_view" + ) + ); + expect(exampleView.type).equals("view"); + expect(exampleView.query.trim()).equals( + `select * from \`${dotJoined( databaseWithSuffix("tada-analytics"), schemaWithSuffix("df_integration_test"), - "example_view" - ) - ); - expect(exampleView.type).equals("view"); - 
expect(exampleView.query.trim()).equals( - `select * from \`${dotJoined( - databaseWithSuffix("tada-analytics"), - schemaWithSuffix("df_integration_test"), - "sample_data" - )}\`\n` + + "sample_data" + )}\`\n` + `inner join select * from \`${dotJoined( databaseWithSuffix("tada-analytics"), schemaWithSuffix("override_schema"), @@ -289,466 +300,467 @@ suite("examples", () => { schemaWithSuffix("df_integration_test"), "override_database_example" )}\`` - ); - expect(exampleView.target).deep.equals( - dataform.Target.create({ - database: databaseWithSuffix("tada-analytics"), - schema: schemaWithSuffix("df_integration_test"), - name: "example_view" - }) - ); - expect(exampleView.canonicalTarget).deep.equals( - dataform.Target.create({ - database: "tada-analytics", - schema: "df_integration_test", - name: "example_view" - }) - ); - expect(exampleView.dependencyTargets).eql([ - dataform.Target.create({ - database: databaseWithSuffix("tada-analytics"), - schema: schemaWithSuffix("df_integration_test"), - name: "sample_data" - }), - dataform.Target.create({ - database: databaseWithSuffix("tada-analytics"), - schema: schemaWithSuffix("override_schema"), - name: "override_schema_example" - }), - dataform.Target.create({ - database: databaseWithSuffix("override_database"), - schema: schemaWithSuffix("df_integration_test"), - name: "override_database_example" - }) - ]); - expect(exampleView.tags).to.eql([]); - - // Check materialized view - const exampleMaterializedView = graph.tables.find( - (t: dataform.ITable) => - targetAsReadableString(t.target) === - dotJoined( + ); + expect(exampleView.target).deep.equals( + dataform.Target.create({ + database: databaseWithSuffix("tada-analytics"), + schema: schemaWithSuffix("df_integration_test"), + name: "example_view" + }) + ); + expect(exampleView.canonicalTarget).deep.equals( + dataform.Target.create({ + database: "tada-analytics", + schema: "df_integration_test", + name: "example_view" + }) + ); + expect(exampleView.dependencyTargets).eql([ + dataform.Target.create({ + database: databaseWithSuffix("tada-analytics"), + schema: schemaWithSuffix("df_integration_test"), + name: "sample_data" + }), + dataform.Target.create({ + database: databaseWithSuffix("tada-analytics"), + schema: schemaWithSuffix("override_schema"), + name: "override_schema_example" + }), + dataform.Target.create({ + database: databaseWithSuffix("override_database"), + schema: schemaWithSuffix("df_integration_test"), + name: "override_database_example" + }) + ]); + expect(exampleView.tags).to.eql([]); + + // Check materialized view + const exampleMaterializedView = graph.tables.find( + (t: dataform.ITable) => + targetAsReadableString(t.target) === + dotJoined( + databaseWithSuffix("tada-analytics"), + schemaWithSuffix("df_integration_test"), + "example_materialized_view" + ) + ); + expect(exampleMaterializedView.type).equals("view"); + expect(exampleMaterializedView.materialized).equals(true); + expect(exampleMaterializedView.query.trim()).equals( + `select * from \`${dotJoined( databaseWithSuffix("tada-analytics"), schemaWithSuffix("df_integration_test"), - "example_materialized_view" - ) - ); - expect(exampleMaterializedView.type).equals("view"); - expect(exampleMaterializedView.materialized).equals(true); - expect(exampleMaterializedView.query.trim()).equals( - `select * from \`${dotJoined( - databaseWithSuffix("tada-analytics"), - schemaWithSuffix("df_integration_test"), - "sample_data" - )}\`\n` + - `group by 1` - ); - expect(exampleMaterializedView.target).deep.equals( - 
dataform.Target.create({ - database: databaseWithSuffix("tada-analytics"), - schema: schemaWithSuffix("df_integration_test"), - name: "example_materialized_view" - }) - ); - expect(exampleMaterializedView.canonicalTarget).deep.equals( - dataform.Target.create({ - database: "tada-analytics", - schema: "df_integration_test", - name: "example_materialized_view" - }) - ); - expect(exampleMaterializedView.dependencyTargets).eql([ - dataform.Target.create({ - database: databaseWithSuffix("tada-analytics"), - schema: schemaWithSuffix("df_integration_test"), - name: "sample_data" - }) - ]); - expect(exampleMaterializedView.tags).to.eql([]); - - // Check table - const exampleTable = graph.tables.find( - (t: dataform.ITable) => - targetAsReadableString(t.target) === - dotJoined( + "sample_data" + )}\`\n` + + `group by 1` + ); + expect(exampleMaterializedView.target).deep.equals( + dataform.Target.create({ + database: databaseWithSuffix("tada-analytics"), + schema: schemaWithSuffix("df_integration_test"), + name: "example_materialized_view" + }) + ); + expect(exampleMaterializedView.canonicalTarget).deep.equals( + dataform.Target.create({ + database: "tada-analytics", + schema: "df_integration_test", + name: "example_materialized_view" + }) + ); + expect(exampleMaterializedView.dependencyTargets).eql([ + dataform.Target.create({ + database: databaseWithSuffix("tada-analytics"), + schema: schemaWithSuffix("df_integration_test"), + name: "sample_data" + }) + ]); + expect(exampleMaterializedView.tags).to.eql([]); + + // Check table + const exampleTable = graph.tables.find( + (t: dataform.ITable) => + targetAsReadableString(t.target) === + dotJoined( + databaseWithSuffix("tada-analytics"), + schemaWithSuffix("df_integration_test"), + "example_table" + ) + ); + expect(exampleTable.type).equals("table"); + expect(exampleTable.query.trim()).equals( + `select * from \`${dotJoined( + databaseWithSuffix("tada-analytics"), + schemaWithSuffix("df_integration_test"), + "sample_data" + )}\`\n\n-- here \${"is"} a \`comment\n\n/* \${"another"} \` backtick \` containing \`\`\`comment */` + ); + expect(exampleTable.dependencyTargets).eql([ + dataform.Target.create({ + database: databaseWithSuffix("tada-analytics"), + schema: schemaWithSuffix("df_integration_test"), + name: "sample_data" + }) + ]); + expect(exampleTable.preOps).to.eql([]); + expect(exampleTable.postOps).to.eql([ + `\n GRANT SELECT ON \`${dotJoined( databaseWithSuffix("tada-analytics"), schemaWithSuffix("df_integration_test"), "example_table" - ) - ); - expect(exampleTable.type).equals("table"); - expect(exampleTable.query.trim()).equals( - `select * from \`${dotJoined( - databaseWithSuffix("tada-analytics"), - schemaWithSuffix("df_integration_test"), - "sample_data" - )}\`\n\n-- here \${"is"} a \`comment\n\n/* \${"another"} \` backtick \` containing \`\`\`comment */` - ); - expect(exampleTable.dependencyTargets).eql([ - dataform.Target.create({ - database: databaseWithSuffix("tada-analytics"), - schema: schemaWithSuffix("df_integration_test"), - name: "sample_data" - }) - ]); - expect(exampleTable.preOps).to.eql([]); - expect(exampleTable.postOps).to.eql([ - `\n GRANT SELECT ON \`${dotJoined( - databaseWithSuffix("tada-analytics"), - schemaWithSuffix("df_integration_test"), - "example_table" - )}\` TO GROUP "allusers@dataform.co"\n`, - `\n GRANT SELECT ON \`${dotJoined( - databaseWithSuffix("tada-analytics"), - schemaWithSuffix("df_integration_test"), - "example_table" - )}\` TO GROUP "otherusers@dataform.co"\n` - ]); - 
expect(exampleTable.tags).to.eql([]); - - // Check Table with tags - const exampleTableWithTags = graph.tables.find( - (t: dataform.ITable) => - targetAsReadableString(t.target) === - dotJoined( + )}\` TO GROUP "allusers@dataform.co"\n`, + `\n GRANT SELECT ON \`${dotJoined( databaseWithSuffix("tada-analytics"), schemaWithSuffix("df_integration_test"), - "example_table_with_tags" - ) - ); - expect(exampleTableWithTags.disabled).eql(true); - expect(exampleTableWithTags.tags).to.eql(["tag1", "tag2", "tag3"]); - - // Check table-with-tags's unique key assertion - const exampleTableWithTagsUniqueKeyAssertion = graph.assertions.filter( - t => - targetAsReadableString(t.target) === - dotJoined( + "example_table" + )}\` TO GROUP "otherusers@dataform.co"\n` + ]); + expect(exampleTable.tags).to.eql([]); + + // Check Table with tags + const exampleTableWithTags = graph.tables.find( + (t: dataform.ITable) => + targetAsReadableString(t.target) === + dotJoined( + databaseWithSuffix("tada-analytics"), + schemaWithSuffix("df_integration_test"), + "example_table_with_tags" + ) + ); + expect(exampleTableWithTags.disabled).eql(true); + expect(exampleTableWithTags.tags).to.eql(["tag1", "tag2", "tag3"]); + + // Check table-with-tags's unique key assertion + const exampleTableWithTagsUniqueKeyAssertion = graph.assertions.filter( + t => + targetAsReadableString(t.target) === + dotJoined( + databaseWithSuffix("tada-analytics"), + schemaWithSuffix("df_integration_test_assertions"), + "df_integration_test_example_table_with_tags_assertions_uniqueKey_0" + ) + )[0]; + expect(exampleTableWithTagsUniqueKeyAssertion.disabled).eql(true); + expect(cleanSql(exampleTableWithTagsUniqueKeyAssertion.query)).equals( + `select * from (select sample, count(1) as index_row_count from \`${dotJoined( databaseWithSuffix("tada-analytics"), - schemaWithSuffix("df_integration_test_assertions"), - "df_integration_test_example_table_with_tags_assertions_uniqueKey_0" - ) - )[0]; - expect(exampleTableWithTagsUniqueKeyAssertion.disabled).eql(true); - expect(cleanSql(exampleTableWithTagsUniqueKeyAssertion.query)).equals( - `select * from (select sample, count(1) as index_row_count from \`${dotJoined( - databaseWithSuffix("tada-analytics"), - schemaWithSuffix("df_integration_test"), - "example_table_with_tags" - )}\` group by sample) as data where index_row_count > 1` - ); - expect(exampleTableWithTagsUniqueKeyAssertion.dependencyTargets).eql([ - dataform.Target.create({ - database: databaseWithSuffix("tada-analytics"), - schema: schemaWithSuffix("df_integration_test"), - name: "example_table_with_tags" - }) - ]); - - // Check table-with-tags's row conditions assertion - const exampleTableWithTagsRowConditionsAssertion = graph.assertions.filter( - t => - targetAsReadableString(t.target) === - dotJoined( + schemaWithSuffix("df_integration_test"), + "example_table_with_tags" + )}\` group by sample) as data where index_row_count > 1` + ); + expect(exampleTableWithTagsUniqueKeyAssertion.dependencyTargets).eql([ + dataform.Target.create({ + database: databaseWithSuffix("tada-analytics"), + schema: schemaWithSuffix("df_integration_test"), + name: "example_table_with_tags" + }) + ]); + + // Check table-with-tags's row conditions assertion + const exampleTableWithTagsRowConditionsAssertion = graph.assertions.filter( + t => + targetAsReadableString(t.target) === + dotJoined( + databaseWithSuffix("tada-analytics"), + schemaWithSuffix("df_integration_test_assertions"), + "df_integration_test_example_table_with_tags_assertions_rowConditions" + ) + )[0]; + 
expect(exampleTableWithTagsRowConditionsAssertion.disabled).eql(true); + expect(cleanSql(exampleTableWithTagsRowConditionsAssertion.query)).equals( + `select 'sample is not null' as failing_row_condition, * from \`${dotJoined( databaseWithSuffix("tada-analytics"), - schemaWithSuffix("df_integration_test_assertions"), - "df_integration_test_example_table_with_tags_assertions_rowConditions" - ) - )[0]; - expect(exampleTableWithTagsRowConditionsAssertion.disabled).eql(true); - expect(cleanSql(exampleTableWithTagsRowConditionsAssertion.query)).equals( - `select 'sample is not null' as failing_row_condition, * from \`${dotJoined( - databaseWithSuffix("tada-analytics"), - schemaWithSuffix("df_integration_test"), - "example_table_with_tags" - )}\` where not (sample is not null)` - ); - expect(exampleTableWithTagsRowConditionsAssertion.dependencyTargets).eql([ - dataform.Target.create({ - database: databaseWithSuffix("tada-analytics"), - schema: schemaWithSuffix("df_integration_test"), - name: "example_table_with_tags" - }) - ]); - - // Check sample data - const exampleSampleData = graph.tables.find( - (t: dataform.ITable) => - targetAsReadableString(t.target) === - dotJoined( + schemaWithSuffix("df_integration_test"), + "example_table_with_tags" + )}\` where not (sample is not null)` + ); + expect(exampleTableWithTagsRowConditionsAssertion.dependencyTargets).eql([ + dataform.Target.create({ + database: databaseWithSuffix("tada-analytics"), + schema: schemaWithSuffix("df_integration_test"), + name: "example_table_with_tags" + }) + ]); + + // Check sample data + const exampleSampleData = graph.tables.find( + (t: dataform.ITable) => + targetAsReadableString(t.target) === + dotJoined( + databaseWithSuffix("tada-analytics"), + schemaWithSuffix("df_integration_test"), + "sample_data" + ) + ); + expect(exampleSampleData.type).equals("view"); + expect(exampleSampleData.query.trim()).equals( + "select 1 as sample union all\nselect 2 as sample union all\nselect 3 as sample" + ); + expect(exampleSampleData.preOps).eql([]); + expect(exampleSampleData.dependencyTargets).eql([]); + expect(exampleSampleData.actionDescriptor).to.eql( + dataform.ActionDescriptor.create({ + description: "This is some sample data.", + columns: [ + dataform.ColumnDescriptor.create({ + description: "Sample integers.", + path: ["sample"] + }) + ] + }) + ); + + // Check database override defined in "config {}". 
+ const exampleUsingOverriddenDatabase = graph.tables.find( + (t: dataform.ITable) => + targetAsReadableString(t.target) === + dotJoined( + databaseWithSuffix("override_database"), + schemaWithSuffix("df_integration_test"), + "override_database_example" + ) + ); + + expect(exampleUsingOverriddenDatabase.target.database).equals( + databaseWithSuffix("override_database") + ); + expect(exampleUsingOverriddenDatabase.type).equals("view"); + expect(exampleUsingOverriddenDatabase.query.trim()).equals( + "select 1 as test_database_override" + ); + + // Check schema overrides defined in "config {}" + const exampleUsingOverriddenSchema = graph.tables.find( + (t: dataform.ITable) => + targetAsReadableString(t.target) === + dotJoined( + databaseWithSuffix("tada-analytics"), + schemaWithSuffix("override_schema"), + "override_schema_example" + ) + ); + + expect(exampleUsingOverriddenSchema.target.schema).equals( + schemaWithSuffix("override_schema") + ); + expect(exampleUsingOverriddenSchema.type).equals("view"); + expect(exampleUsingOverriddenSchema.query.trim()).equals( + "select 1 as test_schema_override" + ); + + // Check schema overrides defined in "config {}" -- case with schema unchanged + const exampleUsingOverriddenSchemaUnchanged = graph.tables.find( + (t: dataform.ITable) => + targetAsReadableString(t.target) === + dotJoined( + databaseWithSuffix("tada-analytics"), + schemaWithSuffix("df_integration_test"), + "override_schema_example_unchanged" + ) + ); + + expect(exampleUsingOverriddenSchemaUnchanged.target.schema).equals( + schemaWithSuffix("df_integration_test") + ); + expect(exampleUsingOverriddenSchemaUnchanged.type).equals("view"); + expect(exampleUsingOverriddenSchemaUnchanged.query.trim()).equals( + "select 1 as test_schema_override" + ); + + // Check assertion + const exampleAssertion = graph.assertions.find( + (a: dataform.IAssertion) => + targetAsReadableString(a.target) === + dotJoined( + databaseWithSuffix("tada-analytics"), + schemaWithSuffix("hi_there"), + "example_assertion" + ) + ); + expect(exampleAssertion.target.schema).equals(schemaWithSuffix("hi_there")); + expect(exampleAssertion.query.trim()).equals( + `select * from \`${dotJoined( databaseWithSuffix("tada-analytics"), schemaWithSuffix("df_integration_test"), "sample_data" - ) - ); - expect(exampleSampleData.type).equals("view"); - expect(exampleSampleData.query.trim()).equals( - "select 1 as sample union all\nselect 2 as sample union all\nselect 3 as sample" - ); - expect(exampleSampleData.preOps).eql([]); - expect(exampleSampleData.dependencyTargets).eql([]); - expect(exampleSampleData.actionDescriptor).to.eql( - dataform.ActionDescriptor.create({ - description: "This is some sample data.", - columns: [ - dataform.ColumnDescriptor.create({ - description: "Sample integers.", - path: ["sample"] - }) - ] - }) - ); - - // Check database override defined in "config {}". 
- const exampleUsingOverriddenDatabase = graph.tables.find( - (t: dataform.ITable) => - targetAsReadableString(t.target) === - dotJoined( - databaseWithSuffix("override_database"), - schemaWithSuffix("df_integration_test"), - "override_database_example" - ) - ); - - expect(exampleUsingOverriddenDatabase.target.database).equals( - databaseWithSuffix("override_database") - ); - expect(exampleUsingOverriddenDatabase.type).equals("view"); - expect(exampleUsingOverriddenDatabase.query.trim()).equals( - "select 1 as test_database_override" - ); - - // Check schema overrides defined in "config {}" - const exampleUsingOverriddenSchema = graph.tables.find( - (t: dataform.ITable) => - targetAsReadableString(t.target) === - dotJoined( + )}\` where sample = 100` + ); + expect(exampleAssertion.dependencyTargets).eql([ + dataform.Target.create({ + database: databaseWithSuffix("tada-analytics"), + schema: schemaWithSuffix("df_integration_test"), + name: "sample_data" + }) + ]); + expect(exampleAssertion.tags).to.eql([]); + expect(exampleAssertion.actionDescriptor).to.eql( + dataform.ActionDescriptor.create({ + description: "An example assertion looking for incorrect 'sample' values." + }) + ); + + // Check Assertion with tags + const exampleAssertionWithTags = graph.assertions.find( + (a: dataform.IAssertion) => + targetAsReadableString(a.target) === + dotJoined( + databaseWithSuffix("tada-analytics"), + schemaWithSuffix("df_integration_test_assertions"), + "example_assertion_with_tags" + ) + ); + expect(exampleAssertionWithTags.target.schema).equals( + schemaWithSuffix("df_integration_test_assertions") + ); + expect(exampleAssertionWithTags.tags).to.eql(["tag1", "tag2"]); + + // Check example operations file + const exampleOperations = graph.operations.find( + (o: dataform.IOperation) => + targetAsReadableString(o.target) === + dotJoined( + databaseWithSuffix("tada-analytics"), + schemaWithSuffix("df_integration_test"), + "example_operations" + ) + ); + expect(exampleOperations.hasOutput).equals(false); + expect(exampleOperations.queries).to.eql([ + "\n\nCREATE OR REPLACE VIEW someschema.someview AS (SELECT 1 AS test)\n", + `\nDROP VIEW IF EXISTS \`${dotJoined( databaseWithSuffix("tada-analytics"), schemaWithSuffix("override_schema"), "override_schema_example" - ) - ); - - expect(exampleUsingOverriddenSchema.target.schema).equals( - schemaWithSuffix("override_schema") - ); - expect(exampleUsingOverriddenSchema.type).equals("view"); - expect(exampleUsingOverriddenSchema.query.trim()).equals( - "select 1 as test_schema_override" - ); - - // Check schema overrides defined in "config {}" -- case with schema unchanged - const exampleUsingOverriddenSchemaUnchanged = graph.tables.find( - (t: dataform.ITable) => - targetAsReadableString(t.target) === - dotJoined( - databaseWithSuffix("tada-analytics"), - schemaWithSuffix("df_integration_test"), - "override_schema_example_unchanged" - ) - ); - - expect(exampleUsingOverriddenSchemaUnchanged.target.schema).equals( - schemaWithSuffix("df_integration_test") - ); - expect(exampleUsingOverriddenSchemaUnchanged.type).equals("view"); - expect(exampleUsingOverriddenSchemaUnchanged.query.trim()).equals( - "select 1 as test_schema_override" - ); - - // Check assertion - const exampleAssertion = graph.assertions.find( - (a: dataform.IAssertion) => - targetAsReadableString(a.target) === - dotJoined( - databaseWithSuffix("tada-analytics"), - schemaWithSuffix("hi_there"), - "example_assertion" - ) - ); - 
expect(exampleAssertion.target.schema).equals(schemaWithSuffix("hi_there")); - expect(exampleAssertion.query.trim()).equals( - `select * from \`${dotJoined( - databaseWithSuffix("tada-analytics"), - schemaWithSuffix("df_integration_test"), - "sample_data" - )}\` where sample = 100` - ); - expect(exampleAssertion.dependencyTargets).eql([ - dataform.Target.create({ - database: databaseWithSuffix("tada-analytics"), - schema: schemaWithSuffix("df_integration_test"), - name: "sample_data" - }) - ]); - expect(exampleAssertion.tags).to.eql([]); - expect(exampleAssertion.actionDescriptor).to.eql( - dataform.ActionDescriptor.create({ - description: "An example assertion looking for incorrect 'sample' values." - }) - ); - - // Check Assertion with tags - const exampleAssertionWithTags = graph.assertions.find( - (a: dataform.IAssertion) => - targetAsReadableString(a.target) === - dotJoined( - databaseWithSuffix("tada-analytics"), - schemaWithSuffix("df_integration_test_assertions"), - "example_assertion_with_tags" - ) - ); - expect(exampleAssertionWithTags.target.schema).equals( - schemaWithSuffix("df_integration_test_assertions") - ); - expect(exampleAssertionWithTags.tags).to.eql(["tag1", "tag2"]); - - // Check example operations file - const exampleOperations = graph.operations.find( - (o: dataform.IOperation) => - targetAsReadableString(o.target) === - dotJoined( - databaseWithSuffix("tada-analytics"), + )}\`\n`, + `\nDROP VIEW IF EXISTS \`${dotJoined( + databaseWithSuffix("override_database"), schemaWithSuffix("df_integration_test"), - "example_operations" - ) - ); - expect(exampleOperations.hasOutput).equals(false); - expect(exampleOperations.queries).to.eql([ - "\n\nCREATE OR REPLACE VIEW someschema.someview AS (SELECT 1 AS test)\n", - `\nDROP VIEW IF EXISTS \`${dotJoined( - databaseWithSuffix("tada-analytics"), - schemaWithSuffix("override_schema"), - "override_schema_example" - )}\`\n`, - `\nDROP VIEW IF EXISTS \`${dotJoined( - databaseWithSuffix("override_database"), - schemaWithSuffix("df_integration_test"), - "override_database_example" - )}\`\n` - ]); - expect(exampleOperations.dependencyTargets).eql([ - dataform.Target.create({ - database: databaseWithSuffix("tada-analytics"), - schema: schemaWithSuffix("override_schema"), - name: "override_schema_example" - }), - dataform.Target.create({ - database: databaseWithSuffix("override_database"), - schema: schemaWithSuffix("df_integration_test"), - name: "override_database_example" - }), - dataform.Target.create({ - database: databaseWithSuffix("tada-analytics"), - schema: schemaWithSuffix("df_integration_test"), - name: "sample_data" - }) - ]); - expect(exampleOperations.tags).to.eql([]); - - // Check example operation with output. 
- const exampleOperationWithOutput = graph.operations.find( - (o: dataform.IOperation) => - targetAsReadableString(o.target) === - dotJoined( + "override_database_example" + )}\`\n` + ]); + expect(exampleOperations.dependencyTargets).eql([ + dataform.Target.create({ + database: databaseWithSuffix("tada-analytics"), + schema: schemaWithSuffix("override_schema"), + name: "override_schema_example" + }), + dataform.Target.create({ + database: databaseWithSuffix("override_database"), + schema: schemaWithSuffix("df_integration_test"), + name: "override_database_example" + }), + dataform.Target.create({ + database: databaseWithSuffix("tada-analytics"), + schema: schemaWithSuffix("df_integration_test"), + name: "sample_data" + }) + ]); + expect(exampleOperations.tags).to.eql([]); + + // Check example operation with output. + const exampleOperationWithOutput = graph.operations.find( + (o: dataform.IOperation) => + targetAsReadableString(o.target) === + dotJoined( + databaseWithSuffix("tada-analytics"), + schemaWithSuffix("df_integration_test"), + "example_operation_with_output" + ) + ); + expect(exampleOperationWithOutput.target.schema).equals( + schemaWithSuffix("df_integration_test") + ); + expect(exampleOperationWithOutput.target.name).equals("example_operation_with_output"); + expect(exampleOperationWithOutput.queries).to.eql([ + `\nCREATE OR REPLACE VIEW \`${dotJoined( databaseWithSuffix("tada-analytics"), schemaWithSuffix("df_integration_test"), "example_operation_with_output" - ) - ); - expect(exampleOperationWithOutput.target.schema).equals( - schemaWithSuffix("df_integration_test") - ); - expect(exampleOperationWithOutput.target.name).equals("example_operation_with_output"); - expect(exampleOperationWithOutput.queries).to.eql([ - `\nCREATE OR REPLACE VIEW \`${dotJoined( - databaseWithSuffix("tada-analytics"), - schemaWithSuffix("df_integration_test"), - "example_operation_with_output" - )}\` AS (SELECT * FROM \`some_database_name.some_external_schema_name.very_important_external_table\`)` - ]); - expect(exampleOperationWithOutput.dependencyTargets).eql([ - dataform.Target.create({ - database: "some_database_name", - schema: "some_external_schema_name", - name: "very_important_external_table" - }) - ]); - expect(exampleOperationWithOutput.actionDescriptor).to.eql( - dataform.ActionDescriptor.create({ - description: "An example operations file which outputs a dataset.", - columns: [ - dataform.ColumnDescriptor.create({ - description: "Just 1!", - path: ["TEST"] - }) - ] - }) - ); - - // Check Operation with tags - const exampleOperationsWithTags = graph.operations.find( - (o: dataform.IOperation) => - targetAsReadableString(o.target) === - dotJoined( - databaseWithSuffix("tada-analytics"), - schemaWithSuffix("df_integration_test"), - "example_operations_with_tags" - ) - ); - expect(exampleOperationsWithTags.tags).to.eql(["tag1"]); - - // Check declaration. - const exampleDeclaration = graph.declarations.find( - d => - targetAsReadableString(d.target) === - "some_database_name.some_external_schema_name.very_important_external_table" - ); - expect(exampleDeclaration.target).eql( - dataform.Target.create({ - database: "some_database_name", - schema: "some_external_schema_name", - name: "very_important_external_table" - }) - ); - expect(exampleDeclaration.actionDescriptor.description).to.equal( - "This table is not generated by Dataform!" - ); - - // Check testcases. 
- const testCase = graph.tests.find(t => t.name === "example_test_case"); - expect(testCase.testQuery.trim()).equals( - "select * from (\n select 'hi' as faked union all\n select 'ben' as faked union all\n select 'sup?' as faked\n)\n\n-- here ${\"is\"} a `comment\n\n/* ${\"another\"} ` backtick ` containing ```comment */" - ); - expect(testCase.expectedOutputQuery.trim()).equals( - "select 'hi' as faked union all\nselect 'ben' as faked union all\nselect 'sup?' as faked" - ); - - const testCaseFQ = graph.tests.find(t => t.name === "example_test_case_fq_ref"); - expect(testCaseFQ.testQuery.trim()).equals( - "select * from (\n select 'hi' as faked union all\n select 'ben' as faked union all\n select 'sup?' as faked\n)\n\n-- here ${\"is\"} a `comment\n\n/* ${\"another\"} ` backtick ` containing ```comment */" - ); - expect(testCaseFQ.expectedOutputQuery.trim()).equals( - "select 'hi' as faked union all\nselect 'ben' as faked union all\nselect 'sup?' as faked" - ); - - // Check double backslashes don't get converted to singular. - const exampleDoubleBackslash = graph.tables.find( - (t: dataform.ITable) => - targetAsReadableString(t.target) === - dotJoined( - databaseWithSuffix("tada-analytics"), - schemaWithSuffix("df_integration_test"), - "example_double_backslash" - ) - ); - expect(cleanSql(exampleDoubleBackslash.query)).equals( - "select * from regexp_extract('01a_data_engine', '^(\\\\d{2}\\\\w)') select * from regexp_extract('01a_data_engine', r'^(\\d{2}\\w)')" - ); - expect(cleanSql(exampleDoubleBackslash.preOps[0])).equals( - "select * from regexp_extract('\\\\\\\\', '\\\\')" - ); - }); + )}\` AS (SELECT * FROM \`some_database_name.some_external_schema_name.very_important_external_table\`)` + ]); + expect(exampleOperationWithOutput.dependencyTargets).eql([ + dataform.Target.create({ + database: "some_database_name", + schema: "some_external_schema_name", + name: "very_important_external_table" + }) + ]); + expect(exampleOperationWithOutput.actionDescriptor).to.eql( + dataform.ActionDescriptor.create({ + description: "An example operations file which outputs a dataset.", + columns: [ + dataform.ColumnDescriptor.create({ + description: "Just 1!", + path: ["TEST"] + }) + ] + }) + ); + + // Check Operation with tags + const exampleOperationsWithTags = graph.operations.find( + (o: dataform.IOperation) => + targetAsReadableString(o.target) === + dotJoined( + databaseWithSuffix("tada-analytics"), + schemaWithSuffix("df_integration_test"), + "example_operations_with_tags" + ) + ); + expect(exampleOperationsWithTags.tags).to.eql(["tag1"]); + + // Check declaration. + const exampleDeclaration = graph.declarations.find( + d => + targetAsReadableString(d.target) === + "some_database_name.some_external_schema_name.very_important_external_table" + ); + expect(exampleDeclaration.target).eql( + dataform.Target.create({ + database: "some_database_name", + schema: "some_external_schema_name", + name: "very_important_external_table" + }) + ); + expect(exampleDeclaration.actionDescriptor.description).to.equal( + "This table is not generated by Dataform!" + ); + + // Check testcases. + const testCase = graph.tests.find(t => t.name === "example_test_case"); + expect(testCase.testQuery.trim()).equals( + "select * from (\n select 'hi' as faked union all\n select 'ben' as faked union all\n select 'sup?' 
as faked\n)\n\n-- here ${\"is\"} a `comment\n\n/* ${\"another\"} ` backtick ` containing ```comment */" + ); + expect(testCase.expectedOutputQuery.trim()).equals( + "select 'hi' as faked union all\nselect 'ben' as faked union all\nselect 'sup?' as faked" + ); + + const testCaseFQ = graph.tests.find(t => t.name === "example_test_case_fq_ref"); + expect(testCaseFQ.testQuery.trim()).equals( + "select * from (\n select 'hi' as faked union all\n select 'ben' as faked union all\n select 'sup?' as faked\n)\n\n-- here ${\"is\"} a `comment\n\n/* ${\"another\"} ` backtick ` containing ```comment */" + ); + expect(testCaseFQ.expectedOutputQuery.trim()).equals( + "select 'hi' as faked union all\nselect 'ben' as faked union all\nselect 'sup?' as faked" + ); + + // Check double backslashes don't get converted to singular. + const exampleDoubleBackslash = graph.tables.find( + (t: dataform.ITable) => + targetAsReadableString(t.target) === + dotJoined( + databaseWithSuffix("tada-analytics"), + schemaWithSuffix("df_integration_test"), + "example_double_backslash" + ) + ); + expect(cleanSql(exampleDoubleBackslash.query)).equals( + "select * from regexp_extract('01a_data_engine', '^(\\\\d{2}\\\\w)') select * from regexp_extract('01a_data_engine', r'^(\\d{2}\\w)')" + ); + expect(cleanSql(exampleDoubleBackslash.preOps[0])).equals( + "select * from regexp_extract('\\\\\\\\', '\\\\')" + ); + }); + } } } } @@ -757,7 +769,7 @@ suite("examples", () => { suite("common_v1", async () => { test("bigquery compiles", async () => { const graph = await compile({ - projectDir: path.resolve("examples/common_v1"), + projectDir: path.join(cleanExamplesRoot, "common_v1"), projectConfigOverride: { warehouse: "bigquery", defaultDatabase: "tada-analytics" } }); const tableNames = graph.tables.map((t: dataform.ITable) => targetAsReadableString(t.target)); @@ -895,7 +907,7 @@ suite("examples", () => { test("bigquery compiles with schema override", async () => { const graph = await compile({ - projectDir: path.resolve("examples/common_v1"), + projectDir: path.join(cleanExamplesRoot, "common_v1"), projectConfigOverride: { warehouse: "redshift", schemaSuffix: "suffix" @@ -911,7 +923,7 @@ suite("examples", () => { test("bigquery compiles with database override", async () => { const graph = await compile({ - projectDir: path.resolve("examples/common_v2"), + projectDir: path.join(cleanExamplesRoot, "common_v2"), projectConfigOverride: { warehouse: "bigquery", defaultDatabase: "overridden-database" @@ -928,7 +940,7 @@ suite("examples", () => { test("redshift compiles", () => { return compile({ - projectDir: "examples/common_v1", + projectDir: path.join(cleanExamplesRoot, "common_v1"), projectConfigOverride: { warehouse: "redshift" } }).then(graph => { const tableNames = graph.tables.map((t: dataform.ITable) => @@ -978,7 +990,7 @@ suite("examples", () => { test("snowflake compiles", async () => { const graph = await compile({ - projectDir: "examples/common_v1", + projectDir: path.join(cleanExamplesRoot, "common_v1"), projectConfigOverride: { warehouse: "snowflake" } }).catch(error => error); expect(graph).to.not.be.an.instanceof(Error); @@ -1119,7 +1131,7 @@ suite("examples", () => { }); test("backwards_compatibility", async () => { - const graph = await compile({ projectDir: "examples/backwards_compatibility" }); + const graph = await compile({ projectDir: path.join(cleanExamplesRoot, "backwards_compatibility") }); const tableNames = graph.tables.map((t: dataform.ITable) => t.target.name); @@ -1135,7 +1147,7 @@ suite("examples", () => { 
test("times out after timeout period during compilation", async () => { try { - await compile({ projectDir: "examples/never_finishes_compiling" }); + await compile({ projectDir: path.join(cleanExamplesRoot, "never_finishes_compiling") }); fail("Compilation timeout Error expected."); } catch (e) { expect(e.message).to.equal("Compilation timed out"); @@ -1145,7 +1157,7 @@ suite("examples", () => { test("invalid dataform json throws error", async () => { try { await compile({ - projectDir: path.resolve("examples/invalid_dataform_json") + projectDir: path.join(cleanExamplesRoot, "invalid_dataform_json") }); fail("Should have failed."); } catch (e) { @@ -1155,7 +1167,7 @@ suite("examples", () => { test("version is correctly set", async () => { const graph = await compile({ - projectDir: "examples/common_v2", + projectDir: path.join(cleanExamplesRoot, "common_v2"), projectConfigOverride: { warehouse: "bigquery" } }); const { version: expectedVersion } = require("df/core/version"); diff --git a/tools/common/copy.bzl b/tools/common/copy.bzl index c96f6f2ba..6ede78170 100644 --- a/tools/common/copy.bzl +++ b/tools/common/copy.bzl @@ -5,3 +5,17 @@ def copy_file(name, src, out): outs = [out], cmd = "cp -r $(SRCS) $(OUTS)", ) + + +def copy_files(name, map): + for key, value in map.items(): + native.genrule( + name = "copy_" + value, + srcs = [key], + outs = [value], + cmd = "cp $(SRCS) $(OUTS)", + ) + native.filegroup( + name = name, + srcs = map.values() + )