Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

scope operations by workspace #4845

Merged
merged 10 commits into from
Jul 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.27.5-alpha
current_version = 0.28.0-alpha
commit = True
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-[a-z]+)?
Expand Down
2 changes: 1 addition & 1 deletion .env
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
VERSION=0.27.5-alpha
VERSION=0.28.0-alpha

# Airbyte Internal Job Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db
DATABASE_USER=docker
Expand Down
12 changes: 9 additions & 3 deletions airbyte-api/src/main/openapi/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -952,7 +952,7 @@ paths:
content:
application/json:
schema:
$ref: "#/components/schemas/OperationCreateOrUpdate"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Clarifying the naming here. This struct was used for two endpoints: operations/check and web_backend/connections/update (which is really an update-or-create). The check endpoint does not need all the fields the update endpoint needs, so I split them into two different structs, each with only the fields it needs and a more granular name.

$ref: "#/components/schemas/OperatorConfiguration"
required: true
responses:
"200":
Expand Down Expand Up @@ -2074,7 +2074,7 @@ components:
operations:
type: array
items:
$ref: "#/components/schemas/OperationCreateOrUpdate"
$ref: "#/components/schemas/WebBackendOperationCreateOrUpdate"
ConnectionRead:
type: object
required:
Expand Down Expand Up @@ -2173,7 +2173,10 @@ components:
required:
- name
- operatorConfiguration
- workspaceId
properties:
workspaceId:
$ref: "#/components/schemas/WorkspaceId"
name:
type: string
operatorConfiguration:
Expand All @@ -2191,7 +2194,7 @@ components:
type: string
operatorConfiguration:
$ref: "#/components/schemas/OperatorConfiguration"
OperationCreateOrUpdate:
WebBackendOperationCreateOrUpdate:
type: object
required:
- name
Expand All @@ -2209,7 +2212,10 @@ components:
- operationId
- name
- operatorConfiguration
- workspaceId
properties:
workspaceId:
$ref: "#/components/schemas/WorkspaceId"
operationId:
$ref: "#/components/schemas/OperationId"
name:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ title: StandardSyncOperation
description: Configuration of an operation to apply during a sync
type: object
required:
- workspaceId
- operationId
- name
- operatorType
Expand All @@ -28,3 +29,6 @@ properties:
if not set or false, the configuration is active. if true, then this
configuration is permanently off.
type: boolean
workspaceId:
type: string
format: uuid
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,6 @@
/**
 * Holder for persistence-related constants; the only one visible here is the hard-coded default
 * workspace id.
 */
// NOTE(review): the two DEFAULT_WORKSPACE_ID declarations below look like the before/after sides
// of a diff hunk (non-final vs. final) -- confirm that only the `final` one exists in the file.
public class PersistenceConstants {

// for MVP we only support one workspace per deployment and we hard code its id.
public static UUID DEFAULT_WORKSPACE_ID = UUID.fromString("5ae6b09b-fdec-41af-aaf7-7d94cfc33ef6");
public static final UUID DEFAULT_WORKSPACE_ID = UUID.fromString("5ae6b09b-fdec-41af-aaf7-7d94cfc33ef6");

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
* MIT License
*
* Copyright (c) 2020 Airbyte
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/

package io.airbyte.config.persistence;

import io.airbyte.config.DestinationConnection;
import io.airbyte.config.SourceConnection;
import java.util.UUID;

// todo (cgardens) - this is just a utility for this PR. will need to figure out the "right" way to
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

as the comment says, i expect this to be thrown out once @jrhizor merges his stuff. if that's not the case i can make this better.

// do this with jared.
/**
 * Static helpers that look up the workspace a source or destination belongs to.
 *
 * <p>
 * This class only exposes static methods, so it is final and cannot be instantiated.
 */
public final class WorkspaceFinder {

  private WorkspaceFinder() {
    // static utility class; no instances.
  }

  /**
   * Returns the id of the workspace that owns the given source.
   *
   * @param source source connection to inspect; must not be null
   * @return the value of {@code source.getWorkspaceId()}
   */
  public static UUID getWorkspaceForSourceId(SourceConnection source) {
    return source.getWorkspaceId();
  }

  /**
   * Returns the id of the workspace that owns the given destination.
   *
   * @param destination destination connection to inspect; must not be null
   * @return the value of {@code destination.getWorkspaceId()}
   */
  public static UUID getWorkspaceForDestination(DestinationConnection destination) {
    return destination.getWorkspaceId();
  }

}
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import io.airbyte.migrate.migrations.MigrationV0_25_0;
import io.airbyte.migrate.migrations.MigrationV0_26_0;
import io.airbyte.migrate.migrations.MigrationV0_27_0;
import io.airbyte.migrate.migrations.MigrationV0_28_0;
import io.airbyte.migrate.migrations.NoOpMigration;
import java.util.List;

Expand All @@ -55,6 +56,7 @@ public class Migrations {
private static final Migration MIGRATION_V_0_25_0 = new MigrationV0_25_0(MIGRATION_V_0_24_0);
private static final Migration MIGRATION_V_0_26_0 = new MigrationV0_26_0(MIGRATION_V_0_25_0);
private static final Migration MIGRATION_V_0_27_0 = new MigrationV0_27_0(MIGRATION_V_0_26_0);
private static final Migration MIGRATION_V_0_28_0 = new MigrationV0_28_0(MIGRATION_V_0_27_0);

// all migrations must be added to the list in the order that they should be applied.
public static final List<Migration> MIGRATIONS = ImmutableList.of(
Expand All @@ -72,6 +74,7 @@ public class Migrations {
MIGRATION_V_0_24_0,
MIGRATION_V_0_25_0,
MIGRATION_V_0_26_0,
MIGRATION_V_0_27_0);
MIGRATION_V_0_27_0,
MIGRATION_V_0_28_0);

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
/*
* MIT License
*
* Copyright (c) 2020 Airbyte
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/

package io.airbyte.migrate.migrations;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.annotations.VisibleForTesting;
import io.airbyte.migrate.Migration;
import io.airbyte.migrate.MigrationUtils;
import io.airbyte.migrate.ResourceId;
import io.airbyte.migrate.ResourceType;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
import java.util.function.Consumer;
import java.util.stream.Stream;

/**
 * Migration to version 0.28.0-alpha. It adds a {@code workspaceId} field to every
 * STANDARD_SYNC_OPERATION record so that operations are scoped to a workspace.
 *
 * <p>
 * The workspace of an operation is derived by following operation =&gt; connection =&gt; source =&gt;
 * workspace. If any link of that chain is missing, the operation is assigned to
 * {@link #DEFAULT_WORKSPACE_ID}. All other resources are passed through unchanged.
 */
public class MigrationV0_28_0 extends BaseMigration implements Migration {

  // fallback workspace for operations that cannot be traced back to a source's workspace.
  private static final UUID DEFAULT_WORKSPACE_ID = UUID.fromString("5ae6b09b-fdec-41af-aaf7-7d94cfc33ef6");

  private static final ResourceId CONNECTION_RESOURCE_ID = ResourceId.fromConstantCase(ResourceType.CONFIG, "STANDARD_SYNC");
  private static final ResourceId SOURCE_RESOURCE_ID = ResourceId.fromConstantCase(ResourceType.CONFIG, "SOURCE_CONNECTION");
  private static final ResourceId OPERATION_RESOURCE_ID = ResourceId.fromConstantCase(ResourceType.CONFIG, "STANDARD_SYNC_OPERATION");

  private static final String MIGRATION_VERSION = "0.28.0-alpha";
  // location of the schema files that changed in this version.
  private static final Path RESOURCE_PATH = Path.of("migrations/migrationV0_28_0/airbyte_config");

  @VisibleForTesting
  protected final Migration previousMigration;

  /**
   * @param previousMigration the migration that runs immediately before this one; its output schema
   *        is the base for this migration's output schema
   */
  public MigrationV0_28_0(Migration previousMigration) {
    super(previousMigration);
    this.previousMigration = previousMigration;
  }

  /**
   * @return the version this migration produces ({@code 0.28.0-alpha})
   */
  @Override
  public String getVersion() {
    return MIGRATION_VERSION;
  }

  /**
   * Builds the output schema: a copy of the previous migration's output schema in which the
   * STANDARD_SYNC_OPERATION entry is replaced by the schema loaded from {@link #RESOURCE_PATH}.
   *
   * @return a new mutable map from resource id to its JSON schema
   */
  @Override
  public Map<ResourceId, JsonNode> getOutputSchema() {
    final Map<ResourceId, JsonNode> outputSchema = new HashMap<>(previousMigration.getOutputSchema());
    outputSchema.put(
        OPERATION_RESOURCE_ID,
        MigrationUtils.getSchemaFromResourcePath(RESOURCE_PATH, OPERATION_RESOURCE_ID));
    return outputSchema;
  }

  /**
   * Streams every input record to its output consumer, adding a {@code workspaceId} to each
   * operation record on the way.
   *
   * @param inputDataImmutable input record streams keyed by resource id; the map itself is not
   *        modified (a copy is used), but each stream is consumed
   * @param outputData consumers keyed by resource id that receive the migrated records
   */
  @Override
  public void migrate(Map<ResourceId, Stream<JsonNode>> inputDataImmutable,
                      Map<ResourceId, Consumer<JsonNode>> outputData) {
    // we need to figure out which workspace to associate an operation with. we use the following
    // strategy to avoid ever storing too much info in memory:
    // 1. iterate over connections stream
    // 2. build mapping of connections to source
    // 3. build mapping of operation ids to connections
    // 4. iterate over sources stream
    // 5. build mapping of sources to workspaces
    // 6. iterate over operations stream,
    // 7. map from operation => connection => source => workspace. set that workspace for the operation.
    // 8. if no mapping use default workspace id

    final Map<UUID, UUID> connectionIdToSourceId = new HashMap<>();
    final Map<UUID, UUID> operationIdToConnectionId = new HashMap<>();
    final Map<UUID, UUID> sourceIdToWorkspaceId = new HashMap<>();

    // mutable copy so that the resources handled explicitly can be removed, leaving only the
    // pass-through resources for the final loop.
    final Map<ResourceId, Stream<JsonNode>> inputData = new HashMap<>(inputDataImmutable);

    // process connections.
    inputData.remove(CONNECTION_RESOURCE_ID).forEach(r -> {
      final UUID connectionId = UUID.fromString(r.get("connectionId").asText());
      final UUID sourceId = UUID.fromString(r.get("sourceId").asText());
      connectionIdToSourceId.put(connectionId, sourceId);
      if (r.hasNonNull("operationIds")) {
        r.get("operationIds").forEach(operationIdString -> {
          final UUID operationId = UUID.fromString(operationIdString.asText());
          operationIdToConnectionId.put(operationId, connectionId);
        });
      }

      outputData.get(CONNECTION_RESOURCE_ID).accept(r);
    });

    // process sources.
    inputData.remove(SOURCE_RESOURCE_ID).forEach(r -> {
      final UUID sourceId = UUID.fromString(r.get("sourceId").asText());
      final UUID workspaceId = UUID.fromString(r.get("workspaceId").asText());
      sourceIdToWorkspaceId.put(sourceId, workspaceId);

      outputData.get(SOURCE_RESOURCE_ID).accept(r);
    });

    // process operations. must run after connections and sources so the lookup maps are complete.
    inputData.remove(OPERATION_RESOURCE_ID).forEach(r -> {
      final UUID operationId = UUID.fromString(r.get("operationId").asText());
      final UUID workspaceId = resolveWorkspaceId(
          operationId,
          operationIdToConnectionId,
          connectionIdToSourceId,
          sourceIdToWorkspaceId);
      ((ObjectNode) r).put("workspaceId", workspaceId.toString());

      outputData.get(OPERATION_RESOURCE_ID).accept(r);
    });

    // process the remaining resources.
    for (final Map.Entry<ResourceId, Stream<JsonNode>> entry : inputData.entrySet()) {
      final Consumer<JsonNode> recordConsumer = outputData.get(entry.getKey());
      entry.getValue().forEach(recordConsumer);
    }
  }

  /**
   * Follows operation =&gt; connection =&gt; source =&gt; workspace and returns the workspace id, or
   * {@link #DEFAULT_WORKSPACE_ID} as soon as any link in that chain is missing (e.g. an operation
   * not attached to a connection, or a connection whose source is not in the sources stream).
   * Never returns null.
   */
  private static UUID resolveWorkspaceId(UUID operationId,
                                         Map<UUID, UUID> operationIdToConnectionId,
                                         Map<UUID, UUID> connectionIdToSourceId,
                                         Map<UUID, UUID> sourceIdToWorkspaceId) {
    final UUID connectionId = operationIdToConnectionId.get(operationId);
    if (connectionId == null) {
      return DEFAULT_WORKSPACE_ID;
    }
    final UUID sourceId = connectionIdToSourceId.get(connectionId);
    if (sourceId == null) {
      return DEFAULT_WORKSPACE_ID;
    }
    final UUID workspaceId = sourceIdToWorkspaceId.get(sourceId);
    return workspaceId != null ? workspaceId : DEFAULT_WORKSPACE_ID;
  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
---
"$schema": http://json-schema.org/draft-07/schema#
"$id": https://github.com/airbytehq/airbyte/blob/master/airbyte-config/models/src/main/resources/types/OperatorDbt.yaml
title: OperatorDbt
description: Settings for a DBT operator
type: object
required:
- gitRepoUrl
additionalProperties: false
properties:
gitRepoUrl:
type: string
gitRepoBranch:
type: string
dockerImage:
type: string
dbtArguments:
type: string
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
---
"$schema": http://json-schema.org/draft-07/schema#
"$id": https://github.com/airbytehq/airbyte/blob/master/airbyte-config/models/src/main/resources/types/OperatorNormalization.yaml
title: OperatorNormalization
description: Settings for a normalization operator
type: object
additionalProperties: false
properties:
option:
type: string
enum:
- basic
#- unnesting
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
"$schema": http://json-schema.org/draft-07/schema#
"$id": https://github.com/airbytehq/airbyte/blob/master/airbyte-config/models/src/main/resources/types/OperatorType.yaml
title: OperatorType
description: Type of Operator
type: string
enum:
# - destination
- normalization
- dbt
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
---
"$schema": http://json-schema.org/draft-07/schema#
"$id": https://github.com/airbytehq/airbyte/blob/master/airbyte-config/models/src/main/resources/types/StandardSyncOperation.yaml
title: StandardSyncOperation
description: Configuration of an operation to apply during a sync
type: object
required:
- workspaceId
- operationId
- name
- operatorType
additionalProperties: false
properties:
operationId:
type: string
format: uuid
name:
type: string
# Instead of this type field, we would prefer a json schema "oneOf" but unfortunately,
# the jsonschema2pojo does not seem to support it yet: https://github.com/joelittlejohn/jsonschema2pojo/issues/392
operatorType:
"$ref": OperatorType.yaml
operatorNormalization:
"$ref": OperatorNormalization.yaml
operatorDbt:
"$ref": OperatorDbt.yaml
tombstone:
description:
if not set or false, the configuration is active. if true, then this
configuration is permanently off.
type: boolean
workspaceId:
type: string
format: uuid
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,8 @@ void testMigration() throws IOException {
.of(STANDARD_SYNC_RESOURCE_ID,
getResourceStream(OUTPUT_CONFIG_PATH + "/STANDARD_SYNC.yaml")
.collect(Collectors.toList()));
final Map<ResourceId, List<JsonNode>> expectedOutput =
MigrationTestUtils
.createExpectedOutput(migration.getOutputSchema().keySet(), expectedOutputOverrides);
final Map<ResourceId, List<JsonNode>> expectedOutput = MigrationTestUtils
.createExpectedOutput(migration.getOutputSchema().keySet(), expectedOutputOverrides);

final Map<ResourceId, List<JsonNode>> outputAsList = MigrationTestUtils
.collectConsumersToList(outputConsumer);
Expand Down
Loading