Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

scope tracking by workspace #4838

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

import java.util.Collections;
import java.util.Map;
import java.util.UUID;
import java.util.function.Supplier;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand All @@ -41,22 +42,22 @@ public LoggingTrackingClient(Supplier<TrackingIdentity> identitySupplier) {
}

@Override
public void identify() {
public void identify(UUID workspaceId) {
LOGGER.info("identify. userId: {}", identitySupplier.get().getCustomerId());
}

@Override
public void alias(String previousCustomerId) {
public void alias(UUID workspaceId, String previousCustomerId) {
LOGGER.info("merge. userId: {} previousUserId: {}", identitySupplier.get().getCustomerId(), previousCustomerId);
}

@Override
public void track(String action) {
track(action, Collections.emptyMap());
public void track(UUID workspaceId, String action) {
track(workspaceId, action, Collections.emptyMap());
}

@Override
public void track(String action, Map<String, Object> metadata) {
public void track(UUID workspaceId, String action, Map<String, Object> metadata) {
LOGGER.info("track. version: {}, userId: {}, action: {}, metadata: {}",
identitySupplier.get().getAirbyteVersion(),
identitySupplier.get().getCustomerId(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,16 @@

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableMap;
import com.segment.analytics.Analytics;
import com.segment.analytics.messages.AliasMessage;
import com.segment.analytics.messages.IdentifyMessage;
import com.segment.analytics.messages.TrackMessage;
import io.airbyte.config.Configs;
import io.airbyte.config.Configs.WorkerEnvironment;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
import java.util.function.Supplier;

public class SegmentTrackingClient implements TrackingClient {
Expand All @@ -46,52 +48,63 @@ public class SegmentTrackingClient implements TrackingClient {
private final Analytics analytics;
private final Supplier<TrackingIdentity> identitySupplier;
private final String airbyteRole;
private final WorkerEnvironment deploymentEnvironment;

@VisibleForTesting
SegmentTrackingClient(final Supplier<TrackingIdentity> identitySupplier,
final Configs.WorkerEnvironment deploymentEnvironment,
final String airbyteRole,
final Analytics analytics) {
this.identitySupplier = identitySupplier;
this.deploymentEnvironment = deploymentEnvironment;
this.analytics = analytics;
this.airbyteRole = airbyteRole;
}

public SegmentTrackingClient(final Supplier<TrackingIdentity> identitySupplier, final String airbyteRole) {
this(identitySupplier, airbyteRole, Analytics.builder(SEGMENT_WRITE_KEY).build());
public SegmentTrackingClient(final Supplier<TrackingIdentity> identitySupplier,
final Configs.WorkerEnvironment deploymentEnvironment,
final String airbyteRole) {
this(identitySupplier, deploymentEnvironment, airbyteRole, Analytics.builder(SEGMENT_WRITE_KEY).build());
}

@Override
public void identify() {
public void identify(UUID workspaceId) {
final TrackingIdentity trackingIdentity = identitySupplier.get();
final ImmutableMap.Builder<String, Object> identityMetadataBuilder = ImmutableMap.<String, Object>builder()
.put(AIRBYTE_VERSION_KEY, trackingIdentity.getAirbyteVersion())
.put("anonymized", trackingIdentity.isAnonymousDataCollection())
.put("subscribed_newsletter", trackingIdentity.isNews())
.put("subscribed_security", trackingIdentity.isSecurityUpdates());
final Map<String, Object> identityMetadata = new HashMap<>();

// deployment
identityMetadata.put(AIRBYTE_VERSION_KEY, trackingIdentity.getAirbyteVersion());
identityMetadata.put("deployment_env", deploymentEnvironment);

// workspace (includes info that in the future we would store in an organization)
identityMetadata.put("anonymized", trackingIdentity.isAnonymousDataCollection());
identityMetadata.put("subscribed_newsletter", trackingIdentity.isNews());
identityMetadata.put("subscribed_security", trackingIdentity.isSecurityUpdates());
trackingIdentity.getEmail().ifPresent(email -> identityMetadata.put("email", email));

// other
if (!Strings.isNullOrEmpty(airbyteRole)) {
identityMetadataBuilder.put(AIRBYTE_ROLE, airbyteRole);
identityMetadata.put(AIRBYTE_ROLE, airbyteRole);
}

trackingIdentity.getEmail().ifPresent(email -> identityMetadataBuilder.put("email", email));

analytics.enqueue(IdentifyMessage.builder()
// user id is scoped by workspace. there is no cross-workspace tracking.
.userId(trackingIdentity.getCustomerId().toString())
.traits(identityMetadataBuilder.build()));
.traits(identityMetadata));
}

@Override
public void alias(String previousCustomerId) {
public void alias(UUID workspaceId, String previousCustomerId) {
analytics.enqueue(AliasMessage.builder(previousCustomerId).userId(identitySupplier.get().getCustomerId().toString()));
}

@Override
public void track(String action) {
track(action, Collections.emptyMap());
public void track(UUID workspaceId, String action) {
track(workspaceId, action, Collections.emptyMap());
}

@Override
public void track(String action, Map<String, Object> metadata) {
public void track(UUID workspaceId, String action, Map<String, Object> metadata) {
final Map<String, Object> mapCopy = new HashMap<>(metadata);
final TrackingIdentity trackingIdentity = identitySupplier.get();
mapCopy.put(AIRBYTE_VERSION_KEY, trackingIdentity.getAirbyteVersion());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,16 @@
package io.airbyte.analytics;

import java.util.Map;
import java.util.UUID;

public interface TrackingClient {

void identify();
void identify(UUID workspaceId);

void alias(String previousCustomerId);
void alias(UUID workspaceId, String previousCustomerId);

void track(String action);
void track(UUID workspaceId, String action);

void track(String action, Map<String, Object> metadata);
void track(UUID workspaceId, String action, Map<String, Object> metadata);

}
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,15 @@ static void initialize(TrackingClient trackingClient) {
}

public static void initialize(final Configs.TrackingStrategy trackingStrategy,
final Configs.WorkerEnvironment deploymentEnvironment,
final String airbyteRole,
final String airbyteVersion,
final ConfigRepository configRepository) {
initialize(createTrackingClient(trackingStrategy, airbyteRole, () -> getTrackingIdentity(configRepository, airbyteVersion)));
initialize(createTrackingClient(
trackingStrategy,
deploymentEnvironment,
airbyteRole,
() -> getTrackingIdentity(configRepository, airbyteVersion)));
}

// fallback on a logging client with an empty identity.
Expand Down Expand Up @@ -93,6 +98,7 @@ static TrackingIdentity getTrackingIdentity(ConfigRepository configRepository, S
* Creates a tracking client that uses the appropriate strategy from an identity supplier.
*
* @param trackingStrategy - what type of tracker we want to use.
* @param deploymentEnvironment - the environment that airbyte is running in.
* @param airbyteRole
* @param trackingIdentitySupplier - how we get the identity of the user. we have a supplier,
* because if the identity updates over time (which happens during initial setup), we
Expand All @@ -101,10 +107,11 @@ static TrackingIdentity getTrackingIdentity(ConfigRepository configRepository, S
*/
@VisibleForTesting
static TrackingClient createTrackingClient(final Configs.TrackingStrategy trackingStrategy,
final Configs.WorkerEnvironment deploymentEnvironment,
final String airbyteRole,
final Supplier<TrackingIdentity> trackingIdentitySupplier) {
return switch (trackingStrategy) {
case SEGMENT -> new SegmentTrackingClient(trackingIdentitySupplier, airbyteRole);
case SEGMENT -> new SegmentTrackingClient(trackingIdentitySupplier, deploymentEnvironment, airbyteRole);
case LOGGING -> new LoggingTrackingClient(trackingIdentitySupplier);
default -> throw new IllegalStateException("unrecognized tracking strategy");
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import static org.mockito.Mockito.when;

import io.airbyte.config.Configs;
import io.airbyte.config.Configs.WorkerEnvironment;
import io.airbyte.config.StandardWorkspace;
import io.airbyte.config.persistence.ConfigNotFoundException;
import io.airbyte.config.persistence.ConfigRepository;
Expand Down Expand Up @@ -58,6 +59,7 @@ void testCreateTrackingClientLogging() {
assertTrue(
TrackingClientSingleton.createTrackingClient(
Configs.TrackingStrategy.LOGGING,
WorkerEnvironment.DOCKER,
"role",
TrackingIdentity::empty) instanceof LoggingTrackingClient);
}
Expand All @@ -67,6 +69,7 @@ void testCreateTrackingClientSegment() {
assertTrue(
TrackingClientSingleton.createTrackingClient(
Configs.TrackingStrategy.SEGMENT,
WorkerEnvironment.DOCKER,
"role",
TrackingIdentity::empty) instanceof SegmentTrackingClient);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,7 @@ public static void main(String[] args) throws IOException, InterruptedException

TrackingClientSingleton.initialize(
configs.getTrackingStrategy(),
configs.getWorkerEnvironment(),
configs.getAirbyteRole(),
configs.getAirbyteVersion(),
configRepository);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@ public class DefaultJobPersistence implements JobPersistence {
private static final JSONFormat DB_JSON_FORMAT = new JSONFormat().recordFormat(RecordFormat.OBJECT);
protected static final String DEFAULT_SCHEMA = "public";
private static final String BACKUP_SCHEMA = "import_backup";
// key under which the deployment id is stored in the metadata table.
public static final String DEPLOYMENT_ID_KEY = "deployment_id";
// column names of the metadata table. kept lower-case so they match the "key" / "value" names used
// by the queries in this class and the (case-sensitive) field names of exported JSON records.
public static final String METADATA_KEY_COL = "key";
public static final String METADATA_VAL_COL = "value";

@VisibleForTesting
static final String BASE_JOB_SELECT_AND_JOIN =
Expand Down Expand Up @@ -457,10 +460,43 @@ public Optional<String> getVersion() throws IOException {
@Override
public void setVersion(String airbyteVersion) throws IOException {
database.query(ctx -> ctx.execute(String.format(
"INSERT INTO %s VALUES('%s', '%s'), ('%s_init_db', '%s') ON CONFLICT (key) DO UPDATE SET value = '%s'",
"INSERT INTO %s(%s, %s) VALUES('%s', '%s'), ('%s_init_db', '%s') ON CONFLICT (key) DO UPDATE SET value = '%s'",
AIRBYTE_METADATA_TABLE,
AirbyteVersion.AIRBYTE_VERSION_KEY_NAME, airbyteVersion,
current_timestamp(), airbyteVersion, airbyteVersion)));
METADATA_KEY_COL,
METADATA_VAL_COL,
AirbyteVersion.AIRBYTE_VERSION_KEY_NAME,
airbyteVersion,
current_timestamp(),
airbyteVersion,
airbyteVersion)));
}

/**
 * Looks up the deployment id stored in the metadata table.
 *
 * @return the stored deployment id, or empty if no row with the deployment key exists.
 * @throws IOException if the database query fails.
 */
@Override
public Optional<UUID> getDeployment() throws IOException {
  final Result<Record> matchingRows = database.query(ctx -> ctx.select()
      .from(AIRBYTE_METADATA_TABLE)
      .where(DSL.field("key").eq(DEPLOYMENT_ID_KEY))
      .fetch());

  // only the first matching row is considered.
  if (matchingRows.isEmpty()) {
    return Optional.empty();
  }
  final String storedValue = matchingRows.get(0).getValue("value", String.class);
  return Optional.of(UUID.fromString(storedValue));
}

/**
 * Stores the deployment id if none is stored yet. If one is already stored, the existing value is
 * retained and a warning is logged when it differs from the requested one.
 *
 * @param deployment deployment id to store.
 * @throws IOException if a database query fails.
 */
@Override
public void setDeployment(UUID deployment) throws IOException {
  // ON CONFLICT DO NOTHING makes this insert a no-op when a deployment id already exists. In that
  // case the statement produces no row, so the committed value is read back separately below
  // instead of relying on a RETURNING clause.
  database.query(ctx -> ctx.execute(String.format(
      "INSERT INTO %s(%s, %s) VALUES('%s', '%s') ON CONFLICT (key) DO NOTHING",
      AIRBYTE_METADATA_TABLE,
      METADATA_KEY_COL,
      METADATA_VAL_COL,
      DEPLOYMENT_ID_KEY,
      deployment)));

  // after the insert attempt a deployment row must exist; a missing row is an unexpected state.
  final UUID committedDeploymentId = getDeployment().orElseThrow();

  if (!deployment.equals(committedDeploymentId)) {
    LOGGER.warn("Attempted to set a deployment id {}, but deployment id {} already set. Retained original value.",
        deployment,
        committedDeploymentId);
  }
}

private static String current_timestamp() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -162,15 +162,27 @@ public interface JobPersistence {
/// ARCHIVE

/**
* Returns the AirbyteVersion stored in the database
* Returns the AirbyteVersion.
*/
Optional<String> getVersion() throws IOException;

/**
* Set the database to @param AirbyteVersion
* Set the airbyte version
*/
void setVersion(String airbyteVersion) throws IOException;

/**
* Returns a deployment UUID.
*/
Optional<UUID> getDeployment() throws IOException;
// a deployment refers to a single setup of airbyte. it is created the first time the docker compose
// or K8s setup is ready.

/**
* Set deployment id. If one is already set, the new value is ignored.
*/
void setDeployment(UUID uuid) throws IOException;

/**
* Export all SQL tables from @param schema into streams of JsonNode objects. This returns a Map of
* table schemas to the associated streams of records that is being exported.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,15 +103,11 @@ private void dumpDatabase(Path parentFolder) throws Exception {
}
}

private void writeTableToArchive(final Path tablePath, final Stream<JsonNode> tableStream)
throws Exception {
private void writeTableToArchive(final Path tablePath, final Stream<JsonNode> tableStream) throws Exception {
Files.createDirectories(tablePath.getParent());
final BufferedWriter recordOutputWriter = new BufferedWriter(
new FileWriter(tablePath.toFile()));
final BufferedWriter recordOutputWriter = new BufferedWriter(new FileWriter(tablePath.toFile()));
final CloseableConsumer<JsonNode> recordConsumer = Yamls.listWriter(recordOutputWriter);
tableStream.forEach(row -> Exceptions.toRuntime(() -> {
recordConsumer.accept(row);
}));
tableStream.forEach(row -> Exceptions.toRuntime(() -> recordConsumer.accept(row)));
recordConsumer.close();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@

import com.fasterxml.jackson.databind.JsonNode;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Streams;
import io.airbyte.analytics.TrackingClientSingleton;
import io.airbyte.api.model.ImportRead;
Expand All @@ -42,6 +44,7 @@
import io.airbyte.config.persistence.ConfigRepository;
import io.airbyte.config.persistence.PersistenceConstants;
import io.airbyte.scheduler.persistence.DatabaseSchema;
import io.airbyte.scheduler.persistence.DefaultJobPersistence;
import io.airbyte.scheduler.persistence.JobPersistence;
import io.airbyte.validation.json.JsonSchemaValidator;
import io.airbyte.validation.json.JsonValidationException;
Expand Down Expand Up @@ -319,7 +322,13 @@ public void importDatabaseFromArchive(final Path storageRoot, final String airby
final Map<DatabaseSchema, Stream<JsonNode>> data = new HashMap<>();
for (DatabaseSchema tableType : DatabaseSchema.values()) {
final Path tablePath = buildTablePath(storageRoot, tableType.name());
data.put(tableType, readTableFromArchive(tableType, tablePath));
Stream<JsonNode> tableStream = readTableFromArchive(tableType, tablePath);

if (tableType == DatabaseSchema.AIRBYTE_METADATA) {
tableStream = replaceDeploymentMetadata(postgresPersistence, tableStream);
}

data.put(tableType, tableStream);
}
postgresPersistence.importDatabase(airbyteVersion, data);
LOGGER.info("Successful upgrade of airbyte postgres database from archive");
Expand All @@ -330,6 +339,36 @@ public void importDatabaseFromArchive(final Path storageRoot, final String airby
}
}

/**
 * The deployment concept is specific to the environment that Airbyte is running in (not the data
 * being imported). Thus, if there is a deployment record in the imported data, we filter it out.
 * In addition, before running the import, we look up the current deployment id and append it to
 * the stream so that it is inserted when the import runs.
 *
 * @param postgresPersistence - database that we are importing into.
 * @param metadataTableStream - stream of records to be imported into the metadata table.
 * @return modified stream with the imported deployment id removed and the current deployment id
 *         (if one exists) appended.
 * @throws IOException - if looking up the current deployment id fails.
 */
private static Stream<JsonNode> replaceDeploymentMetadata(JobPersistence postgresPersistence,
                                                          Stream<JsonNode> metadataTableStream)
    throws IOException {
  // drop the deployment record from the import data, if it exists; every other metadata record is
  // kept. records without a key field are passed through untouched rather than failing the import.
  // NOTE(review): JSON field names are case-sensitive, so METADATA_KEY_COL must match the exported
  // column name exactly for the deployment record to be recognized.
  Stream<JsonNode> stream = metadataTableStream.filter(record -> {
    final JsonNode key = record.get(DefaultJobPersistence.METADATA_KEY_COL);
    return key == null || !DefaultJobPersistence.DEPLOYMENT_ID_KEY.equals(key.asText());
  });

  // insert the current deployment id, if it exists.
  final Optional<UUID> deploymentOptional = postgresPersistence.getDeployment();
  if (deploymentOptional.isPresent()) {
    final JsonNode deploymentRecord = Jsons.jsonNode(ImmutableMap.<String, String>builder()
        .put(DefaultJobPersistence.METADATA_KEY_COL, DefaultJobPersistence.DEPLOYMENT_ID_KEY)
        .put(DefaultJobPersistence.METADATA_VAL_COL, deploymentOptional.get().toString())
        .build());
    stream = Streams.concat(stream, Stream.of(deploymentRecord));
  }
  return stream;
}

protected static Path buildTablePath(final Path storageRoot, final String tableName) {
return storageRoot
.resolve(DB_FOLDER_NAME)
Expand Down
Loading