From 448dd2869842210bbd240edb02198af69d1d9055 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Thu, 29 Jun 2023 15:46:06 -0700 Subject: [PATCH 01/46] initial implementation --- .../base-typing-deduping-test/build.gradle | 14 + .../BaseTypingDedupingTest.java | 335 ++++++++++++++++++ .../src/main/resources/schema.json | 23 ++ ...drecords_fullrefresh_overwrite_final.jsonl | 2 + ...tedrecords_fullrefresh_overwrite_raw.jsonl | 3 + .../src/main/resources/sync1_messages.jsonl | 5 + .../destination-bigquery/build.gradle | 1 + .../bigquery/BigQueryDestination.java | 2 +- .../BigQuerySqlGeneratorIntegrationTest.java | 34 +- ...ueryStandardInsertsTypingDedupingTest.java | 82 +++++ settings.gradle | 1 + 11 files changed, 487 insertions(+), 15 deletions(-) create mode 100644 airbyte-integrations/bases/base-typing-deduping-test/build.gradle create mode 100644 airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java create mode 100644 airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/schema.json create mode 100644 airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_expectedrecords_fullrefresh_overwrite_final.jsonl create mode 100644 airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_expectedrecords_fullrefresh_overwrite_raw.jsonl create mode 100644 airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_messages.jsonl create mode 100644 airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryStandardInsertsTypingDedupingTest.java diff --git a/airbyte-integrations/bases/base-typing-deduping-test/build.gradle b/airbyte-integrations/bases/base-typing-deduping-test/build.gradle new file mode 100644 index 000000000000..cfbf0a72d513 --- /dev/null +++ 
b/airbyte-integrations/bases/base-typing-deduping-test/build.gradle @@ -0,0 +1,14 @@ +plugins { + id 'java-library' +} + +dependencies { + implementation project(':airbyte-config-oss:config-models-oss') + implementation project(':airbyte-connector-test-harnesses:acceptance-test-harness') + implementation libs.airbyte.protocol + + implementation(enforcedPlatform('org.junit:junit-bom:5.8.2')) + implementation 'org.junit.jupiter:junit-jupiter-api' + implementation 'org.junit.jupiter:junit-jupiter-params' + implementation 'org.mockito:mockito-core:4.6.1' +} diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java new file mode 100644 index 000000000000..62bd9bcaabfc --- /dev/null +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java @@ -0,0 +1,335 @@ +package io.airbyte.integrations.base.destination.typing_deduping; + +import static org.junit.jupiter.api.Assertions.assertAll; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.Streams; +import io.airbyte.commons.features.EnvVariableFeatureFlags; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.lang.Exceptions; +import io.airbyte.commons.resources.MoreResources; +import io.airbyte.commons.string.Strings; +import io.airbyte.configoss.WorkerDestinationConfig; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.AirbyteStream; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; +import io.airbyte.protocol.models.v0.DestinationSyncMode; +import 
io.airbyte.protocol.models.v0.SyncMode; +import io.airbyte.workers.internal.AirbyteDestination; +import io.airbyte.workers.internal.DefaultAirbyteDestination; +import io.airbyte.workers.process.AirbyteIntegrationLauncher; +import io.airbyte.workers.process.DockerProcessFactory; +import io.airbyte.workers.process.ProcessFactory; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.time.Instant; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.Objects; +import java.util.UUID; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This is loosely based on standard-destination-tests's DestinationAcceptanceTest class. The sync-running code is copy-pasted from there. + *

+ * All tests use a single stream, whose schema is defined in {@code resources/schema.json}. Each test case constructs a + * ConfiguredAirbyteCatalog dynamically. + *

+ * For sync modes which use a primary key, the stream provides a composite key of (id1, id2). For sync modes which use a + * cursor, the stream provides an updated_at field. The stream also has an _ab_cdc_deleted_at field. + */ +public abstract class BaseTypingDedupingTest { + private static final Logger LOGGER = LoggerFactory.getLogger(BaseTypingDedupingTest.class); + private static final Comparator RAW_RECORD_IDENTITY_COMPARATOR = Comparator + .comparingLong((JsonNode record) -> asInt(record.get("_airbyte_data").get("id1"))) + .thenComparingLong(record -> asInt(record.get("_airbyte_data").get("id2"))) + .thenComparing(record -> asTimestamp(record.get("_airbyte_data").get("updated_at"))) + .thenComparing(record -> asTimestamp(record.get("_airbyte_extracted_at"))); + private static final Comparator RAW_RECORD_SORT_COMPARATOR = RAW_RECORD_IDENTITY_COMPARATOR + .thenComparing(record -> asString(record.get("_airbyte_raw_id"))); + private static final Comparator FINAL_RECORD_IDENTITY_COMPARATOR = Comparator + .comparingLong((JsonNode record) -> asInt(record.get("id1"))) + .thenComparingLong(record -> asInt(record.get("id2"))) + .thenComparing(record -> asTimestamp(record.get("updated_at"))) + .thenComparing(record -> asTimestamp(record.get("_airbyte_extracted_at"))); + private static final Comparator FINAL_RECORD_SORT_COMPARATOR = FINAL_RECORD_IDENTITY_COMPARATOR + .thenComparing(record -> asString(record.get("_airbyte_raw_id"))); + private static ProcessFactory processFactory; + + /** + * Subclasses MUST implement a static {@link org.junit.jupiter.api.BeforeAll} method that sets this field. + *

+ * That method should also start testcontainer(s), if you're using them. That test container will be used for all + * tests. This is safe because each test uses a randomized stream namespace+name. + */ + protected static JsonNode config; + + private String streamNamespace; + private String streamName; + + /** + * @return the docker image to run, e.g. {@code "airbyte/destination-bigquery:dev"}. + */ + protected abstract String getImageName(); + + /** + * For a given stream, return the records that exist in the destination's raw table. This _should_ include metadata columns (e.g. _airbyte_raw_id). + * The {@code _airbyte_data} column MUST be an {@link com.fasterxml.jackson.databind.node.ObjectNode} (i.e. it cannot be a string value). + */ + protected abstract List dumpRawTableRecords(String streamNamespace, String streamName) throws Exception; + + /** + * For a given stream, return the records that exist in the destination's final table. This _should_ include metadata columns (e.g. _airbyte_raw_id). + */ + protected abstract List dumpFinalTableRecords(String streamNamespace, String streamName) throws Exception; + + /** + * Create raw+final tables in the destinations as though a previous sync had loaded {@code initialRecords}. This method + * exists so that we don't need to run a sync just to load initial state, because that's both slow and error-prone. + */ + protected abstract void loadInitialRecords(String streamNamespace, String streamName, List initialRecords) throws Exception; + + /** + * Delete any resources in the destination associated with this stream AND its namespace. We need this because we write + * raw tables to a shared {@code airbyte} namespace, which we can't drop wholesale. + *

+ * In general, this should resemble {@code DROP TABLE airbyte.namespace_name; DROP SCHEMA namespace}. + */ + protected abstract void teardownStreamAndNamespace(String streamNamespace, String streamName) throws Exception; + + @BeforeEach + public void setup() { + streamNamespace = Strings.addRandomSuffix("typing_deduping_test_namespace", "_", 5); + streamName = Strings.addRandomSuffix("test_stream", "_", 5); + LOGGER.info("Using stream namespace {} and name {}", streamNamespace, streamName); + } + + @AfterEach + public void teardown() throws Exception { + teardownStreamAndNamespace(streamNamespace, streamName); + } + + /** + * Starting with an empty destination, execute a full refresh overwrite sync. Verify that the records are written to + * the destination table. + */ + @Test + public void initialFullRefreshOverwrite() throws Exception { + ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams(List.of( + new ConfiguredAirbyteStream() + .withSyncMode(SyncMode.FULL_REFRESH) + .withDestinationSyncMode(DestinationSyncMode.OVERWRITE) + .withStream(new AirbyteStream() + .withNamespace(streamNamespace) + .withName(streamName) + .withJsonSchema(getSchema())))); + List messages = readMessages("sync1_messages.jsonl"); + + runSync(catalog, messages); + + List expectedRawRecords = readRecords("sync1_expectedrecords_fullrefresh_overwrite_raw.jsonl"); + List expectedFinalRecords = readRecords("sync1_expectedrecords_fullrefresh_overwrite_final.jsonl"); + verifySyncResult(expectedRawRecords, expectedFinalRecords); + } + + private static JsonNode getSchema() throws IOException { + return Jsons.deserialize(MoreResources.readResource("schema.json")); + } + + private List readMessages(String filename) throws IOException { + return MoreResources.readResource(filename).lines() + .filter(line -> !line.startsWith("//")) + .map(jsonString -> Jsons.deserialize(jsonString, AirbyteMessage.class)) + .peek(message -> { + 
message.getRecord().setNamespace(streamNamespace); + message.getRecord().setStream(streamName); + }).toList(); + } + + private List readRecords(String filename) throws IOException { + return MoreResources.readResource(filename).lines() + .filter(line -> !line.startsWith("//")) + .map(Jsons::deserialize) + .toList(); + } + + private void verifySyncResult(List expectedRawRecords, List expectedFinalRecords) throws Exception { + List actualRawRecords = dumpRawTableRecords(streamNamespace, streamName); + String rawDiff = diffRawTableRecords(expectedRawRecords, actualRawRecords); + List actualFinalRecords = dumpFinalTableRecords(streamNamespace, streamName); + String finalDiff = diffFinalTableRecords(expectedFinalRecords, actualFinalRecords); + + assertAll( + () -> assertTrue(rawDiff.isEmpty(), "Raw table was incorrect.\n" + rawDiff), + () -> assertTrue(finalDiff.isEmpty(), "Final table was incorrect.\n" + finalDiff) + ); + } + + private static String diffRawTableRecords(List expectedRecords, List actualRecords) { + return diffRecords(expectedRecords, actualRecords, RAW_RECORD_IDENTITY_COMPARATOR, RAW_RECORD_SORT_COMPARATOR); + } + + private static String diffFinalTableRecords(List expectedRecords, List actualRecords) { + return diffRecords(expectedRecords, actualRecords, FINAL_RECORD_IDENTITY_COMPARATOR, FINAL_RECORD_SORT_COMPARATOR); + } + + /** + * Generate a human-readable diff between the two lists. Only checks the keys specified in expectedRecords. + * + * @param identityComparator Returns 0 iff two records are the "same" record (i.e. 
have the same PK+cursor+extracted_at) + * @param sortComparator Behaves identically to identityComparator, but if two records are the same, breaks that tie using _airbyte_raw_id + * @return The diff, or empty string if there were no differences + */ + private static String diffRecords( + List originalExpectedRecords, + List originalActualRecords, + Comparator identityComparator, Comparator sortComparator) { + List expectedRecords = originalExpectedRecords.stream().sorted(sortComparator).toList(); + List actualRecords = originalActualRecords.stream().sorted(sortComparator).toList(); + + // Iterate through both lists in parallel and compare each record. + // Build up an error message listing any incorrect, missing, or unexpected records. + // Not a true diff, but close enough. + String message = ""; + int expectedRecordIndex = 0; + int actualRecordIndex = 0; + while (expectedRecordIndex < expectedRecords.size() && actualRecordIndex < actualRecords.size()) { + JsonNode expectedRecord = expectedRecords.get(expectedRecordIndex); + JsonNode actualRecord = actualRecords.get(actualRecordIndex); + int compare = identityComparator.compare(expectedRecord, actualRecord); + if (compare == 0) { + // These records should be the same. Find the specific fields that are different. + boolean foundMismatch = false; + String mismatchedRecordMessage = "Row had incorrect data:\n"; + for (String key : Streams.stream(expectedRecord.fieldNames()).sorted().toList()) { + JsonNode expectedValue = expectedRecord.get(key); + JsonNode actualValue = actualRecord.get(key); + // This is kind of sketchy, but seems to work fine for the data we have in our test cases. + if (!Objects.equals(expectedValue, actualValue) + // Objects.equals expects the two values to be the same class. + // We need to handle comparisons between e.g. LongNode and IntNode. 
+ && !(expectedValue.isIntegralNumber() && actualValue.isIntegralNumber() && expectedValue.asLong() == actualValue.asLong()) + && !(expectedValue.isNumber() && actualValue.isNumber() && expectedValue.asDouble() == actualValue.asDouble())) { + mismatchedRecordMessage += " For key " + key + ", expected " + expectedValue + " but got " + actualValue + "\n"; + foundMismatch = true; + } + } + if (foundMismatch) { + message += mismatchedRecordMessage; + } + + expectedRecordIndex++; + actualRecordIndex++; + } else if (compare < 0) { + // The expected record is missing from the actual records. Print it and move on to the next expected record. + message += "Row was expected but missing: " + expectedRecord + "\n"; + expectedRecordIndex++; + } else { + // There's an actual record which isn't present in the expected records. Print it and move on to the next actual record. + message += "Row was not expected but present: " + actualRecord + "\n"; + actualRecordIndex++; + } + } + // Tail loops in case we reached the end of one list before the other. 
+ while (expectedRecordIndex < expectedRecords.size()) { + message += "Row was expected but missing: " + expectedRecords.get(expectedRecordIndex) + "\n"; + expectedRecordIndex++; + } + while (actualRecordIndex < actualRecords.size()) { + message += "Row was not expected but present: " + actualRecords.get(actualRecordIndex) + "\n"; + actualRecordIndex++; + } + + return message; + } + + private static long asInt(JsonNode node) { + if (node == null || !node.isIntegralNumber()) { + return Integer.MIN_VALUE; + } else { + return node.longValue(); + } + } + + private static String asString(JsonNode node) { + if (node == null || node.isNull()) { + return ""; + } else if (node.isTextual()) { + return node.asText(); + } else { + return Jsons.serialize(node); + } + } + + private static Instant asTimestamp(JsonNode node) { + if (node == null || !node.isTextual()) { + return Instant.ofEpochMilli(Long.MIN_VALUE); + } else { + return Instant.parse(node.asText()); + } + } + + /* !!!!!! WARNING !!!!!! + * The code below was mostly copypasted from DestinationAcceptanceTest. If you make edits here, you probably want to also edit there. + * !!!!!!!!!!!!!!!!!!!!! 
+ */ + + private static Path jobRoot; + + @BeforeAll + public static void globalSetup() throws IOException { + final Path testDir = Path.of("/tmp/airbyte_tests/"); + Files.createDirectories(testDir); + final Path workspaceRoot = Files.createTempDirectory(testDir, "test"); + jobRoot = Files.createDirectories(Path.of(workspaceRoot.toString(), "job")); + Path localRoot = Files.createTempDirectory(testDir, "output"); + processFactory = new DockerProcessFactory( + workspaceRoot, + workspaceRoot.toString(), + localRoot.toString(), + "host", + Collections.emptyMap()); + } + + private void runSync(ConfiguredAirbyteCatalog catalog, List messages) throws Exception { + final WorkerDestinationConfig destinationConfig = new WorkerDestinationConfig() + .withConnectionId(UUID.randomUUID()) + .withCatalog(convertProtocolObject(catalog, io.airbyte.protocol.models.ConfiguredAirbyteCatalog.class)) + .withDestinationConnectionConfiguration(config); + + final AirbyteDestination destination = new DefaultAirbyteDestination(new AirbyteIntegrationLauncher( + "0", + 0, + getImageName(), + processFactory, + null, + null, + false, + new EnvVariableFeatureFlags())); + + destination.start(destinationConfig, jobRoot, Collections.emptyMap()); + messages.forEach(message -> Exceptions.toRuntime(() -> + destination.accept(convertProtocolObject(message, io.airbyte.protocol.models.AirbyteMessage.class)))); + destination.notifyEndOfInput(); + + while (!destination.isFinished()) { + destination.attemptRead(); + } + + destination.close(); + } + + private static V0 convertProtocolObject(final V1 v1, final Class klass) { + return Jsons.object(Jsons.jsonNode(v1), klass); + } + +} diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/schema.json b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/schema.json new file mode 100644 index 000000000000..cc196c91f5e5 --- /dev/null +++ 
b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/schema.json @@ -0,0 +1,23 @@ +{ + "type": "object", + "properties": { + "id1": { "type": "integer" }, + "id2": { "type": "integer" }, + "updated_at": { + "type": "string", + "airbyte_type": "timestamp_with_timezone" + }, + "_ab_cdc_deleted_at": { + "type": "string", + "airbyte_type": "timestamp_with_timezone" + }, + "name": { "type": "string" }, + "address": { + "type": "object", + "properties": { + "city": { "type": "string" }, + "state": { "type": "string" } + } + } + } +} diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_expectedrecords_fullrefresh_overwrite_final.jsonl b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_expectedrecords_fullrefresh_overwrite_final.jsonl new file mode 100644 index 000000000000..0b68fdcc802f --- /dev/null +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_expectedrecords_fullrefresh_overwrite_final.jsonl @@ -0,0 +1,2 @@ +{"_airbyte_extracted_at":"1970-01-01T00:00:01Z","_airbyte_meta":{"errors":[]},"id1":1,"id2":200,"updated_at":"2000-01-01T00:00:00Z","_ab_cdc_deleted_at":null,"name":"Alice","address":{"city":"San Francisco","state":"CA"}} +{"_airbyte_extracted_at":"1970-01-01T00:00:01Z","_airbyte_meta":{"errors":[]},"id1":1,"id2":200,"updated_at":"2000-01-01T00:01:00Z","_ab_cdc_deleted_at":null,"name":"Alice","address":{"city":"Los Angeles","state":"CA"}} diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_expectedrecords_fullrefresh_overwrite_raw.jsonl b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_expectedrecords_fullrefresh_overwrite_raw.jsonl new file mode 100644 index 000000000000..3010e4b5d73d --- /dev/null +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_expectedrecords_fullrefresh_overwrite_raw.jsonl @@ -0,0 +1,3 @@ +{"_airbyte_extracted_at": 
"1970-01-01T00:00:01Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "San Francisco", "state": "CA"}}} +// Note the duplicate record. In this sync mode, we don't dedup anything. +{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:01:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}}} diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_messages.jsonl b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_messages.jsonl new file mode 100644 index 000000000000..629cea3e4da8 --- /dev/null +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_messages.jsonl @@ -0,0 +1,5 @@ +// emitted_at:1000 is equal to 1970-01-01 00:00:01Z. This obviously makes no sense in relation to updated_at being in the year 2000 +// but that's OK because (from destinations POV) updated_at has no relation to emitted_at. +{"type": "RECORD", "record": {"emitted_at": 1000, "data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "San Francisco", "state": "CA"}}}} +// Emit a second record for id=(1,200). This generally doesn't happen in full refresh syncs - but if T+D is implemented correctly, it shouldn't matter (i.e. both records should be written to the final table). 
+{"type": "RECORD", "record": {"emitted_at": 1000, "data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:01:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}}}} diff --git a/airbyte-integrations/connectors/destination-bigquery/build.gradle b/airbyte-integrations/connectors/destination-bigquery/build.gradle index b78504011245..3867ce22c210 100644 --- a/airbyte-integrations/connectors/destination-bigquery/build.gradle +++ b/airbyte-integrations/connectors/destination-bigquery/build.gradle @@ -28,6 +28,7 @@ dependencies { implementation ('com.github.airbytehq:json-avro-converter:1.1.0') { exclude group: 'ch.qos.logback', module: 'logback-classic'} testImplementation project(':airbyte-integrations:bases:standard-destination-test') + testImplementation project(':airbyte-integrations:bases:base-typing-deduping-test') integrationTestJavaImplementation project(':airbyte-integrations:bases:standard-destination-test') integrationTestJavaImplementation project(':airbyte-integrations:connectors:destination-bigquery') diff --git a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryDestination.java b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryDestination.java index ffba1f03cdcf..b4e7ba499f9b 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryDestination.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryDestination.java @@ -168,7 +168,7 @@ private AirbyteConnectionStatus checkGcsPermission(final JsonNode config) { } } - protected BigQuery getBigQuery(final JsonNode config) { + public static BigQuery getBigQuery(final JsonNode config) { final String projectId = config.get(BigQueryConsts.CONFIG_PROJECT_ID).asText(); try { 
diff --git a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGeneratorIntegrationTest.java b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGeneratorIntegrationTest.java index 3be1cecb0875..e6117f0a7813 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGeneratorIntegrationTest.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGeneratorIntegrationTest.java @@ -9,7 +9,6 @@ import static org.junit.jupiter.api.Assertions.*; import com.fasterxml.jackson.databind.JsonNode; -import com.google.auth.oauth2.GoogleCredentials; import com.google.cloud.bigquery.*; import com.google.cloud.bigquery.Field.Mode; import com.google.common.collect.ImmutableMap; @@ -22,7 +21,6 @@ import io.airbyte.integrations.base.destination.typing_deduping.SqlGenerator.ColumnId; import io.airbyte.integrations.base.destination.typing_deduping.SqlGenerator.StreamId; import io.airbyte.integrations.destination.bigquery.BigQueryDestination; -import io.airbyte.integrations.destination.bigquery.BigQueryUtils; import io.airbyte.protocol.models.v0.DestinationSyncMode; import io.airbyte.protocol.models.v0.SyncMode; import java.math.BigDecimal; @@ -119,14 +117,7 @@ public static void setup() throws Exception { String rawConfig = Files.readString(Path.of("secrets/credentials-gcs-staging.json")); JsonNode config = Jsons.deserialize(rawConfig); - final BigQueryOptions.Builder bigQueryBuilder = BigQueryOptions.newBuilder(); - final GoogleCredentials credentials = BigQueryDestination.getServiceAccountCredentials(config); - bq = bigQueryBuilder - 
.setProjectId(config.get("project_id").asText()) - .setCredentials(credentials) - .setHeaderProvider(BigQueryUtils.getHeaderProvider()) - .build() - .getService(); + bq = BigQueryDestination.getBigQuery(config); } @BeforeEach @@ -903,8 +894,12 @@ private static void logAndExecute(final String sql) throws InterruptedException bq.query(QueryJobConfiguration.newBuilder(sql).build()); } - private Map toMap(Schema schema, FieldValueList row) { - final Map map = new HashMap<>(); + /** + * FieldValueList stores everything internally as string (I think?) but provides conversions to more useful types. + * This method does that conversion, using the schema to determine which type is most appropriate. + */ + private static LinkedHashMap toMap(Schema schema, FieldValueList row) { + final LinkedHashMap map = new LinkedHashMap<>(); for (int i = 0; i < schema.getFields().size(); i++) { final Field field = schema.getFields().get(i); final FieldValue value = row.get(i); @@ -939,13 +934,13 @@ private Map toMap(Schema schema, FieldValueList row) { * logs. */ private void assertQueryResult(final List>> expectedRows, final TableResult result) { - List> actualRows = result.streamAll().map(row -> toMap(result.getSchema(), row)).toList(); + List> actualRows = toMaps(result); List>> missingRows = new ArrayList<>(); Set> matchedRows = new HashSet<>(); boolean foundMultiMatch = false; // For each expected row, iterate through all actual rows to find a match. for (Map> expectedRow : expectedRows) { - final List> matchingRows = actualRows.stream().filter(actualRow -> { + final List> matchingRows = actualRows.stream().filter(actualRow -> { // We only want to check the fields that are specified in the expected row. // E.g.we shouldn't assert against randomized UUIDs. 
for (Entry> expectedEntry : expectedRow.entrySet()) { @@ -984,6 +979,17 @@ private void assertQueryResult(final List>> expecte } } + /** + * TableResult contains records in a somewhat nonintuitive format (and it avoids loading them all into memory). + * That's annoying for us since we're working with small test data, so pull everything into a list, and convert them + * into maps of column name -> value. + *

+ * Note that the values have reasonable types; see {@link #toMap(Schema, FieldValueList)} for details. + */ + public static List> toMaps(TableResult result) { + return result.streamAll().map(row -> toMap(result.getSchema(), row)).toList(); + } + private static String sortedToString(Map record) { return sortedToString(record, Function.identity()); } diff --git a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryStandardInsertsTypingDedupingTest.java b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryStandardInsertsTypingDedupingTest.java new file mode 100644 index 000000000000..03faa47ac72c --- /dev/null +++ b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryStandardInsertsTypingDedupingTest.java @@ -0,0 +1,82 @@ +package io.airbyte.integrations.destination.bigquery.typing_deduping; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.google.cloud.bigquery.BigQuery; +import com.google.cloud.bigquery.DatasetId; +import com.google.cloud.bigquery.QueryJobConfiguration; +import com.google.cloud.bigquery.TableId; +import com.google.cloud.bigquery.TableResult; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.string.Strings; +import io.airbyte.integrations.base.destination.typing_deduping.BaseTypingDedupingTest; +import io.airbyte.integrations.destination.bigquery.BigQueryDestination; +import io.airbyte.integrations.destination.bigquery.BigQueryDestinationTestUtils; +import java.io.IOException; +import java.nio.file.Path; +import java.time.Instant; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import org.junit.jupiter.api.BeforeAll; +import org.slf4j.Logger; +import 
org.slf4j.LoggerFactory; + +public class BigQueryStandardInsertsTypingDedupingTest extends BaseTypingDedupingTest { + private static final Logger LOGGER = LoggerFactory.getLogger(BigQueryStandardInsertsTypingDedupingTest.class); + + private static BigQuery bq; + + // Note that this is not an @Override, because it's a static method. I would love suggestions on how to do this better :) + @BeforeAll + public static void buildConfig() throws IOException { + final String datasetId = Strings.addRandomSuffix("typing_deduping_default_dataset", "_", 5); + LOGGER.info("Setting default dataset to {}", datasetId); + config = BigQueryDestinationTestUtils.createConfig(Path.of("secrets/credentials-1s1t-standard.json"), datasetId); + bq = BigQueryDestination.getBigQuery(config); + } + + @Override + protected String getImageName() { + return "airbyte/destination-bigquery:dev"; + } + + @Override + protected List dumpRawTableRecords(String streamNamespace, String streamName) throws InterruptedException { + TableResult result = bq.query(QueryJobConfiguration.of("SELECT * FROM airbyte." + streamNamespace + "_" + streamName)); + List> rowsAsMaps = BigQuerySqlGeneratorIntegrationTest.toMaps(result); + return rowsAsMaps.stream().map(BigQueryStandardInsertsTypingDedupingTest::toJson).toList(); + } + + @Override + protected List dumpFinalTableRecords(String streamNamespace, String streamName) throws InterruptedException { + TableResult result = bq.query(QueryJobConfiguration.of("SELECT * FROM " + streamNamespace + "." 
+ streamName)); + List> rowsAsMaps = BigQuerySqlGeneratorIntegrationTest.toMaps(result); + return rowsAsMaps.stream().map(BigQueryStandardInsertsTypingDedupingTest::toJson).toList(); + } + + @Override + protected void loadInitialRecords(String streamNamespace, String streamName, List initialRecords) { + // TODO + } + + @Override + protected void teardownStreamAndNamespace(String streamNamespace, String streamName) { + bq.delete(TableId.of("airbyte", streamNamespace + "_" + streamName)); + bq.delete(DatasetId.of(streamNamespace), BigQuery.DatasetDeleteOption.deleteContents()); + } + + private static JsonNode toJson(LinkedHashMap map) { + ObjectNode o = (ObjectNode) Jsons.emptyObject(); + for (Map.Entry entry : map.entrySet()) { + Object value = entry.getValue(); + if (value instanceof Instant i) { + // naively serializing an Instant returns a DecimalNode with the unix epoch, so manually dump the string here. + o.set(entry.getKey(), Jsons.jsonNode(i.toString())); + } else { + o.set(entry.getKey(), Jsons.jsonNode(value)); + } + } + return o; + } +} diff --git a/settings.gradle b/settings.gradle index fe9e61d097f4..ecbf49bb4a6b 100644 --- a/settings.gradle +++ b/settings.gradle @@ -105,6 +105,7 @@ if (!System.getenv().containsKey("SUB_BUILD") || System.getenv().get("SUB_BUILD" include ':airbyte-integrations:bases:base-java-s3' include ':airbyte-integrations:bases:base-normalization' include ':airbyte-integrations:bases:base-typing-deduping' + include ':airbyte-integrations:bases:base-typing-deduping-test' include ':airbyte-integrations:bases:bases-destination-jdbc' // needs to be lexicographically after base-java and base-normalization to avoid race condition include ':airbyte-integrations:bases:base-standard-source-test-file' include ':airbyte-integrations:bases:connector-acceptance-test' From 639f77a0d7ddf40bb7e94de5c8951adbbc95eef9 Mon Sep 17 00:00:00 2001 From: edgao Date: Thu, 29 Jun 2023 22:53:21 +0000 Subject: [PATCH 02/46] Automated Commit - Formatting Changes 
--- .../BaseTypingDedupingTest.java | 84 +++++++++++-------- 1 file changed, 51 insertions(+), 33 deletions(-) diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java index 62bd9bcaabfc..6e553e6a0d4b 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java @@ -1,3 +1,7 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + package io.airbyte.integrations.base.destination.typing_deduping; import static org.junit.jupiter.api.Assertions.assertAll; @@ -39,15 +43,18 @@ import org.slf4j.LoggerFactory; /** - * This is loosely based on standard-destination-tests's DestinationAcceptanceTest class. The sync-running code is copy-pasted from there. + * This is loosely based on standard-destination-tests's DestinationAcceptanceTest class. The + * sync-running code is copy-pasted from there. *

- * All tests use a single stream, whose schema is defined in {@code resources/schema.json}. Each test case constructs a - * ConfiguredAirbyteCatalog dynamically. + * All tests use a single stream, whose schema is defined in {@code resources/schema.json}. Each + * test case constructs a ConfiguredAirbyteCatalog dynamically. *

- * For sync modes which use a primary key, the stream provides a composite key of (id1, id2). For sync modes which use a - * cursor, the stream provides an updated_at field. The stream also has an _ab_cdc_deleted_at field. + * For sync modes which use a primary key, the stream provides a composite key of (id1, id2). For + * sync modes which use a cursor, the stream provides an updated_at field. The stream also has an + * _ab_cdc_deleted_at field. */ public abstract class BaseTypingDedupingTest { + private static final Logger LOGGER = LoggerFactory.getLogger(BaseTypingDedupingTest.class); private static final Comparator RAW_RECORD_IDENTITY_COMPARATOR = Comparator .comparingLong((JsonNode record) -> asInt(record.get("_airbyte_data").get("id1"))) @@ -66,10 +73,11 @@ public abstract class BaseTypingDedupingTest { private static ProcessFactory processFactory; /** - * Subclasses MUST implement a static {@link org.junit.jupiter.api.BeforeAll} method that sets this field. + * Subclasses MUST implement a static {@link org.junit.jupiter.api.BeforeAll} method that sets this + * field. *

- * That method should also start testcontainer(s), if you're using them. That test container will be used for all - * tests. This is safe because each test uses a randomized stream namespace+name. + * That method should also start testcontainer(s), if you're using them. That test container will be + * used for all tests. This is safe because each test uses a randomized stream namespace+name. */ protected static JsonNode config; @@ -82,27 +90,32 @@ public abstract class BaseTypingDedupingTest { protected abstract String getImageName(); /** - * For a given stream, return the records that exist in the destination's raw table. This _should_ include metadata columns (e.g. _airbyte_raw_id). - * The {@code _airbyte_data} column MUST be an {@link com.fasterxml.jackson.databind.node.ObjectNode} (i.e. it cannot be a string value). + * For a given stream, return the records that exist in the destination's raw table. This _should_ + * include metadata columns (e.g. _airbyte_raw_id). The {@code _airbyte_data} column MUST be an + * {@link com.fasterxml.jackson.databind.node.ObjectNode} (i.e. it cannot be a string value). */ protected abstract List dumpRawTableRecords(String streamNamespace, String streamName) throws Exception; /** - * For a given stream, return the records that exist in the destination's final table. This _should_ include metadata columns (e.g. _airbyte_raw_id). + * For a given stream, return the records that exist in the destination's final table. This _should_ + * include metadata columns (e.g. _airbyte_raw_id). */ protected abstract List dumpFinalTableRecords(String streamNamespace, String streamName) throws Exception; /** - * Create raw+final tables in the destinations as though a previous sync had loaded {@code initialRecords}. This method - * exists so that we don't need to run a sync just to load initial state, because that's both slow and error-prone. 
+ * Create raw+final tables in the destinations as though a previous sync had loaded + * {@code initialRecords}. This method exists so that we don't need to run a sync just to load + * initial state, because that's both slow and error-prone. */ protected abstract void loadInitialRecords(String streamNamespace, String streamName, List initialRecords) throws Exception; /** - * Delete any resources in the destination associated with this stream AND its namespace. We need this because we write - * raw tables to a shared {@code airbyte} namespace, which we can't drop wholesale. + * Delete any resources in the destination associated with this stream AND its namespace. We need + * this because we write raw tables to a shared {@code airbyte} namespace, which we can't drop + * wholesale. *

- * In general, this should resemble {@code DROP TABLE airbyte.namespace_name; DROP SCHEMA namespace}. + * In general, this should resemble + * {@code DROP TABLE airbyte.namespace_name; DROP SCHEMA namespace}. */ protected abstract void teardownStreamAndNamespace(String streamNamespace, String streamName) throws Exception; @@ -119,8 +132,8 @@ public void teardown() throws Exception { } /** - * Starting with an empty destination, execute a full refresh overwrite sync. Verify that the records are written to - * the destination table. + * Starting with an empty destination, execute a full refresh overwrite sync. Verify that the + * records are written to the destination table. */ @Test public void initialFullRefreshOverwrite() throws Exception { @@ -170,8 +183,7 @@ private void verifySyncResult(List expectedRawRecords, List assertAll( () -> assertTrue(rawDiff.isEmpty(), "Raw table was incorrect.\n" + rawDiff), - () -> assertTrue(finalDiff.isEmpty(), "Final table was incorrect.\n" + finalDiff) - ); + () -> assertTrue(finalDiff.isEmpty(), "Final table was incorrect.\n" + finalDiff)); } private static String diffRawTableRecords(List expectedRecords, List actualRecords) { @@ -183,16 +195,20 @@ private static String diffFinalTableRecords(List expectedRecords, List } /** - * Generate a human-readable diff between the two lists. Only checks the keys specified in expectedRecords. + * Generate a human-readable diff between the two lists. Only checks the keys specified in + * expectedRecords. * - * @param identityComparator Returns 0 iff two records are the "same" record (i.e. have the same PK+cursor+extracted_at) - * @param sortComparator Behaves identically to identityComparator, but if two records are the same, breaks that tie using _airbyte_raw_id + * @param identityComparator Returns 0 iff two records are the "same" record (i.e. 
have the same + * PK+cursor+extracted_at) + * @param sortComparator Behaves identically to identityComparator, but if two records are the same, + * breaks that tie using _airbyte_raw_id * @return The diff, or empty string if there were no differences */ private static String diffRecords( - List originalExpectedRecords, - List originalActualRecords, - Comparator identityComparator, Comparator sortComparator) { + List originalExpectedRecords, + List originalActualRecords, + Comparator identityComparator, + Comparator sortComparator) { List expectedRecords = originalExpectedRecords.stream().sorted(sortComparator).toList(); List actualRecords = originalActualRecords.stream().sorted(sortComparator).toList(); @@ -230,11 +246,13 @@ private static String diffRecords( expectedRecordIndex++; actualRecordIndex++; } else if (compare < 0) { - // The expected record is missing from the actual records. Print it and move on to the next expected record. + // The expected record is missing from the actual records. Print it and move on to the next expected + // record. message += "Row was expected but missing: " + expectedRecord + "\n"; expectedRecordIndex++; } else { - // There's an actual record which isn't present in the expected records. Print it and move on to the next actual record. + // There's an actual record which isn't present in the expected records. Print it and move on to the + // next actual record. message += "Row was not expected but present: " + actualRecord + "\n"; actualRecordIndex++; } @@ -278,9 +296,9 @@ private static Instant asTimestamp(JsonNode node) { } } - /* !!!!!! WARNING !!!!!! - * The code below was mostly copypasted from DestinationAcceptanceTest. If you make edits here, you probably want to also edit there. - * !!!!!!!!!!!!!!!!!!!!! + /* + * !!!!!! WARNING !!!!!! The code below was mostly copypasted from DestinationAcceptanceTest. If you + * make edits here, you probably want to also edit there. !!!!!!!!!!!!!!!!!!!!! 
*/ private static Path jobRoot; @@ -317,8 +335,8 @@ private void runSync(ConfiguredAirbyteCatalog catalog, List mess new EnvVariableFeatureFlags())); destination.start(destinationConfig, jobRoot, Collections.emptyMap()); - messages.forEach(message -> Exceptions.toRuntime(() -> - destination.accept(convertProtocolObject(message, io.airbyte.protocol.models.AirbyteMessage.class)))); + messages.forEach( + message -> Exceptions.toRuntime(() -> destination.accept(convertProtocolObject(message, io.airbyte.protocol.models.AirbyteMessage.class)))); destination.notifyEndOfInput(); while (!destination.isFinished()) { From 3653da448fc5d132fea87ebd81d28d3041dfd914 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Thu, 29 Jun 2023 19:33:43 -0700 Subject: [PATCH 03/46] add second sync to test --- .../base-typing-deduping-test/build.gradle | 1 + .../BaseTypingDedupingTest.java | 184 +++++++++++------- .../src/main/resources/sync1_messages.jsonl | 5 +- ...drecords_fullrefresh_overwrite_final.jsonl | 2 + ...tedrecords_fullrefresh_overwrite_raw.jsonl | 2 + .../src/main/resources/sync2_messages.jsonl | 2 + .../typing_deduping/BigQuerySqlGenerator.java | 4 +- ...ueryStandardInsertsTypingDedupingTest.java | 13 +- 8 files changed, 134 insertions(+), 79 deletions(-) create mode 100644 airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_expectedrecords_fullrefresh_overwrite_final.jsonl create mode 100644 airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_expectedrecords_fullrefresh_overwrite_raw.jsonl create mode 100644 airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_messages.jsonl diff --git a/airbyte-integrations/bases/base-typing-deduping-test/build.gradle b/airbyte-integrations/bases/base-typing-deduping-test/build.gradle index cfbf0a72d513..5c786c2f79c0 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/build.gradle +++ b/airbyte-integrations/bases/base-typing-deduping-test/build.gradle @@ 
-5,6 +5,7 @@ plugins { dependencies { implementation project(':airbyte-config-oss:config-models-oss') implementation project(':airbyte-connector-test-harnesses:acceptance-test-harness') + implementation project(':airbyte-integrations:bases:base-typing-deduping') implementation libs.airbyte.protocol implementation(enforcedPlatform('org.junit:junit-bom:5.8.2')) diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java index 6e553e6a0d4b..059f29375dcc 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java @@ -35,6 +35,7 @@ import java.util.List; import java.util.Objects; import java.util.UUID; +import java.util.function.Function; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; @@ -43,18 +44,15 @@ import org.slf4j.LoggerFactory; /** - * This is loosely based on standard-destination-tests's DestinationAcceptanceTest class. The - * sync-running code is copy-pasted from there. + * This is loosely based on standard-destination-tests's DestinationAcceptanceTest class. The sync-running code is copy-pasted from there. *

- * All tests use a single stream, whose schema is defined in {@code resources/schema.json}. Each - * test case constructs a ConfiguredAirbyteCatalog dynamically. + * All tests use a single stream, whose schema is defined in {@code resources/schema.json}. Each test case constructs a + * ConfiguredAirbyteCatalog dynamically. *

- * For sync modes which use a primary key, the stream provides a composite key of (id1, id2). For - * sync modes which use a cursor, the stream provides an updated_at field. The stream also has an - * _ab_cdc_deleted_at field. + * For sync modes which use a primary key, the stream provides a composite key of (id1, id2). For sync modes which use a + * cursor, the stream provides an updated_at field. The stream also has an _ab_cdc_deleted_at field. */ public abstract class BaseTypingDedupingTest { - private static final Logger LOGGER = LoggerFactory.getLogger(BaseTypingDedupingTest.class); private static final Comparator RAW_RECORD_IDENTITY_COMPARATOR = Comparator .comparingLong((JsonNode record) -> asInt(record.get("_airbyte_data").get("id1"))) @@ -73,11 +71,10 @@ public abstract class BaseTypingDedupingTest { private static ProcessFactory processFactory; /** - * Subclasses MUST implement a static {@link org.junit.jupiter.api.BeforeAll} method that sets this - * field. + * Subclasses MUST implement a static {@link org.junit.jupiter.api.BeforeAll} method that sets this field. *

- * That method should also start testcontainer(s), if you're using them. That test container will be - * used for all tests. This is safe because each test uses a randomized stream namespace+name. + * That method should also start testcontainer(s), if you're using them. That test container will be used for all + * tests. This is safe because each test uses a randomized stream namespace+name. */ protected static JsonNode config; @@ -90,32 +87,25 @@ public abstract class BaseTypingDedupingTest { protected abstract String getImageName(); /** - * For a given stream, return the records that exist in the destination's raw table. This _should_ - * include metadata columns (e.g. _airbyte_raw_id). The {@code _airbyte_data} column MUST be an - * {@link com.fasterxml.jackson.databind.node.ObjectNode} (i.e. it cannot be a string value). + * For a given stream, return the records that exist in the destination's raw table. This _should_ include metadata columns (e.g. _airbyte_raw_id). + * The {@code _airbyte_data} column MUST be an {@link com.fasterxml.jackson.databind.node.ObjectNode} (i.e. it cannot be a string value). + *

+ * Each record should be in the format {"_airbyte_raw_id": "...", "_airbyte_extracted_at": "...", "_airbyte_loaded_at": "..", "_airbyte_data": {fields...}}. */ protected abstract List dumpRawTableRecords(String streamNamespace, String streamName) throws Exception; /** - * For a given stream, return the records that exist in the destination's final table. This _should_ - * include metadata columns (e.g. _airbyte_raw_id). + * For a given stream, return the records that exist in the destination's final table. This _should_ include metadata columns (e.g. _airbyte_raw_id). + *

+ * Each record should be in the format {"_airbyte_raw_id": "...", "_airbyte_extracted_at": "...", "_airbyte_meta": "..", "field1": ..., "field2": ..., ...}. */ protected abstract List dumpFinalTableRecords(String streamNamespace, String streamName) throws Exception; /** - * Create raw+final tables in the destinations as though a previous sync had loaded - * {@code initialRecords}. This method exists so that we don't need to run a sync just to load - * initial state, because that's both slow and error-prone. - */ - protected abstract void loadInitialRecords(String streamNamespace, String streamName, List initialRecords) throws Exception; - - /** - * Delete any resources in the destination associated with this stream AND its namespace. We need - * this because we write raw tables to a shared {@code airbyte} namespace, which we can't drop - * wholesale. + * Delete any resources in the destination associated with this stream AND its namespace. We need this because we write + * raw tables to a shared {@code airbyte} namespace, which we can't drop wholesale. *

- * In general, this should resemble - * {@code DROP TABLE airbyte.namespace_name; DROP SCHEMA namespace}. + * In general, this should resemble {@code DROP TABLE airbyte.namespace_name; DROP SCHEMA namespace}. */ protected abstract void teardownStreamAndNamespace(String streamNamespace, String streamName) throws Exception; @@ -132,11 +122,11 @@ public void teardown() throws Exception { } /** - * Starting with an empty destination, execute a full refresh overwrite sync. Verify that the - * records are written to the destination table. + * Starting with an empty destination, execute a full refresh overwrite sync. Verify that the records are written to + * the destination table. Then run a second sync, and verify that the records are overwritten. */ @Test - public void initialFullRefreshOverwrite() throws Exception { + public void fullRefreshOverwrite() throws Exception { ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams(List.of( new ConfiguredAirbyteStream() .withSyncMode(SyncMode.FULL_REFRESH) @@ -145,13 +135,24 @@ public void initialFullRefreshOverwrite() throws Exception { .withNamespace(streamNamespace) .withName(streamName) .withJsonSchema(getSchema())))); - List messages = readMessages("sync1_messages.jsonl"); - runSync(catalog, messages); + // First sync + List messages1 = readMessages("sync1_messages.jsonl"); + + runSync(catalog, messages1); + + List expectedRawRecords1 = readRecords("sync1_expectedrecords_fullrefresh_overwrite_raw.jsonl"); + List expectedFinalRecords1 = readRecords("sync1_expectedrecords_fullrefresh_overwrite_final.jsonl"); + verifySyncResult(expectedRawRecords1, expectedFinalRecords1); - List expectedRawRecords = readRecords("sync1_expectedrecords_fullrefresh_overwrite_raw.jsonl"); - List expectedFinalRecords = readRecords("sync1_expectedrecords_fullrefresh_overwrite_final.jsonl"); - verifySyncResult(expectedRawRecords, expectedFinalRecords); + // Second sync + List messages2 = 
readMessages("sync2_messages.jsonl"); + + runSync(catalog, messages2); + + List expectedRawRecords2 = readRecords("sync2_expectedrecords_fullrefresh_overwrite_raw.jsonl"); + List expectedFinalRecords2 = readRecords("sync2_expectedrecords_fullrefresh_overwrite_final.jsonl"); + verifySyncResult(expectedRawRecords2, expectedFinalRecords2); } private static JsonNode getSchema() throws IOException { @@ -183,32 +184,59 @@ private void verifySyncResult(List expectedRawRecords, List assertAll( () -> assertTrue(rawDiff.isEmpty(), "Raw table was incorrect.\n" + rawDiff), - () -> assertTrue(finalDiff.isEmpty(), "Final table was incorrect.\n" + finalDiff)); + () -> assertTrue(finalDiff.isEmpty(), "Final table was incorrect.\n" + finalDiff) + ); } private static String diffRawTableRecords(List expectedRecords, List actualRecords) { - return diffRecords(expectedRecords, actualRecords, RAW_RECORD_IDENTITY_COMPARATOR, RAW_RECORD_SORT_COMPARATOR); + return diffRecords( + expectedRecords, + actualRecords, + RAW_RECORD_IDENTITY_COMPARATOR, + RAW_RECORD_SORT_COMPARATOR, + record -> getFieldIfPresent(record.get("_airbyte_data"), "id1") + + getFieldIfPresent(record.get("_airbyte_data"), "id2") + + getFieldIfPresent(record.get("_airbyte_data"), "updated_at") + + getFieldIfPresent(record, "_airbyte_extracted_at"), + true); } private static String diffFinalTableRecords(List expectedRecords, List actualRecords) { - return diffRecords(expectedRecords, actualRecords, FINAL_RECORD_IDENTITY_COMPARATOR, FINAL_RECORD_SORT_COMPARATOR); + return diffRecords( + expectedRecords, + actualRecords, + FINAL_RECORD_IDENTITY_COMPARATOR, + FINAL_RECORD_SORT_COMPARATOR, + record -> getFieldIfPresent(record, "id1") + + getFieldIfPresent(record, "id2") + + getFieldIfPresent(record, "updated_at") + + getFieldIfPresent(record, "_airbyte_extracted_at"), + false); + } + + private static String getFieldIfPresent(JsonNode record, String field) { + if (record.has(field)) { + return field + "=" + record.get(field) + 
"; "; + } else { + return ""; + } } /** - * Generate a human-readable diff between the two lists. Only checks the keys specified in - * expectedRecords. + * Generate a human-readable diff between the two lists. Only checks the keys specified in expectedRecords. + * Assumes (in general) that two records with the same PK, cursor, and extracted_at are the same record. * - * @param identityComparator Returns 0 iff two records are the "same" record (i.e. have the same - * PK+cursor+extracted_at) - * @param sortComparator Behaves identically to identityComparator, but if two records are the same, - * breaks that tie using _airbyte_raw_id + * @param identityComparator Returns 0 iff two records are the "same" record (i.e. have the same PK+cursor+extracted_at) + * @param sortComparator Behaves identically to identityComparator, but if two records are the same, breaks that tie using _airbyte_raw_id * @return The diff, or empty string if there were no differences */ private static String diffRecords( - List originalExpectedRecords, - List originalActualRecords, - Comparator identityComparator, - Comparator sortComparator) { + List originalExpectedRecords, + List originalActualRecords, + Comparator identityComparator, + Comparator sortComparator, + Function recordIdExtractor, + boolean extractRawData) { List expectedRecords = originalExpectedRecords.stream().sorted(sortComparator).toList(); List actualRecords = originalActualRecords.stream().sorted(sortComparator).toList(); @@ -225,18 +253,36 @@ private static String diffRecords( if (compare == 0) { // These records should be the same. Find the specific fields that are different. 
boolean foundMismatch = false; - String mismatchedRecordMessage = "Row had incorrect data:\n"; + String mismatchedRecordMessage = "Row had incorrect data:" + recordIdExtractor.apply(expectedRecord) + "\n"; for (String key : Streams.stream(expectedRecord.fieldNames()).sorted().toList()) { - JsonNode expectedValue = expectedRecord.get(key); - JsonNode actualValue = actualRecord.get(key); - // This is kind of sketchy, but seems to work fine for the data we have in our test cases. - if (!Objects.equals(expectedValue, actualValue) - // Objects.equals expects the two values to be the same class. - // We need to handle comparisons between e.g. LongNode and IntNode. - && !(expectedValue.isIntegralNumber() && actualValue.isIntegralNumber() && expectedValue.asLong() == actualValue.asLong()) - && !(expectedValue.isNumber() && actualValue.isNumber() && expectedValue.asDouble() == actualValue.asDouble())) { - mismatchedRecordMessage += " For key " + key + ", expected " + expectedValue + " but got " + actualValue + "\n"; - foundMismatch = true; + if (extractRawData && "_airbyte_data".equals(key)) { + JsonNode expectedRawData = expectedRecord.get("_airbyte_data"); + JsonNode actualRawData = actualRecord.get("_airbyte_data"); + for (String field : Streams.stream(expectedRawData.fieldNames()).sorted().toList()) { + JsonNode expectedValue = expectedRawData.get(field); + JsonNode actualValue = actualRawData.get(field); + // This is kind of sketchy, but seems to work fine for the data we have in our test cases. + if (!Objects.equals(expectedValue, actualValue) + // Objects.equals expects the two values to be the same class. + // We need to handle comparisons between e.g. LongNode and IntNode. + && !(expectedValue.isIntegralNumber() && actualValue.isIntegralNumber() && expectedValue.asLong() == actualValue.asLong()) + && !(expectedValue.isNumber() && actualValue.isNumber() && expectedValue.asDouble() == actualValue.asDouble())) { + mismatchedRecordMessage += " For _airbyte_data." 
+ field + ", expected " + expectedValue + " but got " + actualValue + "\n"; + foundMismatch = true; + } + } + } else { + JsonNode expectedValue = expectedRecord.get(key); + JsonNode actualValue = actualRecord.get(key); + // This is kind of sketchy, but seems to work fine for the data we have in our test cases. + if (!Objects.equals(expectedValue, actualValue) + // Objects.equals expects the two values to be the same class. + // We need to handle comparisons between e.g. LongNode and IntNode. + && !(expectedValue.isIntegralNumber() && actualValue.isIntegralNumber() && expectedValue.asLong() == actualValue.asLong()) + && !(expectedValue.isNumber() && actualValue.isNumber() && expectedValue.asDouble() == actualValue.asDouble())) { + mismatchedRecordMessage += " For key " + key + ", expected " + expectedValue + " but got " + actualValue + "\n"; + foundMismatch = true; + } } } if (foundMismatch) { @@ -246,13 +292,11 @@ private static String diffRecords( expectedRecordIndex++; actualRecordIndex++; } else if (compare < 0) { - // The expected record is missing from the actual records. Print it and move on to the next expected - // record. + // The expected record is missing from the actual records. Print it and move on to the next expected record. message += "Row was expected but missing: " + expectedRecord + "\n"; expectedRecordIndex++; } else { - // There's an actual record which isn't present in the expected records. Print it and move on to the - // next actual record. + // There's an actual record which isn't present in the expected records. Print it and move on to the next actual record. message += "Row was not expected but present: " + actualRecord + "\n"; actualRecordIndex++; } @@ -296,9 +340,9 @@ private static Instant asTimestamp(JsonNode node) { } } - /* - * !!!!!! WARNING !!!!!! The code below was mostly copypasted from DestinationAcceptanceTest. If you - * make edits here, you probably want to also edit there. !!!!!!!!!!!!!!!!!!!!! + /* !!!!!! WARNING !!!!!! 
+ * The code below was mostly copypasted from DestinationAcceptanceTest. If you make edits here, you probably want to also edit there. + * !!!!!!!!!!!!!!!!!!!!! */ private static Path jobRoot; @@ -335,8 +379,8 @@ private void runSync(ConfiguredAirbyteCatalog catalog, List mess new EnvVariableFeatureFlags())); destination.start(destinationConfig, jobRoot, Collections.emptyMap()); - messages.forEach( - message -> Exceptions.toRuntime(() -> destination.accept(convertProtocolObject(message, io.airbyte.protocol.models.AirbyteMessage.class)))); + messages.forEach(message -> Exceptions.toRuntime(() -> + destination.accept(convertProtocolObject(message, io.airbyte.protocol.models.AirbyteMessage.class)))); destination.notifyEndOfInput(); while (!destination.isFinished()) { diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_messages.jsonl b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_messages.jsonl index 629cea3e4da8..b2fc2a1ea173 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_messages.jsonl +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_messages.jsonl @@ -1,5 +1,6 @@ -// emitted_at:1000 is equal to 1970-01-01 00:00:01Z. This obviously makes no sense in relation to updated_at being in the year 2000 -// but that's OK because (from destinations POV) updated_at has no relation to emitted_at. +// emitted_at:1000 is equal to 1970-01-01 00:00:01Z, which is what you'll see in the expected records. +// This obviously makes no sense in relation to updated_at being in the year 2000, but that's OK +// because (from destinations POV) updated_at has no relation to emitted_at. {"type": "RECORD", "record": {"emitted_at": 1000, "data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "San Francisco", "state": "CA"}}}} // Emit a second record for id=(1,200). 
This generally doesn't happen in full refresh syncs - but if T+D is implemented correctly, it shouldn't matter (i.e. both records should be written to the final table). {"type": "RECORD", "record": {"emitted_at": 1000, "data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:01:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}}}} diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_expectedrecords_fullrefresh_overwrite_final.jsonl b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_expectedrecords_fullrefresh_overwrite_final.jsonl new file mode 100644 index 000000000000..80df5e903881 --- /dev/null +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_expectedrecords_fullrefresh_overwrite_final.jsonl @@ -0,0 +1,2 @@ +{"_airbyte_extracted_at": "1970-01-01T00:00:02Z", "_airbyte_meta":{"errors":[]}, "id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Seattle", "state": "WA"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02Z", "_airbyte_meta":{"errors":[]}, "id1": 1, "id2": 201, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Bob", "address": {"city": "New York", "state": "NY"}} diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_expectedrecords_fullrefresh_overwrite_raw.jsonl b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_expectedrecords_fullrefresh_overwrite_raw.jsonl new file mode 100644 index 000000000000..6f48c9630b3d --- /dev/null +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_expectedrecords_fullrefresh_overwrite_raw.jsonl @@ -0,0 +1,2 @@ +{"_airbyte_extracted_at": "1970-01-01T00:00:02Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", 
"address": {"city": "Seattle", "state": "WA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Bob", "address": {"city": "New York", "state": "NY"}}} diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_messages.jsonl b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_messages.jsonl new file mode 100644 index 000000000000..49d8f5a605eb --- /dev/null +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_messages.jsonl @@ -0,0 +1,2 @@ +{"type": "RECORD", "record": {"emitted_at": 2000, "data": {"id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Seattle", "state": "WA"}}}} +{"type": "RECORD", "record": {"emitted_at": 2000, "data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Bob", "address": {"city": "New York", "state": "NY"}}}} diff --git a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGenerator.java b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGenerator.java index f27305c84dfc..d32ecec9456b 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGenerator.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGenerator.java @@ -69,7 +69,7 @@ public ColumnId buildColumnId(final String name) { return new ColumnId(nameTransformer.getIdentifier(quotedName), name, canonicalized); } - public StandardSQLTypeName toDialectType(final AirbyteType type) { + public 
static StandardSQLTypeName toDialectType(final AirbyteType type) { // switch pattern-matching is still in preview at language level 17 :( if (type instanceof final AirbyteProtocolType p) { return toDialectType(p); @@ -137,7 +137,7 @@ ELSE JSON_QUERY(`_airbyte_data`, '$.${column_name}') } } - public StandardSQLTypeName toDialectType(final AirbyteProtocolType airbyteProtocolType) { + public static StandardSQLTypeName toDialectType(final AirbyteProtocolType airbyteProtocolType) { return switch (airbyteProtocolType) { // TODO doublecheck these case STRING -> StandardSQLTypeName.STRING; diff --git a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryStandardInsertsTypingDedupingTest.java b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryStandardInsertsTypingDedupingTest.java index 03faa47ac72c..1e75b3431a65 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryStandardInsertsTypingDedupingTest.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryStandardInsertsTypingDedupingTest.java @@ -1,5 +1,7 @@ package io.airbyte.integrations.destination.bigquery.typing_deduping; +import static java.util.stream.Collectors.joining; + import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.cloud.bigquery.BigQuery; @@ -9,15 +11,21 @@ import com.google.cloud.bigquery.TableResult; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.string.Strings; +import io.airbyte.integrations.base.destination.typing_deduping.AirbyteType; +import io.airbyte.integrations.base.destination.typing_deduping.AirbyteType.Struct; 
import io.airbyte.integrations.base.destination.typing_deduping.BaseTypingDedupingTest; import io.airbyte.integrations.destination.bigquery.BigQueryDestination; import io.airbyte.integrations.destination.bigquery.BigQueryDestinationTestUtils; +import io.airbyte.protocol.models.v0.DestinationSyncMode; +import io.airbyte.protocol.models.v0.SyncMode; import java.io.IOException; import java.nio.file.Path; import java.time.Instant; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; +import org.apache.commons.text.StringSubstitutor; import org.junit.jupiter.api.BeforeAll; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -55,11 +63,6 @@ protected List dumpFinalTableRecords(String streamNamespace, String st return rowsAsMaps.stream().map(BigQueryStandardInsertsTypingDedupingTest::toJson).toList(); } - @Override - protected void loadInitialRecords(String streamNamespace, String streamName, List initialRecords) { - // TODO - } - @Override protected void teardownStreamAndNamespace(String streamNamespace, String streamName) { bq.delete(TableId.of("airbyte", streamNamespace + "_" + streamName)); From a4ba44a30fa0033a807b250a51467269c80dc097 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Thu, 29 Jun 2023 19:55:07 -0700 Subject: [PATCH 04/46] do concurrent things --- .../typing_deduping/BaseTypingDedupingTest.java | 15 ++++++++++----- ...BigQueryStandardInsertsTypingDedupingTest.java | 10 +--------- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java index 059f29375dcc..5514be9862d7 100644 --- 
a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java @@ -37,9 +37,10 @@ import java.util.UUID; import java.util.function.Function; import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.parallel.Execution; +import org.junit.jupiter.api.parallel.ExecutionMode; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -52,6 +53,9 @@ * For sync modes which use a primary key, the stream provides a composite key of (id1, id2). For sync modes which use a * cursor, the stream provides an updated_at field. The stream also has an _ab_cdc_deleted_at field. */ +// Remember to set `'junit.jupiter.execution.parallel.enabled': 'true'` in your connector's build.gradle. +// See destination-bigquery for an example. +@Execution(ExecutionMode.CONCURRENT) public abstract class BaseTypingDedupingTest { private static final Logger LOGGER = LoggerFactory.getLogger(BaseTypingDedupingTest.class); private static final Comparator RAW_RECORD_IDENTITY_COMPARATOR = Comparator @@ -68,7 +72,6 @@ public abstract class BaseTypingDedupingTest { .thenComparing(record -> asTimestamp(record.get("_airbyte_extracted_at"))); private static final Comparator FINAL_RECORD_SORT_COMPARATOR = FINAL_RECORD_IDENTITY_COMPARATOR .thenComparing(record -> asString(record.get("_airbyte_raw_id"))); - private static ProcessFactory processFactory; /** * Subclasses MUST implement a static {@link org.junit.jupiter.api.BeforeAll} method that sets this field. @@ -345,10 +348,12 @@ private static Instant asTimestamp(JsonNode node) { * !!!!!!!!!!!!!!!!!!!!! 
*/ - private static Path jobRoot; + private Path jobRoot; + // This contains some state, so it needs to be instanced per test (i.e. cannot be static) + private ProcessFactory processFactory; - @BeforeAll - public static void globalSetup() throws IOException { + @BeforeEach + public void setupProcessFactory() throws IOException { final Path testDir = Path.of("/tmp/airbyte_tests/"); Files.createDirectories(testDir); final Path workspaceRoot = Files.createTempDirectory(testDir, "test"); diff --git a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryStandardInsertsTypingDedupingTest.java b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryStandardInsertsTypingDedupingTest.java index 1e75b3431a65..e5bc2fa51a73 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryStandardInsertsTypingDedupingTest.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryStandardInsertsTypingDedupingTest.java @@ -1,7 +1,5 @@ package io.airbyte.integrations.destination.bigquery.typing_deduping; -import static java.util.stream.Collectors.joining; - import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.cloud.bigquery.BigQuery; @@ -11,21 +9,15 @@ import com.google.cloud.bigquery.TableResult; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.string.Strings; -import io.airbyte.integrations.base.destination.typing_deduping.AirbyteType; -import io.airbyte.integrations.base.destination.typing_deduping.AirbyteType.Struct; import io.airbyte.integrations.base.destination.typing_deduping.BaseTypingDedupingTest; import 
io.airbyte.integrations.destination.bigquery.BigQueryDestination; import io.airbyte.integrations.destination.bigquery.BigQueryDestinationTestUtils; -import io.airbyte.protocol.models.v0.DestinationSyncMode; -import io.airbyte.protocol.models.v0.SyncMode; import java.io.IOException; import java.nio.file.Path; import java.time.Instant; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.stream.Collectors; -import org.apache.commons.text.StringSubstitutor; import org.junit.jupiter.api.BeforeAll; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -74,7 +66,7 @@ private static JsonNode toJson(LinkedHashMap map) { for (Map.Entry entry : map.entrySet()) { Object value = entry.getValue(); if (value instanceof Instant i) { - // naively serializing an Instant returns a DecimalNode with the unix epoch, so manually dump the string here. + // naively converting an Instant returns a DecimalNode with the unix epoch, so instead we manually stringify it o.set(entry.getKey(), Jsons.jsonNode(i.toString())); } else { o.set(entry.getKey(), Jsons.jsonNode(value)); From 8298bff2912d766a3dbf3c19f2013815817a7c40 Mon Sep 17 00:00:00 2001 From: edgao Date: Fri, 30 Jun 2023 02:59:22 +0000 Subject: [PATCH 05/46] Automated Commit - Formatting Changes --- .../BaseTypingDedupingTest.java | 92 +++++++++++-------- 1 file changed, 54 insertions(+), 38 deletions(-) diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java index 5514be9862d7..d80b98e481af 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java +++ 
b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java @@ -45,18 +45,22 @@ import org.slf4j.LoggerFactory; /** - * This is loosely based on standard-destination-tests's DestinationAcceptanceTest class. The sync-running code is copy-pasted from there. + * This is loosely based on standard-destination-tests's DestinationAcceptanceTest class. The + * sync-running code is copy-pasted from there. *

- * All tests use a single stream, whose schema is defined in {@code resources/schema.json}. Each test case constructs a - * ConfiguredAirbyteCatalog dynamically. + * All tests use a single stream, whose schema is defined in {@code resources/schema.json}. Each + * test case constructs a ConfiguredAirbyteCatalog dynamically. *

- * For sync modes which use a primary key, the stream provides a composite key of (id1, id2). For sync modes which use a - * cursor, the stream provides an updated_at field. The stream also has an _ab_cdc_deleted_at field. + * For sync modes which use a primary key, the stream provides a composite key of (id1, id2). For + * sync modes which use a cursor, the stream provides an updated_at field. The stream also has an + * _ab_cdc_deleted_at field. */ -// Remember to set `'junit.jupiter.execution.parallel.enabled': 'true'` in your connector's build.gradle. +// Remember to set `'junit.jupiter.execution.parallel.enabled': 'true'` in your connector's +// build.gradle. // See destination-bigquery for an example. @Execution(ExecutionMode.CONCURRENT) public abstract class BaseTypingDedupingTest { + private static final Logger LOGGER = LoggerFactory.getLogger(BaseTypingDedupingTest.class); private static final Comparator RAW_RECORD_IDENTITY_COMPARATOR = Comparator .comparingLong((JsonNode record) -> asInt(record.get("_airbyte_data").get("id1"))) @@ -74,10 +78,11 @@ public abstract class BaseTypingDedupingTest { .thenComparing(record -> asString(record.get("_airbyte_raw_id"))); /** - * Subclasses MUST implement a static {@link org.junit.jupiter.api.BeforeAll} method that sets this field. + * Subclasses MUST implement a static {@link org.junit.jupiter.api.BeforeAll} method that sets this + * field. *

- * That method should also start testcontainer(s), if you're using them. That test container will be used for all - * tests. This is safe because each test uses a randomized stream namespace+name. + * That method should also start testcontainer(s), if you're using them. That test container will be + * used for all tests. This is safe because each test uses a randomized stream namespace+name. */ protected static JsonNode config; @@ -90,25 +95,31 @@ public abstract class BaseTypingDedupingTest { protected abstract String getImageName(); /** - * For a given stream, return the records that exist in the destination's raw table. This _should_ include metadata columns (e.g. _airbyte_raw_id). - * The {@code _airbyte_data} column MUST be an {@link com.fasterxml.jackson.databind.node.ObjectNode} (i.e. it cannot be a string value). + * For a given stream, return the records that exist in the destination's raw table. This _should_ + * include metadata columns (e.g. _airbyte_raw_id). The {@code _airbyte_data} column MUST be an + * {@link com.fasterxml.jackson.databind.node.ObjectNode} (i.e. it cannot be a string value). *

- * Each record should be in the format {"_airbyte_raw_id": "...", "_airbyte_extracted_at": "...", "_airbyte_loaded_at": "..", "_airbyte_data": {fields...}}. + * Each record should be in the format {"_airbyte_raw_id": "...", "_airbyte_extracted_at": "...", + * "_airbyte_loaded_at": "..", "_airbyte_data": {fields...}}. */ protected abstract List dumpRawTableRecords(String streamNamespace, String streamName) throws Exception; /** - * For a given stream, return the records that exist in the destination's final table. This _should_ include metadata columns (e.g. _airbyte_raw_id). + * For a given stream, return the records that exist in the destination's final table. This _should_ + * include metadata columns (e.g. _airbyte_raw_id). *

- * Each record should be in the format {"_airbyte_raw_id": "...", "_airbyte_extracted_at": "...", "_airbyte_meta": "..", "field1": ..., "field2": ..., ...}. + * Each record should be in the format {"_airbyte_raw_id": "...", "_airbyte_extracted_at": "...", + * "_airbyte_meta": "..", "field1": ..., "field2": ..., ...}. */ protected abstract List dumpFinalTableRecords(String streamNamespace, String streamName) throws Exception; /** - * Delete any resources in the destination associated with this stream AND its namespace. We need this because we write - * raw tables to a shared {@code airbyte} namespace, which we can't drop wholesale. + * Delete any resources in the destination associated with this stream AND its namespace. We need + * this because we write raw tables to a shared {@code airbyte} namespace, which we can't drop + * wholesale. *

- * In general, this should resemble {@code DROP TABLE airbyte.namespace_name; DROP SCHEMA namespace}. + * In general, this should resemble + * {@code DROP TABLE airbyte.namespace_name; DROP SCHEMA namespace}. */ protected abstract void teardownStreamAndNamespace(String streamNamespace, String streamName) throws Exception; @@ -125,8 +136,9 @@ public void teardown() throws Exception { } /** - * Starting with an empty destination, execute a full refresh overwrite sync. Verify that the records are written to - * the destination table. Then run a second sync, and verify that the records are overwritten. + * Starting with an empty destination, execute a full refresh overwrite sync. Verify that the + * records are written to the destination table. Then run a second sync, and verify that the records + * are overwritten. */ @Test public void fullRefreshOverwrite() throws Exception { @@ -187,8 +199,7 @@ private void verifySyncResult(List expectedRawRecords, List assertAll( () -> assertTrue(rawDiff.isEmpty(), "Raw table was incorrect.\n" + rawDiff), - () -> assertTrue(finalDiff.isEmpty(), "Final table was incorrect.\n" + finalDiff) - ); + () -> assertTrue(finalDiff.isEmpty(), "Final table was incorrect.\n" + finalDiff)); } private static String diffRawTableRecords(List expectedRecords, List actualRecords) { @@ -226,20 +237,23 @@ private static String getFieldIfPresent(JsonNode record, String field) { } /** - * Generate a human-readable diff between the two lists. Only checks the keys specified in expectedRecords. - * Assumes (in general) that two records with the same PK, cursor, and extracted_at are the same record. + * Generate a human-readable diff between the two lists. Only checks the keys specified in + * expectedRecords. Assumes (in general) that two records with the same PK, cursor, and extracted_at + * are the same record. * - * @param identityComparator Returns 0 iff two records are the "same" record (i.e. 
have the same PK+cursor+extracted_at) - * @param sortComparator Behaves identically to identityComparator, but if two records are the same, breaks that tie using _airbyte_raw_id + * @param identityComparator Returns 0 iff two records are the "same" record (i.e. have the same + * PK+cursor+extracted_at) + * @param sortComparator Behaves identically to identityComparator, but if two records are the same, + * breaks that tie using _airbyte_raw_id * @return The diff, or empty string if there were no differences */ private static String diffRecords( - List originalExpectedRecords, - List originalActualRecords, - Comparator identityComparator, - Comparator sortComparator, - Function recordIdExtractor, - boolean extractRawData) { + List originalExpectedRecords, + List originalActualRecords, + Comparator identityComparator, + Comparator sortComparator, + Function recordIdExtractor, + boolean extractRawData) { List expectedRecords = originalExpectedRecords.stream().sorted(sortComparator).toList(); List actualRecords = originalActualRecords.stream().sorted(sortComparator).toList(); @@ -295,11 +309,13 @@ private static String diffRecords( expectedRecordIndex++; actualRecordIndex++; } else if (compare < 0) { - // The expected record is missing from the actual records. Print it and move on to the next expected record. + // The expected record is missing from the actual records. Print it and move on to the next expected + // record. message += "Row was expected but missing: " + expectedRecord + "\n"; expectedRecordIndex++; } else { - // There's an actual record which isn't present in the expected records. Print it and move on to the next actual record. + // There's an actual record which isn't present in the expected records. Print it and move on to the + // next actual record. message += "Row was not expected but present: " + actualRecord + "\n"; actualRecordIndex++; } @@ -343,9 +359,9 @@ private static Instant asTimestamp(JsonNode node) { } } - /* !!!!!! WARNING !!!!!! 
- * The code below was mostly copypasted from DestinationAcceptanceTest. If you make edits here, you probably want to also edit there. - * !!!!!!!!!!!!!!!!!!!!! + /* + * !!!!!! WARNING !!!!!! The code below was mostly copypasted from DestinationAcceptanceTest. If you + * make edits here, you probably want to also edit there. !!!!!!!!!!!!!!!!!!!!! */ private Path jobRoot; @@ -384,8 +400,8 @@ private void runSync(ConfiguredAirbyteCatalog catalog, List mess new EnvVariableFeatureFlags())); destination.start(destinationConfig, jobRoot, Collections.emptyMap()); - messages.forEach(message -> Exceptions.toRuntime(() -> - destination.accept(convertProtocolObject(message, io.airbyte.protocol.models.AirbyteMessage.class)))); + messages.forEach( + message -> Exceptions.toRuntime(() -> destination.accept(convertProtocolObject(message, io.airbyte.protocol.models.AirbyteMessage.class)))); destination.notifyEndOfInput(); while (!destination.isFinished()) { From 24d0ca475d8daa7500be29017a062ea172b6330d Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Thu, 29 Jun 2023 20:21:38 -0700 Subject: [PATCH 06/46] clarify comment --- .../src/main/resources/sync1_messages.jsonl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_messages.jsonl b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_messages.jsonl index b2fc2a1ea173..0bb48b2cb1cf 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_messages.jsonl +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_messages.jsonl @@ -2,5 +2,5 @@ // This obviously makes no sense in relation to updated_at being in the year 2000, but that's OK // because (from destinations POV) updated_at has no relation to emitted_at. 
{"type": "RECORD", "record": {"emitted_at": 1000, "data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "San Francisco", "state": "CA"}}}} -// Emit a second record for id=(1,200). This generally doesn't happen in full refresh syncs - but if T+D is implemented correctly, it shouldn't matter (i.e. both records should be written to the final table). +// Emit a second record for id=(1,200) with a different updated_at. This generally doesn't happen in full refresh syncs - but if T+D is implemented correctly, it shouldn't matter (i.e. both records should be written to the final table). {"type": "RECORD", "record": {"emitted_at": 1000, "data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:01:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}}}} From d23ba3f607fd998430a16dea96dd73a4fb380f50 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Thu, 29 Jun 2023 20:28:11 -0700 Subject: [PATCH 07/46] minor tweaks --- .../destination/typing_deduping/BaseTypingDedupingTest.java | 5 +++-- .../src/main/resources/sync1_messages.jsonl | 4 +++- .../destination/bigquery/BigQueryDestinationTestUtils.java | 3 +++ .../BigQueryStandardInsertsTypingDedupingTest.java | 4 ---- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java index d80b98e481af..d0cf73831955 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java +++ 
b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java @@ -125,8 +125,9 @@ public abstract class BaseTypingDedupingTest { @BeforeEach public void setup() { - streamNamespace = Strings.addRandomSuffix("typing_deduping_test_namespace", "_", 5); - streamName = Strings.addRandomSuffix("test_stream", "_", 5); + streamNamespace = Strings.addRandomSuffix("typing_deduping_test", "_", 5); + // we don't randomize this, because randomizing the namespace is sufficient. + streamName = "test_stream"; LOGGER.info("Using stream namespace {} and name {}", streamNamespace, streamName); } diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_messages.jsonl b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_messages.jsonl index 0bb48b2cb1cf..1e0bee4bcd5a 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_messages.jsonl +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_messages.jsonl @@ -2,5 +2,7 @@ // This obviously makes no sense in relation to updated_at being in the year 2000, but that's OK // because (from destinations POV) updated_at has no relation to emitted_at. {"type": "RECORD", "record": {"emitted_at": 1000, "data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "San Francisco", "state": "CA"}}}} -// Emit a second record for id=(1,200) with a different updated_at. This generally doesn't happen in full refresh syncs - but if T+D is implemented correctly, it shouldn't matter (i.e. both records should be written to the final table). +// Emit a second record for id=(1,200) with a different updated_at. This generally doesn't happen +// in full refresh syncs - but if T+D is implemented correctly, it shouldn't matter +// (i.e. 
both records should be written to the final table). {"type": "RECORD", "record": {"emitted_at": 1000, "data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:01:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}}}} diff --git a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDestinationTestUtils.java b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDestinationTestUtils.java index cdc5e042078f..cac72e263a43 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDestinationTestUtils.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDestinationTestUtils.java @@ -21,8 +21,10 @@ import java.util.LinkedList; import java.util.List; import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class BigQueryDestinationTestUtils { + private static final Logger LOGGER = LoggerFactory.getLogger(BigQueryDestinationTestUtils.class); /** * Parse the config file and replace dataset with datasetId randomly generated by the test @@ -33,6 +35,7 @@ public class BigQueryDestinationTestUtils { * @throws IOException */ public static JsonNode createConfig(Path configFile, String datasetId) throws IOException { + LOGGER.info("Setting default dataset to {}", datasetId); final String tmpConfigAsString = Files.readString(configFile); final JsonNode tmpConfigJson = Jsons.deserialize(tmpConfigAsString); return Jsons.jsonNode(((ObjectNode) tmpConfigJson).put(BigQueryConsts.CONFIG_DATASET_ID, datasetId)); diff --git 
a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryStandardInsertsTypingDedupingTest.java b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryStandardInsertsTypingDedupingTest.java index e5bc2fa51a73..1fd4d0eaba13 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryStandardInsertsTypingDedupingTest.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryStandardInsertsTypingDedupingTest.java @@ -19,11 +19,8 @@ import java.util.List; import java.util.Map; import org.junit.jupiter.api.BeforeAll; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; public class BigQueryStandardInsertsTypingDedupingTest extends BaseTypingDedupingTest { - private static final Logger LOGGER = LoggerFactory.getLogger(BigQueryStandardInsertsTypingDedupingTest.class); private static BigQuery bq; @@ -31,7 +28,6 @@ public class BigQueryStandardInsertsTypingDedupingTest extends BaseTypingDedupin @BeforeAll public static void buildConfig() throws IOException { final String datasetId = Strings.addRandomSuffix("typing_deduping_default_dataset", "_", 5); - LOGGER.info("Setting default dataset to {}", datasetId); config = BigQueryDestinationTestUtils.createConfig(Path.of("secrets/credentials-1s1t-standard.json"), datasetId); bq = BigQueryDestination.getBigQuery(config); } From 665fb3af7c56fd78f7847a796dafb6b40b7fad11 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Fri, 30 Jun 2023 07:23:25 -0700 Subject: [PATCH 08/46] more stuff --- .../BaseTypingDedupingTest.java | 18 +++++++----------- ...QueryStandardInsertsTypingDedupingTest.java | 10 +++++----- 2 files changed, 12 insertions(+), 16 
deletions(-) diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java index d0cf73831955..16a346b7aa0e 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java @@ -95,21 +95,17 @@ public abstract class BaseTypingDedupingTest { protected abstract String getImageName(); /** - * For a given stream, return the records that exist in the destination's raw table. This _should_ - * include metadata columns (e.g. _airbyte_raw_id). The {@code _airbyte_data} column MUST be an - * {@link com.fasterxml.jackson.databind.node.ObjectNode} (i.e. it cannot be a string value). + * For a given stream, return the records that exist in the destination's raw table. Each record must be in the format + * {"_airbyte_raw_id": "...", "_airbyte_extracted_at": "...", "_airbyte_loaded_at": "...", "_airbyte_data": {fields...}}. *

- * Each record should be in the format {"_airbyte_raw_id": "...", "_airbyte_extracted_at": "...", - * "_airbyte_loaded_at": "..", "_airbyte_data": {fields...}}. + * The {@code _airbyte_data} column must be an {@link com.fasterxml.jackson.databind.node.ObjectNode} + * (i.e. it cannot be a string value). */ protected abstract List dumpRawTableRecords(String streamNamespace, String streamName) throws Exception; /** - * For a given stream, return the records that exist in the destination's final table. This _should_ - * include metadata columns (e.g. _airbyte_raw_id). - *

- * Each record should be in the format {"_airbyte_raw_id": "...", "_airbyte_extracted_at": "...", - * "_airbyte_meta": "..", "field1": ..., "field2": ..., ...}. + * For a given stream, return the records that exist in the destination's final table. Each record must be in the + * format {"_airbyte_raw_id": "...", "_airbyte_extracted_at": "...", "_airbyte_meta": {...}, "field1": ..., "field2": ..., ...}. */ protected abstract List dumpFinalTableRecords(String streamNamespace, String streamName) throws Exception; @@ -119,7 +115,7 @@ public abstract class BaseTypingDedupingTest { * wholesale. *

* In general, this should resemble - * {@code DROP TABLE airbyte.namespace_name; DROP SCHEMA namespace}. + * {@code DROP TABLE IF EXISTS airbyte.namespace_name; DROP SCHEMA IF EXISTS namespace}. */ protected abstract void teardownStreamAndNamespace(String streamNamespace, String streamName) throws Exception; diff --git a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryStandardInsertsTypingDedupingTest.java b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryStandardInsertsTypingDedupingTest.java index 1fd4d0eaba13..4cb909e382d8 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryStandardInsertsTypingDedupingTest.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryStandardInsertsTypingDedupingTest.java @@ -53,21 +53,21 @@ protected List dumpFinalTableRecords(String streamNamespace, String st @Override protected void teardownStreamAndNamespace(String streamNamespace, String streamName) { + // bq.delete simply returns false if the table/schema doesn't exist (e.g. 
if the connector failed to create it) bq.delete(TableId.of("airbyte", streamNamespace + "_" + streamName)); bq.delete(DatasetId.of(streamNamespace), BigQuery.DatasetDeleteOption.deleteContents()); } private static JsonNode toJson(LinkedHashMap map) { ObjectNode o = (ObjectNode) Jsons.emptyObject(); - for (Map.Entry entry : map.entrySet()) { - Object value = entry.getValue(); + map.forEach((key, value) -> { if (value instanceof Instant i) { // naively converting an Instant returns a DecimalNode with the unix epoch, so instead we manually stringify it - o.set(entry.getKey(), Jsons.jsonNode(i.toString())); + o.set(key, Jsons.jsonNode(i.toString())); } else { - o.set(entry.getKey(), Jsons.jsonNode(value)); + o.set(key, Jsons.jsonNode(value)); } - } + }); return o; } } From bd61d1331b741f709b941412e4ef53a714dbe232 Mon Sep 17 00:00:00 2001 From: edgao Date: Fri, 30 Jun 2023 14:28:28 +0000 Subject: [PATCH 09/46] Automated Commit - Formatting Changes --- .../typing_deduping/BaseTypingDedupingTest.java | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java index 16a346b7aa0e..7dde276118fa 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java @@ -95,17 +95,19 @@ public abstract class BaseTypingDedupingTest { protected abstract String getImageName(); /** - * For a given stream, return the records that exist in the destination's raw table. 
Each record must be in the format - * {"_airbyte_raw_id": "...", "_airbyte_extracted_at": "...", "_airbyte_loaded_at": "...", "_airbyte_data": {fields...}}. + * For a given stream, return the records that exist in the destination's raw table. Each record + * must be in the format {"_airbyte_raw_id": "...", "_airbyte_extracted_at": "...", + * "_airbyte_loaded_at": "...", "_airbyte_data": {fields...}}. *

- * The {@code _airbyte_data} column must be an {@link com.fasterxml.jackson.databind.node.ObjectNode} - * (i.e. it cannot be a string value). + * The {@code _airbyte_data} column must be an + * {@link com.fasterxml.jackson.databind.node.ObjectNode} (i.e. it cannot be a string value). */ protected abstract List dumpRawTableRecords(String streamNamespace, String streamName) throws Exception; /** - * For a given stream, return the records that exist in the destination's final table. Each record must be in the - * format {"_airbyte_raw_id": "...", "_airbyte_extracted_at": "...", "_airbyte_meta": {...}, "field1": ..., "field2": ..., ...}. + * For a given stream, return the records that exist in the destination's final table. Each record + * must be in the format {"_airbyte_raw_id": "...", "_airbyte_extracted_at": "...", "_airbyte_meta": + * {...}, "field1": ..., "field2": ..., ...}. */ protected abstract List dumpFinalTableRecords(String streamNamespace, String streamName) throws Exception; From 9911b5b31b15d2ab89dafe9aade3e9793d2b1988 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Fri, 30 Jun 2023 07:46:34 -0700 Subject: [PATCH 10/46] minor cleanup --- .../BaseTypingDedupingTest.java | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java index 7dde276118fa..4ddce2a28d4d 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java @@ -173,23 +173,24 @@ private static JsonNode 
getSchema() throws IOException { return Jsons.deserialize(MoreResources.readResource("schema.json")); } - private List readMessages(String filename) throws IOException { + private List readRecords(String filename) throws IOException { return MoreResources.readResource(filename).lines() + .map(String::trim) + .filter(line -> !line.isEmpty()) .filter(line -> !line.startsWith("//")) - .map(jsonString -> Jsons.deserialize(jsonString, AirbyteMessage.class)) + .map(Jsons::deserialize) + .toList(); + } + + private List readMessages(String filename) throws IOException { + return readRecords(filename).stream() + .map(record -> Jsons.convertValue(record, AirbyteMessage.class)) .peek(message -> { message.getRecord().setNamespace(streamNamespace); message.getRecord().setStream(streamName); }).toList(); } - private List readRecords(String filename) throws IOException { - return MoreResources.readResource(filename).lines() - .filter(line -> !line.startsWith("//")) - .map(Jsons::deserialize) - .toList(); - } - private void verifySyncResult(List expectedRawRecords, List expectedFinalRecords) throws Exception { List actualRawRecords = dumpRawTableRecords(streamNamespace, streamName); String rawDiff = diffRawTableRecords(expectedRawRecords, actualRawRecords); From f06815ede564dcd647befa59da0d231881e771e4 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Fri, 30 Jun 2023 09:57:00 -0700 Subject: [PATCH 11/46] lots of fixes * handle sql vs json null better * verify extra columns * only check deleted_at if in DEDUP mode and the column exists * add full refresh append test case --- .../BaseTypingDedupingTest.java | 117 ++++++++++++++---- .../src/main/resources/schema.json | 6 + ...drecords_fullrefresh_overwrite_final.jsonl | 2 - ...tedrecords_fullrefresh_overwrite_raw.jsonl | 3 - ...sync1_expectedrecords_nondedup_final.jsonl | 5 + .../sync1_expectedrecords_nondedup_raw.jsonl | 6 + .../src/main/resources/sync1_messages.jsonl | 8 +- ...ctedrecords_fullrefresh_append_final.jsonl | 8 ++ 
...pectedrecords_fullrefresh_append_raw.jsonl | 9 ++ ...drecords_fullrefresh_overwrite_final.jsonl | 5 +- ...tedrecords_fullrefresh_overwrite_raw.jsonl | 5 +- .../src/main/resources/sync2_messages.jsonl | 7 +- .../typing_deduping/BigQuerySqlGenerator.java | 23 ++-- .../BigQuerySqlGeneratorIntegrationTest.java | 6 +- ...ueryStandardInsertsTypingDedupingTest.java | 4 +- 15 files changed, 169 insertions(+), 45 deletions(-) delete mode 100644 airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_expectedrecords_fullrefresh_overwrite_final.jsonl delete mode 100644 airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_expectedrecords_fullrefresh_overwrite_raw.jsonl create mode 100644 airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_expectedrecords_nondedup_final.jsonl create mode 100644 airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_expectedrecords_nondedup_raw.jsonl create mode 100644 airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_expectedrecords_fullrefresh_append_final.jsonl create mode 100644 airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_expectedrecords_fullrefresh_append_raw.jsonl diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java index 4ddce2a28d4d..948c5d18b613 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java @@ -32,7 +32,9 @@ import java.time.Instant; import 
java.util.Collections; import java.util.Comparator; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; import java.util.Objects; import java.util.UUID; import java.util.function.Function; @@ -108,6 +110,12 @@ public abstract class BaseTypingDedupingTest { * For a given stream, return the records that exist in the destination's final table. Each record * must be in the format {"_airbyte_raw_id": "...", "_airbyte_extracted_at": "...", "_airbyte_meta": * {...}, "field1": ..., "field2": ..., ...}. + *

+ * For JSON-valued columns, there is some nuance: a SQL null should be represented as a missing entry, whereas a JSON + * null should be represented as a {@link com.fasterxml.jackson.databind.node.NullNode}. For example, in the JSON blob + * {"name": null}, the `name` field is a JSON null, and the `address` field is a SQL null. + *

+ * The corresponding SQL looks like {@code INSERT INTO ... (name, address) VALUES ('null' :: jsonb, NULL)}. */ protected abstract List dumpFinalTableRecords(String streamNamespace, String streamName) throws Exception; @@ -124,8 +132,7 @@ public abstract class BaseTypingDedupingTest { @BeforeEach public void setup() { streamNamespace = Strings.addRandomSuffix("typing_deduping_test", "_", 5); - // we don't randomize this, because randomizing the namespace is sufficient. - streamName = "test_stream"; + streamName = Strings.addRandomSuffix("test_stream", "_", 5); LOGGER.info("Using stream namespace {} and name {}", streamNamespace, streamName); } @@ -155,8 +162,8 @@ public void fullRefreshOverwrite() throws Exception { runSync(catalog, messages1); - List expectedRawRecords1 = readRecords("sync1_expectedrecords_fullrefresh_overwrite_raw.jsonl"); - List expectedFinalRecords1 = readRecords("sync1_expectedrecords_fullrefresh_overwrite_final.jsonl"); + List expectedRawRecords1 = readRecords("sync1_expectedrecords_nondedup_raw.jsonl"); + List expectedFinalRecords1 = readRecords("sync1_expectedrecords_nondedup_final.jsonl"); verifySyncResult(expectedRawRecords1, expectedFinalRecords1); // Second sync @@ -169,6 +176,41 @@ public void fullRefreshOverwrite() throws Exception { verifySyncResult(expectedRawRecords2, expectedFinalRecords2); } + /** + * Starting with an empty destination, execute a full refresh append sync. Verify that the + * records are written to the destination table. Then run a second sync, and verify that the old and new records + * are all present. 
+ */ + @Test + public void fullRefreshAppend() throws Exception { + ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams(List.of( + new ConfiguredAirbyteStream() + .withSyncMode(SyncMode.FULL_REFRESH) + .withDestinationSyncMode(DestinationSyncMode.APPEND) + .withStream(new AirbyteStream() + .withNamespace(streamNamespace) + .withName(streamName) + .withJsonSchema(getSchema())))); + + // First sync + List messages1 = readMessages("sync1_messages.jsonl"); + + runSync(catalog, messages1); + + List expectedRawRecords1 = readRecords("sync1_expectedrecords_nondedup_raw.jsonl"); + List expectedFinalRecords1 = readRecords("sync1_expectedrecords_nondedup_final.jsonl"); + verifySyncResult(expectedRawRecords1, expectedFinalRecords1); + + // Second sync + List messages2 = readMessages("sync2_messages.jsonl"); + + runSync(catalog, messages2); + + List expectedRawRecords2 = readRecords("sync2_expectedrecords_fullrefresh_append_raw.jsonl"); + List expectedFinalRecords2 = readRecords("sync2_expectedrecords_fullrefresh_append_final.jsonl"); + verifySyncResult(expectedRawRecords2, expectedFinalRecords2); + } + private static JsonNode getSchema() throws IOException { return Jsons.deserialize(MoreResources.readResource("schema.json")); } @@ -271,37 +313,42 @@ private static String diffRecords( // These records should be the same. Find the specific fields that are different. boolean foundMismatch = false; String mismatchedRecordMessage = "Row had incorrect data:" + recordIdExtractor.apply(expectedRecord) + "\n"; - for (String key : Streams.stream(expectedRecord.fieldNames()).sorted().toList()) { - if (extractRawData && "_airbyte_data".equals(key)) { + // Iterate through each field in the expected record and compare it to the actual record's value. 
+ for (String column : Streams.stream(expectedRecord.fieldNames()).sorted().toList()) { + if (extractRawData && "_airbyte_data".equals(column)) { JsonNode expectedRawData = expectedRecord.get("_airbyte_data"); JsonNode actualRawData = actualRecord.get("_airbyte_data"); for (String field : Streams.stream(expectedRawData.fieldNames()).sorted().toList()) { JsonNode expectedValue = expectedRawData.get(field); JsonNode actualValue = actualRawData.get(field); - // This is kind of sketchy, but seems to work fine for the data we have in our test cases. - if (!Objects.equals(expectedValue, actualValue) - // Objects.equals expects the two values to be the same class. - // We need to handle comparisons between e.g. LongNode and IntNode. - && !(expectedValue.isIntegralNumber() && actualValue.isIntegralNumber() && expectedValue.asLong() == actualValue.asLong()) - && !(expectedValue.isNumber() && actualValue.isNumber() && expectedValue.asDouble() == actualValue.asDouble())) { - mismatchedRecordMessage += " For _airbyte_data." + field + ", expected " + expectedValue + " but got " + actualValue + "\n"; + if (jsonNodesNotEquivalent(expectedValue, actualValue)) { + mismatchedRecordMessage += generateFieldError("_airbyte_data." + field, expectedValue, actualValue); + foundMismatch = true; + } + } + LinkedHashMap extraColumns = checkForExtraFields(expectedRawData, actualRawData); + if (extraColumns.size() > 0) { + for (Map.Entry extraColumn : extraColumns.entrySet()) { + mismatchedRecordMessage += generateFieldError("_airbyte_data." + extraColumn.getKey(), null, extraColumn.getValue()); foundMismatch = true; } } } else { - JsonNode expectedValue = expectedRecord.get(key); - JsonNode actualValue = actualRecord.get(key); - // This is kind of sketchy, but seems to work fine for the data we have in our test cases. - if (!Objects.equals(expectedValue, actualValue) - // Objects.equals expects the two values to be the same class. - // We need to handle comparisons between e.g. 
LongNode and IntNode. - && !(expectedValue.isIntegralNumber() && actualValue.isIntegralNumber() && expectedValue.asLong() == actualValue.asLong()) - && !(expectedValue.isNumber() && actualValue.isNumber() && expectedValue.asDouble() == actualValue.asDouble())) { - mismatchedRecordMessage += " For key " + key + ", expected " + expectedValue + " but got " + actualValue + "\n"; + JsonNode expectedValue = expectedRecord.get(column); + JsonNode actualValue = actualRecord.get(column); + if (jsonNodesNotEquivalent(expectedValue, actualValue)) { + mismatchedRecordMessage += generateFieldError("column " + column, expectedValue, actualValue); foundMismatch = true; } } } + LinkedHashMap extraColumns = checkForExtraFields(expectedRecord, actualRecord); + if (extraColumns.size() > 0) { + for (Map.Entry extraColumn : extraColumns.entrySet()) { + mismatchedRecordMessage += generateFieldError("column " + extraColumn.getKey(), null, extraColumn.getValue()); + foundMismatch = true; + } + } if (foundMismatch) { message += mismatchedRecordMessage; } @@ -333,6 +380,32 @@ private static String diffRecords( return message; } + private static boolean jsonNodesNotEquivalent(JsonNode expectedValue, JsonNode actualValue) { + // This is kind of sketchy, but seems to work fine for the data we have in our test cases. + return !Objects.equals(expectedValue, actualValue) + // Objects.equals expects the two values to be the same class. + // We need to handle comparisons between e.g. LongNode and IntNode. 
+ && !(expectedValue.isIntegralNumber() && actualValue.isIntegralNumber() && expectedValue.asLong() == actualValue.asLong()) + && !(expectedValue.isNumber() && actualValue.isNumber() && expectedValue.asDouble() == actualValue.asDouble()); + } + + private static LinkedHashMap checkForExtraFields(JsonNode expectedRecord, JsonNode actualRecord) { + LinkedHashMap extraFields = new LinkedHashMap<>(); + for (String column : Streams.stream(actualRecord.fieldNames()).sorted().toList()) { + // loaded_at and raw_id are generated dynamically, so we just ignore them. + if (!"_airbyte_loaded_at".equals(column) && !"_airbyte_raw_id".equals(column) && !expectedRecord.has(column)) { + extraFields.put(column, actualRecord.get(column)); + } + } + return extraFields; + } + + private static String generateFieldError(String fieldname, JsonNode expectedValue, JsonNode actualValue) { + String expectedString = expectedValue == null ? "SQL NULL (i.e. no value)" : expectedValue.toString(); + String actualString = actualValue == null ? "SQL NULL (i.e. 
no value)" : actualValue.toString(); + return " For " + fieldname + ", expected " + expectedString + " but got " + actualString + "\n"; + } + private static long asInt(JsonNode node) { if (node == null || !node.isIntegralNumber()) { return Integer.MIN_VALUE; diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/schema.json b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/schema.json index cc196c91f5e5..e391324deaf7 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/schema.json +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/schema.json @@ -18,6 +18,12 @@ "city": { "type": "string" }, "state": { "type": "string" } } + }, + "age": { "type": "integer" }, + "registration_date": { + "type": "string", + "format": "date", + "airbyte_type": "date" } } } diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_expectedrecords_fullrefresh_overwrite_final.jsonl b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_expectedrecords_fullrefresh_overwrite_final.jsonl deleted file mode 100644 index 0b68fdcc802f..000000000000 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_expectedrecords_fullrefresh_overwrite_final.jsonl +++ /dev/null @@ -1,2 +0,0 @@ -{"_airbyte_extracted_at":"1970-01-01T00:00:01Z","_airbyte_meta":{"errors":[]},"id1":1,"id2":200,"updated_at":"2000-01-01T00:00:00Z","_ab_cdc_deleted_at":null,"name":"Alice","address":{"city":"San Francisco","state":"CA"}} -{"_airbyte_extracted_at":"1970-01-01T00:00:01Z","_airbyte_meta":{"errors":[]},"id1":1,"id2":200,"updated_at":"2000-01-01T00:01:00Z","_ab_cdc_deleted_at":null,"name":"Alice","address":{"city":"Los Angeles","state":"CA"}} diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_expectedrecords_fullrefresh_overwrite_raw.jsonl 
b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_expectedrecords_fullrefresh_overwrite_raw.jsonl deleted file mode 100644 index 3010e4b5d73d..000000000000 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_expectedrecords_fullrefresh_overwrite_raw.jsonl +++ /dev/null @@ -1,3 +0,0 @@ -{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "San Francisco", "state": "CA"}}} -// Note the duplicate record. In this sync mode, we don't dedup anything. -{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:01:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}}} diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_expectedrecords_nondedup_final.jsonl b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_expectedrecords_nondedup_final.jsonl new file mode 100644 index 000000000000..623527f41e75 --- /dev/null +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_expectedrecords_nondedup_final.jsonl @@ -0,0 +1,5 @@ +{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_meta": {"errors":[]}, "id1": 1, "id2": 200, "updated_at": "2000-01-01T00:00:00Z", "name": "Alice", "address": {"city": "San Francisco", "state": "CA"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_meta": {"errors":[]}, "id1": 1, "id2": 200, "updated_at": "2000-01-01T00:01:00Z", "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_meta": {"errors":[]}, "id1": 1, "id2": 201, "updated_at": "2000-01-01T00:02:00Z", "name": "Bob", "address": {"city": "Boston", "state": "MA"}} +// Invalid columns are nulled out (i.e. 
SQL null, not JSON null) +{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_meta": {"errors":["Problem with `age`", "Problem with `registration_date`"]}, "id1": 2, "id2": 200, "updated_at": "2000-01-01T00:03:00Z", "name": "Charlie"} diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_expectedrecords_nondedup_raw.jsonl b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_expectedrecords_nondedup_raw.jsonl new file mode 100644 index 000000000000..4b4db08115e5 --- /dev/null +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_expectedrecords_nondedup_raw.jsonl @@ -0,0 +1,6 @@ +{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "San Francisco", "state": "CA"}}} +// Note the duplicate record. In this sync mode, we don't dedup anything. +{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:01:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-01T00:02:00Z", "name": "Bob", "address": {"city": "Boston", "state": "MA"}}} +// Invalid data is still allowed in the raw table. 
+{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_data": {"id1": 2, "id2": 200, "updated_at": "2000-01-01T00:03:00Z", "name": "Charlie", "age": "this is not an integer", "registration_date": "this is not a date"}} diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_messages.jsonl b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_messages.jsonl index 1e0bee4bcd5a..4c5dec1a24ea 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_messages.jsonl +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_messages.jsonl @@ -1,8 +1,12 @@ // emitted_at:1000 is equal to 1970-01-01 00:00:01Z, which is what you'll see in the expected records. // This obviously makes no sense in relation to updated_at being in the year 2000, but that's OK // because (from destinations POV) updated_at has no relation to emitted_at. -{"type": "RECORD", "record": {"emitted_at": 1000, "data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "San Francisco", "state": "CA"}}}} +{"type": "RECORD", "record": {"emitted_at": 1000, "data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "San Francisco", "state": "CA"}}}} // Emit a second record for id=(1,200) with a different updated_at. This generally doesn't happen // in full refresh syncs - but if T+D is implemented correctly, it shouldn't matter // (i.e. both records should be written to the final table). 
-{"type": "RECORD", "record": {"emitted_at": 1000, "data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:01:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}}}} +{"type": "RECORD", "record": {"emitted_at": 1000, "data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:01:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}}}} +// Emit a record with no _ab_cdc_deleted_at field. CDC sources typically emit an explicit null, but we should handle both cases. +{"type": "RECORD", "record": {"emitted_at": 1000, "data": {"id1": 1, "id2": 201, "updated_at": "2000-01-01T00:02:00Z", "name": "Bob", "address": {"city": "Boston", "state": "MA"}}}} +// Emit a record with an invalid age. +{"type": "RECORD", "record": {"emitted_at": 1000, "data": {"id1": 2, "id2": 200, "updated_at": "2000-01-01T00:03:00Z", "name": "Charlie", "age": "this is not an integer", "registration_date": "this is not a date"}}} diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_expectedrecords_fullrefresh_append_final.jsonl b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_expectedrecords_fullrefresh_append_final.jsonl new file mode 100644 index 000000000000..2e935f18f357 --- /dev/null +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_expectedrecords_fullrefresh_append_final.jsonl @@ -0,0 +1,8 @@ +{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_meta": {"errors":[]}, "id1": 1, "id2": 200, "updated_at": "2000-01-01T00:00:00Z", "name": "Alice", "address": {"city": "San Francisco", "state": "CA"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_meta": {"errors":[]}, "id1": 1, "id2": 200, "updated_at": "2000-01-01T00:01:00Z", "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_meta": {"errors":[]}, "id1": 
1, "id2": 201, "updated_at": "2000-01-01T00:02:00Z", "name": "Bob", "address": {"city": "Boston", "state": "MA"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_meta": {"errors":["Problem with `age`", "Problem with `registration_date`"]}, "id1": 2, "id2": 200, "updated_at": "2000-01-01T00:03:00Z", "name": "Charlie"} + +{"_airbyte_extracted_at": "1970-01-01T00:00:02Z", "_airbyte_meta":{"errors":[]}, "id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00Z", "name": "Alice", "address": {"city": "Seattle", "state": "WA"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02Z", "_airbyte_meta":{"errors":[]}, "id1": 1, "id2": 201, "updated_at": "2000-01-02T00:00:00Z", "name": "Bob", "address": {"city": "New York", "state": "NY"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02Z", "_airbyte_meta":{"errors":[]}, "id1": 1, "id2": 201, "updated_at": "2000-01-02T00:01:00Z", "_ab_cdc_deleted_at": "1970-01-01:00:00:00Z"} diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_expectedrecords_fullrefresh_append_raw.jsonl b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_expectedrecords_fullrefresh_append_raw.jsonl new file mode 100644 index 000000000000..5cf2a7f389ce --- /dev/null +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_expectedrecords_fullrefresh_append_raw.jsonl @@ -0,0 +1,9 @@ +// We keep the records from the first sync +{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "San Francisco", "state": "CA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:01:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_data": {"id1": 1, "id2": 201, 
"updated_at": "2000-01-01T00:02:00Z", "name": "Bob", "address": {"city": "Boston", "state": "MA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_data": {"id1": 2, "id2": 200, "updated_at": "2000-01-01T00:03:00Z", "name": "Charlie", "age": "this is not an integer", "registration_date": "this is not a date"}} +// And append the records from the second sync +{"_airbyte_extracted_at": "1970-01-01T00:00:02Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Seattle", "state": "WA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Bob", "address": {"city": "New York", "state": "NY"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:01:00Z", "_ab_cdc_deleted_at": "1970-01-01:00:00:00Z"}} diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_expectedrecords_fullrefresh_overwrite_final.jsonl b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_expectedrecords_fullrefresh_overwrite_final.jsonl index 80df5e903881..0c06d6b00117 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_expectedrecords_fullrefresh_overwrite_final.jsonl +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_expectedrecords_fullrefresh_overwrite_final.jsonl @@ -1,2 +1,3 @@ -{"_airbyte_extracted_at": "1970-01-01T00:00:02Z", "_airbyte_meta":{"errors":[]}, "id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Seattle", "state": "WA"}} -{"_airbyte_extracted_at": "1970-01-01T00:00:02Z", "_airbyte_meta":{"errors":[]}, "id1": 1, "id2": 201, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": 
"Bob", "address": {"city": "New York", "state": "NY"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02Z", "_airbyte_meta":{"errors":[]}, "id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00Z", "name": "Alice", "address": {"city": "Seattle", "state": "WA"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02Z", "_airbyte_meta":{"errors":[]}, "id1": 1, "id2": 201, "updated_at": "2000-01-02T00:00:00Z", "name": "Bob", "address": {"city": "New York", "state": "NY"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02Z", "_airbyte_meta":{"errors":[]}, "id1": 1, "id2": 201, "updated_at": "2000-01-02T00:01:00Z", "_ab_cdc_deleted_at": "1970-01-01:00:00:00Z"} diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_expectedrecords_fullrefresh_overwrite_raw.jsonl b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_expectedrecords_fullrefresh_overwrite_raw.jsonl index 6f48c9630b3d..79554272b9a6 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_expectedrecords_fullrefresh_overwrite_raw.jsonl +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_expectedrecords_fullrefresh_overwrite_raw.jsonl @@ -1,2 +1,3 @@ -{"_airbyte_extracted_at": "1970-01-01T00:00:02Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Seattle", "state": "WA"}}} -{"_airbyte_extracted_at": "1970-01-01T00:00:02Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Bob", "address": {"city": "New York", "state": "NY"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Seattle", "state": "WA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02Z", "_airbyte_data": {"id1": 1, "id2": 201, 
"updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Bob", "address": {"city": "New York", "state": "NY"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:01:00Z", "_ab_cdc_deleted_at": "1970-01-01:00:00:00Z"}} diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_messages.jsonl b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_messages.jsonl index 49d8f5a605eb..1f828f31f5d3 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_messages.jsonl +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_messages.jsonl @@ -1,2 +1,5 @@ -{"type": "RECORD", "record": {"emitted_at": 2000, "data": {"id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Seattle", "state": "WA"}}}} -{"type": "RECORD", "record": {"emitted_at": 2000, "data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Bob", "address": {"city": "New York", "state": "NY"}}}} +{"type": "RECORD", "record": {"emitted_at": 2000, "data": {"id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Seattle", "state": "WA"}}}} +{"type": "RECORD", "record": {"emitted_at": 2000, "data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Bob", "address": {"city": "New York", "state": "NY"}}}} +// Set deleted_at to something non-null. Again, T+D doesn't check the actual _value_ of deleted_at (i.e. the fact that it's in the past is irrelevant). +// It only cares whether deleted_at is non-null. So this should delete Bob from the final table (in dedup mode). 
+{"type": "RECORD", "record": {"emitted_at": 2000, "data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:01:00Z", "_ab_cdc_deleted_at": "1970-01-01:00:00:00Z"}}} diff --git a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGenerator.java b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGenerator.java index d32ecec9456b..c4c3be8a91af 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGenerator.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGenerator.java @@ -221,7 +221,7 @@ public String updateTable(final String finalSuffix, final StreamConfig stream) { if (stream.destinationSyncMode() == DestinationSyncMode.APPEND_DEDUP) { validatePrimaryKeys = validatePrimaryKeys(stream.id(), stream.primaryKey(), stream.columns()); } - final String insertNewRecords = insertNewRecords(stream.id(), finalSuffix, stream.columns()); + final String insertNewRecords = insertNewRecords(stream.id(), finalSuffix, stream.columns(), stream.destinationSyncMode()); String dedupFinalTable = ""; String dedupRawTable = ""; if (stream.destinationSyncMode() == DestinationSyncMode.APPEND_DEDUP) { @@ -283,7 +283,7 @@ SELECT COUNT(1) } @VisibleForTesting - String insertNewRecords(final StreamId id, final String finalSuffix, final LinkedHashMap streamColumns) { + String insertNewRecords(final StreamId id, final String finalSuffix, final LinkedHashMap streamColumns, DestinationSyncMode destinationSyncMode) { final String columnCasts = streamColumns.entrySet().stream().map( col -> extractAndCast(col.getKey(), col.getValue()) + " as " + col.getKey().name(QUOTE) + ",") .collect(joining("\n")); @@ -302,6 +302,17 @@ String 
insertNewRecords(final StreamId id, final String finalSuffix, final Linke END""")) .collect(joining(",\n")); final String columnList = streamColumns.keySet().stream().map(quotedColumnId -> quotedColumnId.name(QUOTE) + ",").collect(joining("\n")); + final String deletionClause; + if (destinationSyncMode == DestinationSyncMode.APPEND_DEDUP && streamColumns.keySet().stream().anyMatch(col -> "_ab_cdc_deleted_at".equals(col.originalName()))) { + deletionClause = """ + AND ( + JSON_QUERY(`_airbyte_data`, '$._ab_cdc_deleted_at') IS NULL + OR JSON_TYPE(JSON_QUERY(`_airbyte_data`, '$._ab_cdc_deleted_at')) = 'null' + ) + """; + } else { + deletionClause = ""; + } // Note that we intentionally excluded deleted records from this insert. See dedupRawRecords for an // explanation of how CDC deletes work. @@ -310,7 +321,8 @@ String insertNewRecords(final StreamId id, final String finalSuffix, final Linke "final_table_id", id.finalTableId(finalSuffix, QUOTE), "column_casts", columnCasts, "column_errors", columnErrors, - "column_list", columnList)).replace( + "column_list", columnList, + "deletion_clause", deletionClause)).replace( """ INSERT INTO ${final_table_id} ( @@ -330,10 +342,7 @@ WITH intermediate_data AS ( FROM ${raw_table_id} WHERE _airbyte_loaded_at IS NULL - AND ( - JSON_QUERY(`_airbyte_data`, '$._ab_cdc_deleted_at') IS NULL - OR JSON_TYPE(JSON_QUERY(`_airbyte_data`, '$._ab_cdc_deleted_at')) = 'null' - ) + ${deletion_clause} ) SELECT ${column_list} diff --git a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGeneratorIntegrationTest.java b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGeneratorIntegrationTest.java index e6117f0a7813..ac4f3bf71a32 100644 --- 
a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGeneratorIntegrationTest.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGeneratorIntegrationTest.java @@ -216,7 +216,7 @@ public void testInsertNewRecordsIncremental() throws InterruptedException { """)) .build()); - final String sql = GENERATOR.insertNewRecords(streamId, "", COLUMNS); + final String sql = GENERATOR.insertNewRecords(streamId, "", COLUMNS, DestinationSyncMode.OVERWRITE); logAndExecute(sql); final TableResult result = bq.query(QueryJobConfiguration.newBuilder("SELECT * FROM " + streamId.finalTableId(QUOTE)).build()); @@ -897,6 +897,8 @@ private static void logAndExecute(final String sql) throws InterruptedException /** * FieldValueList stores everything internally as string (I think?) but provides conversions to more useful types. * This method does that conversion, using the schema to determine which type is most appropriate. + *

+ * SQL nulls are represented as explicit null values. JSON nulls are represented as {@link com.fasterxml.jackson.databind.node.NullNode}. */ private static LinkedHashMap toMap(Schema schema, FieldValueList row) { final LinkedHashMap map = new LinkedHashMap<>(); @@ -904,7 +906,7 @@ private static LinkedHashMap toMap(Schema schema, FieldValueList final Field field = schema.getFields().get(i); final FieldValue value = row.get(i); Object typedValue; - if (value.getValue() == null) { + if (value.isNull()) { typedValue = null; } else { typedValue = switch (field.getType().getStandardType()) { diff --git a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryStandardInsertsTypingDedupingTest.java b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryStandardInsertsTypingDedupingTest.java index 4cb909e382d8..0da1f2945c65 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryStandardInsertsTypingDedupingTest.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryStandardInsertsTypingDedupingTest.java @@ -61,7 +61,9 @@ protected void teardownStreamAndNamespace(String streamNamespace, String streamN private static JsonNode toJson(LinkedHashMap map) { ObjectNode o = (ObjectNode) Jsons.emptyObject(); map.forEach((key, value) -> { - if (value instanceof Instant i) { + if (value == null) { + // If the value is null, do nothing. We don't want to insert it into the json at all. 
+ } else if (value instanceof Instant i) { // naively converting an Instant returns a DecimalNode with the unix epoch, so instead we manually stringify it o.set(key, Jsons.jsonNode(i.toString())); } else { From d05365a89c09df4e042d7e6570758b0be2a1186a Mon Sep 17 00:00:00 2001 From: edgao Date: Fri, 30 Jun 2023 17:00:58 +0000 Subject: [PATCH 12/46] Automated Commit - Formatting Changes --- .../typing_deduping/BaseTypingDedupingTest.java | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java index 948c5d18b613..861ac7e34c08 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java @@ -111,11 +111,13 @@ public abstract class BaseTypingDedupingTest { * must be in the format {"_airbyte_raw_id": "...", "_airbyte_extracted_at": "...", "_airbyte_meta": * {...}, "field1": ..., "field2": ..., ...}. *

- * For JSON-valued columns, there is some nuance: a SQL null should be represented as a missing entry, whereas a JSON - * null should be represented as a {@link com.fasterxml.jackson.databind.node.NullNode}. For example, in the JSON blob - * {"name": null}, the `name` field is a JSON null, and the `address` field is a SQL null. + * For JSON-valued columns, there is some nuance: a SQL null should be represented as a missing + * entry, whereas a JSON null should be represented as a + * {@link com.fasterxml.jackson.databind.node.NullNode}. For example, in the JSON blob {"name": + * null}, the `name` field is a JSON null, and the `address` field is a SQL null. *

- * The corresponding SQL looks like {@code INSERT INTO ... (name, address) VALUES ('null' :: jsonb, NULL)}. + * The corresponding SQL looks like + * {@code INSERT INTO ... (name, address) VALUES ('null' :: jsonb, NULL)}. */ protected abstract List dumpFinalTableRecords(String streamNamespace, String streamName) throws Exception; @@ -177,9 +179,9 @@ public void fullRefreshOverwrite() throws Exception { } /** - * Starting with an empty destination, execute a full refresh append sync. Verify that the - * records are written to the destination table. Then run a second sync, and verify that the old and new records - * are all present. + * Starting with an empty destination, execute a full refresh append sync. Verify that the records + * are written to the destination table. Then run a second sync, and verify that the old and new + * records are all present. */ @Test public void fullRefreshAppend() throws Exception { From 55887680a4c0319135b930942d4035744f438841 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Fri, 30 Jun 2023 11:44:12 -0700 Subject: [PATCH 13/46] add tests for the remaining sync modes --- .../BaseTypingDedupingTest.java | 75 +++++++++++++++++++ .../sync1_expectedrecords_dedup_final.jsonl | 4 + .../sync1_expectedrecords_dedup_raw.jsonl | 4 + ...ectedrecords_incremental_dedup_final.jsonl | 3 + ...xpectedrecords_incremental_dedup_raw.jsonl | 5 ++ .../typing_deduping/BigQuerySqlGenerator.java | 2 +- 6 files changed, 92 insertions(+), 1 deletion(-) create mode 100644 airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_expectedrecords_dedup_final.jsonl create mode 100644 airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_expectedrecords_dedup_raw.jsonl create mode 100644 airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_expectedrecords_incremental_dedup_final.jsonl create mode 100644 
airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_expectedrecords_incremental_dedup_raw.jsonl diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java index 861ac7e34c08..db43e67bab3f 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java @@ -213,6 +213,81 @@ public void fullRefreshAppend() throws Exception { verifySyncResult(expectedRawRecords2, expectedFinalRecords2); } + /** + * Starting with an empty destination, execute an incremental append sync. + *

+ * This is (not so secretly) identical to {@link #fullRefreshAppend()}, and uses the same set of expected records. + * Incremental as a concept only exists in the source. From the destination's perspective, we only care about the + * destination sync mode. + */ + @Test + public void incrementalAppend() throws Exception { + ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams(List.of( + new ConfiguredAirbyteStream() + // These two lines are literally the only difference between this test and fullRefreshAppend + .withSyncMode(SyncMode.INCREMENTAL) + .withCursorField(List.of("updated_at")) + .withDestinationSyncMode(DestinationSyncMode.APPEND) + .withStream(new AirbyteStream() + .withNamespace(streamNamespace) + .withName(streamName) + .withJsonSchema(getSchema())))); + + // First sync + List messages1 = readMessages("sync1_messages.jsonl"); + + runSync(catalog, messages1); + + List expectedRawRecords1 = readRecords("sync1_expectedrecords_nondedup_raw.jsonl"); + List expectedFinalRecords1 = readRecords("sync1_expectedrecords_nondedup_final.jsonl"); + verifySyncResult(expectedRawRecords1, expectedFinalRecords1); + + // Second sync + List messages2 = readMessages("sync2_messages.jsonl"); + + runSync(catalog, messages2); + + List expectedRawRecords2 = readRecords("sync2_expectedrecords_fullrefresh_append_raw.jsonl"); + List expectedFinalRecords2 = readRecords("sync2_expectedrecords_fullrefresh_append_final.jsonl"); + verifySyncResult(expectedRawRecords2, expectedFinalRecords2); + } + + /** + * Starting with an empty destination, execute an incremental dedup sync. Verify that the records are written to the + * destination table. Then run a second sync, and verify that the raw/final tables contain the correct records. 
+ */ + @Test + public void incrementalDedup() throws Exception { + ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams(List.of( + new ConfiguredAirbyteStream() + .withSyncMode(SyncMode.INCREMENTAL) + .withCursorField(List.of("updated_at")) + .withDestinationSyncMode(DestinationSyncMode.APPEND_DEDUP) + .withPrimaryKey(List.of(List.of("id1"), List.of("id2"))) + .withStream(new AirbyteStream() + .withNamespace(streamNamespace) + .withName(streamName) + .withJsonSchema(getSchema())))); + + // First sync + List messages1 = readMessages("sync1_messages.jsonl"); + + runSync(catalog, messages1); + + List expectedRawRecords1 = readRecords("sync1_expectedrecords_dedup_raw.jsonl"); + List expectedFinalRecords1 = readRecords("sync1_expectedrecords_dedup_final.jsonl"); + verifySyncResult(expectedRawRecords1, expectedFinalRecords1); + + // Second sync + List messages2 = readMessages("sync2_messages.jsonl"); + + runSync(catalog, messages2); + + List expectedRawRecords2 = readRecords("sync2_expectedrecords_incremental_dedup_raw.jsonl"); + List expectedFinalRecords2 = readRecords("sync2_expectedrecords_incremental_dedup_final.jsonl"); + verifySyncResult(expectedRawRecords2, expectedFinalRecords2); + } + private static JsonNode getSchema() throws IOException { return Jsons.deserialize(MoreResources.readResource("schema.json")); } diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_expectedrecords_dedup_final.jsonl b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_expectedrecords_dedup_final.jsonl new file mode 100644 index 000000000000..e456f48d443a --- /dev/null +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_expectedrecords_dedup_final.jsonl @@ -0,0 +1,4 @@ +// Keep the Alice record with more recent updated_at +{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_meta": {"errors":[]}, "id1": 1, "id2": 200, "updated_at": "2000-01-01T00:01:00Z", 
"name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_meta": {"errors":[]}, "id1": 1, "id2": 201, "updated_at": "2000-01-01T00:02:00Z", "name": "Bob", "address": {"city": "Boston", "state": "MA"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_meta": {"errors":["Problem with `age`", "Problem with `registration_date`"]}, "id1": 2, "id2": 200, "updated_at": "2000-01-01T00:03:00Z", "name": "Charlie"} diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_expectedrecords_dedup_raw.jsonl b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_expectedrecords_dedup_raw.jsonl new file mode 100644 index 000000000000..88411c9e4de3 --- /dev/null +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync1_expectedrecords_dedup_raw.jsonl @@ -0,0 +1,4 @@ +// Keep the Alice record with more recent updated_at +{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:01:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-01T00:02:00Z", "name": "Bob", "address": {"city": "Boston", "state": "MA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_data": {"id1": 2, "id2": 200, "updated_at": "2000-01-01T00:03:00Z", "name": "Charlie", "age": "this is not an integer", "registration_date": "this is not a date"}} diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_expectedrecords_incremental_dedup_final.jsonl b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_expectedrecords_incremental_dedup_final.jsonl new file mode 100644 index 000000000000..10cd001e22f6 --- /dev/null +++ 
b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_expectedrecords_incremental_dedup_final.jsonl @@ -0,0 +1,3 @@ +{"_airbyte_extracted_at": "1970-01-01T00:00:02Z", "_airbyte_meta":{"errors":[]}, "id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00Z", "name": "Alice", "address": {"city": "Seattle", "state": "WA"}} +// Delete Bob, keep Charlie +{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_meta": {"errors":["Problem with `age`", "Problem with `registration_date`"]}, "id1": 2, "id2": 200, "updated_at": "2000-01-01T00:03:00Z", "name": "Charlie"} diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_expectedrecords_incremental_dedup_raw.jsonl b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_expectedrecords_incremental_dedup_raw.jsonl new file mode 100644 index 000000000000..bd79da0ea871 --- /dev/null +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/resources/sync2_expectedrecords_incremental_dedup_raw.jsonl @@ -0,0 +1,5 @@ +{"_airbyte_extracted_at": "1970-01-01T00:00:02Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Seattle", "state": "WA"}}} +// Keep the record that deleted Bob, but delete the other records associated with id=(1, 201) +{"_airbyte_extracted_at": "1970-01-01T00:00:02Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:01:00Z", "_ab_cdc_deleted_at": "1970-01-01:00:00:00Z"}} +// And keep Charlie's record, even though it wasn't reemitted in sync2. 
+{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_data": {"id1": 2, "id2": 200, "updated_at": "2000-01-01T00:03:00Z", "name": "Charlie", "age": "this is not an integer", "registration_date": "this is not a date"}} diff --git a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGenerator.java b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGenerator.java index c4c3be8a91af..93f21b472f89 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGenerator.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGenerator.java @@ -386,7 +386,7 @@ String dedupFinalTable(final StreamId id, WHERE row_number != 1 ) OR ( - ${pk_list} IN ( + (${pk_list}) IN ( SELECT ( ${pk_cast_list} ) From 80f8d9046a7a23f97a295e4a12847d629c53edb7 Mon Sep 17 00:00:00 2001 From: edgao Date: Fri, 30 Jun 2023 18:49:35 +0000 Subject: [PATCH 14/46] Automated Commit - Formatting Changes --- .../typing_deduping/BaseTypingDedupingTest.java | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java index db43e67bab3f..5a8873440423 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java +++ 
b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java @@ -216,9 +216,9 @@ public void fullRefreshAppend() throws Exception { /** * Starting with an empty destination, execute an incremental append sync. *

- * This is (not so secretly) identical to {@link #fullRefreshAppend()}, and uses the same set of expected records. - * Incremental as a concept only exists in the source. From the destination's perspective, we only care about the - * destination sync mode. + * This is (not so secretly) identical to {@link #fullRefreshAppend()}, and uses the same set of + * expected records. Incremental as a concept only exists in the source. From the destination's + * perspective, we only care about the destination sync mode. */ @Test public void incrementalAppend() throws Exception { @@ -253,8 +253,9 @@ public void incrementalAppend() throws Exception { } /** - * Starting with an empty destination, execute an incremental dedup sync. Verify that the records are written to the - * destination table. Then run a second sync, and verify that the raw/final tables contain the correct records. + * Starting with an empty destination, execute an incremental dedup sync. Verify that the records + * are written to the destination table. Then run a second sync, and verify that the raw/final + * tables contain the correct records. 
*/ @Test public void incrementalDedup() throws Exception { From 73b9e9014bb560ac38c9d6df878b98e091541b0e Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Fri, 30 Jun 2023 13:04:23 -0700 Subject: [PATCH 15/46] readability stuff --- .../BaseTypingDedupingTest.java | 37 ++++++++++++++----- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java index 5a8873440423..3db76a63de18 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java @@ -365,10 +365,11 @@ private static String getFieldIfPresent(JsonNode record, String field) { * PK+cursor+extracted_at) * @param sortComparator Behaves identically to identityComparator, but if two records are the same, * breaks that tie using _airbyte_raw_id + * @param recordIdExtractor Dump the record's PK+cursor+extracted_at into a human-readable string + * @param extractRawData Whether to look inside the _airbyte_data column and diff its subfields * @return The diff, or empty string if there were no differences */ - private static String diffRecords( - List originalExpectedRecords, + private static String diffRecords(List originalExpectedRecords, List originalActualRecords, Comparator identityComparator, Comparator sortComparator, @@ -379,7 +380,6 @@ private static String diffRecords( // Iterate through both lists in parallel and compare each record. // Build up an error message listing any incorrect, missing, or unexpected records. 
- // Not a true diff, but close enough. String message = ""; int expectedRecordIndex = 0; int actualRecordIndex = 0; @@ -391,11 +391,13 @@ private static String diffRecords( // These records should be the same. Find the specific fields that are different. boolean foundMismatch = false; String mismatchedRecordMessage = "Row had incorrect data:" + recordIdExtractor.apply(expectedRecord) + "\n"; - // Iterate through each field in the expected record and compare it to the actual record's value. + // Iterate through each column in the expected record and compare it to the actual record's value. for (String column : Streams.stream(expectedRecord.fieldNames()).sorted().toList()) { if (extractRawData && "_airbyte_data".equals(column)) { + // For the raw data in particular, we should also diff the fields inside _airbyte_data. JsonNode expectedRawData = expectedRecord.get("_airbyte_data"); JsonNode actualRawData = actualRecord.get("_airbyte_data"); + // Iterate through all the subfields of the expected raw data and check that they match the actual record... for (String field : Streams.stream(expectedRawData.fieldNames()).sorted().toList()) { JsonNode expectedValue = expectedRawData.get(field); JsonNode actualValue = actualRawData.get(field); @@ -404,7 +406,8 @@ private static String diffRecords( foundMismatch = true; } } - LinkedHashMap extraColumns = checkForExtraFields(expectedRawData, actualRawData); + // ... and then check the actual raw data for any subfields that we weren't expecting. + LinkedHashMap extraColumns = checkForExtraOrNonNullFields(expectedRawData, actualRawData); if (extraColumns.size() > 0) { for (Map.Entry extraColumn : extraColumns.entrySet()) { mismatchedRecordMessage += generateFieldError("_airbyte_data." + extraColumn.getKey(), null, extraColumn.getValue()); @@ -412,6 +415,7 @@ private static String diffRecords( } } } else { + // For all other columns, we can just compare their values directly. 
JsonNode expectedValue = expectedRecord.get(column); JsonNode actualValue = actualRecord.get(column); if (jsonNodesNotEquivalent(expectedValue, actualValue)) { @@ -420,7 +424,8 @@ private static String diffRecords( } } } - LinkedHashMap extraColumns = checkForExtraFields(expectedRecord, actualRecord); + // Then check the entire actual record for any columns that we weren't expecting. + LinkedHashMap extraColumns = checkForExtraOrNonNullFields(expectedRecord, actualRecord); if (extraColumns.size() > 0) { for (Map.Entry extraColumn : extraColumns.entrySet()) { mismatchedRecordMessage += generateFieldError("column " + extraColumn.getKey(), null, extraColumn.getValue()); @@ -467,7 +472,15 @@ private static boolean jsonNodesNotEquivalent(JsonNode expectedValue, JsonNode a && !(expectedValue.isNumber() && actualValue.isNumber() && expectedValue.asDouble() == actualValue.asDouble()); } - private static LinkedHashMap checkForExtraFields(JsonNode expectedRecord, JsonNode actualRecord) { + /** + * Verify that all fields in the actual record are present in the expected record. This is primarily relevant for + * detecting fields that we expected to be null, but actually were not. See {@link #dumpFinalTableRecords(String, String)} + * for an explanation of how SQL/JSON nulls are represented in the expected record. + *

+ * This has the side benefit of detecting completely unexpected columns, which would be a very weird bug but is + * probably still useful to catch. + */ + private static LinkedHashMap checkForExtraOrNonNullFields(JsonNode expectedRecord, JsonNode actualRecord) { LinkedHashMap extraFields = new LinkedHashMap<>(); for (String column : Streams.stream(actualRecord.fieldNames()).sorted().toList()) { // loaded_at and raw_id are generated dynamically, so we just ignore them. @@ -478,15 +491,19 @@ private static LinkedHashMap checkForExtraFields(JsonNode expe return extraFields; } + /** + * Produce a pretty-printed error message, e.g. " For column foo, expected 1 but got 2". It's indented intentionally. + */ private static String generateFieldError(String fieldname, JsonNode expectedValue, JsonNode actualValue) { String expectedString = expectedValue == null ? "SQL NULL (i.e. no value)" : expectedValue.toString(); String actualString = actualValue == null ? "SQL NULL (i.e. no value)" : actualValue.toString(); return " For " + fieldname + ", expected " + expectedString + " but got " + actualString + "\n"; } + // These asFoo methods are used for sorting records, so their defaults are intended to make broken records stand out. private static long asInt(JsonNode node) { if (node == null || !node.isIntegralNumber()) { - return Integer.MIN_VALUE; + return Long.MIN_VALUE; } else { return node.longValue(); } @@ -512,11 +529,11 @@ private static Instant asTimestamp(JsonNode node) { /* * !!!!!! WARNING !!!!!! The code below was mostly copypasted from DestinationAcceptanceTest. If you - * make edits here, you probably want to also edit there. !!!!!!!!!!!!!!!!!!!!! + * make edits here, you probably want to also edit there. */ + // These contain some state, so they are instanced per test (i.e. cannot be static) private Path jobRoot; - // This contains some state, so it needs to be instanced per test (i.e. 
cannot be static) private ProcessFactory processFactory; @BeforeEach From a40935b2e85432b82e2cedc7e885d413ca4fcbcc Mon Sep 17 00:00:00 2001 From: edgao Date: Fri, 30 Jun 2023 20:09:38 +0000 Subject: [PATCH 16/46] Automated Commit - Formatting Changes --- .../BaseTypingDedupingTest.java | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java index 3db76a63de18..025758bb2179 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java @@ -397,7 +397,8 @@ private static String diffRecords(List originalExpectedRecords, // For the raw data in particular, we should also diff the fields inside _airbyte_data. JsonNode expectedRawData = expectedRecord.get("_airbyte_data"); JsonNode actualRawData = actualRecord.get("_airbyte_data"); - // Iterate through all the subfields of the expected raw data and check that they match the actual record... + // Iterate through all the subfields of the expected raw data and check that they match the actual + // record... for (String field : Streams.stream(expectedRawData.fieldNames()).sorted().toList()) { JsonNode expectedValue = expectedRawData.get(field); JsonNode actualValue = actualRawData.get(field); @@ -473,12 +474,13 @@ private static boolean jsonNodesNotEquivalent(JsonNode expectedValue, JsonNode a } /** - * Verify that all fields in the actual record are present in the expected record. 
This is primarily relevant for - * detecting fields that we expected to be null, but actually were not. See {@link #dumpFinalTableRecords(String, String)} - * for an explanation of how SQL/JSON nulls are represented in the expected record. + * Verify that all fields in the actual record are present in the expected record. This is primarily + * relevant for detecting fields that we expected to be null, but actually were not. See + * {@link #dumpFinalTableRecords(String, String)} for an explanation of how SQL/JSON nulls are + * represented in the expected record. *

- * This has the side benefit of detecting completely unexpected columns, which would be a very weird bug but is - * probably still useful to catch. + * This has the side benefit of detecting completely unexpected columns, which would be a very weird + * bug but is probably still useful to catch. */ private static LinkedHashMap checkForExtraOrNonNullFields(JsonNode expectedRecord, JsonNode actualRecord) { LinkedHashMap extraFields = new LinkedHashMap<>(); @@ -492,7 +494,8 @@ private static LinkedHashMap checkForExtraOrNonNullFields(Json } /** - * Produce a pretty-printed error message, e.g. " For column foo, expected 1 but got 2". It's indented intentionally. + * Produce a pretty-printed error message, e.g. " For column foo, expected 1 but got 2". It's + * indented intentionally. */ private static String generateFieldError(String fieldname, JsonNode expectedValue, JsonNode actualValue) { String expectedString = expectedValue == null ? "SQL NULL (i.e. no value)" : expectedValue.toString(); @@ -500,7 +503,8 @@ private static String generateFieldError(String fieldname, JsonNode expectedValu return " For " + fieldname + ", expected " + expectedString + " but got " + actualString + "\n"; } - // These asFoo methods are used for sorting records, so their defaults are intended to make broken records stand out. + // These asFoo methods are used for sorting records, so their defaults are intended to make broken + // records stand out. 
private static long asInt(JsonNode node) { if (node == null || !node.isIntegralNumber()) { return Long.MIN_VALUE; From 7bb4b2cf57b2eaee744d460b930a928a4035271d Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Fri, 30 Jun 2023 13:18:30 -0700 Subject: [PATCH 17/46] add test for gcs mode --- .../AbstractBigQueryTypingDedupingTest.java | 75 +++++++++++++++++++ .../BigQueryGcsTypingDedupingTest.java | 12 +++ ...ueryStandardInsertsTypingDedupingTest.java | 66 +--------------- 3 files changed, 89 insertions(+), 64 deletions(-) create mode 100644 airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/AbstractBigQueryTypingDedupingTest.java create mode 100644 airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryGcsTypingDedupingTest.java diff --git a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/AbstractBigQueryTypingDedupingTest.java b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/AbstractBigQueryTypingDedupingTest.java new file mode 100644 index 000000000000..2ec980e3cb60 --- /dev/null +++ b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/AbstractBigQueryTypingDedupingTest.java @@ -0,0 +1,75 @@ +package io.airbyte.integrations.destination.bigquery.typing_deduping; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.google.cloud.bigquery.BigQuery; +import com.google.cloud.bigquery.DatasetId; +import com.google.cloud.bigquery.QueryJobConfiguration; +import com.google.cloud.bigquery.TableId; +import com.google.cloud.bigquery.TableResult; +import 
io.airbyte.commons.json.Jsons; +import io.airbyte.commons.string.Strings; +import io.airbyte.integrations.base.destination.typing_deduping.BaseTypingDedupingTest; +import io.airbyte.integrations.destination.bigquery.BigQueryDestination; +import io.airbyte.integrations.destination.bigquery.BigQueryDestinationTestUtils; +import java.io.IOException; +import java.nio.file.Path; +import java.time.Instant; +import java.util.LinkedHashMap; +import java.util.List; + +public abstract class AbstractBigQueryTypingDedupingTest extends BaseTypingDedupingTest { + + private static BigQuery bq; + + /** + * Subclasses should call this in an @BeforeAll block rather than directly setting {@see BaseTypingDedupingTest#config}. + */ + protected static void setConfig(String configPath) throws IOException { + final String datasetId = Strings.addRandomSuffix("typing_deduping_default_dataset", "_", 5); + config = BigQueryDestinationTestUtils.createConfig(Path.of(configPath), datasetId); + bq = BigQueryDestination.getBigQuery(config); + } + + @Override + protected String getImageName() { + return "airbyte/destination-bigquery:dev"; + } + + @Override + protected List dumpRawTableRecords(String streamNamespace, String streamName) throws InterruptedException { + TableResult result = bq.query(QueryJobConfiguration.of("SELECT * FROM airbyte." + streamNamespace + "_" + streamName)); + List> rowsAsMaps = BigQuerySqlGeneratorIntegrationTest.toMaps(result); + return rowsAsMaps.stream().map(AbstractBigQueryTypingDedupingTest::toJson).toList(); + } + + @Override + protected List dumpFinalTableRecords(String streamNamespace, String streamName) throws InterruptedException { + TableResult result = bq.query(QueryJobConfiguration.of("SELECT * FROM " + streamNamespace + "." 
+ streamName)); + List> rowsAsMaps = BigQuerySqlGeneratorIntegrationTest.toMaps(result); + return rowsAsMaps.stream().map(AbstractBigQueryTypingDedupingTest::toJson).toList(); + } + + @Override + protected void teardownStreamAndNamespace(String streamNamespace, String streamName) { + // bq.delete simply returns false if the table/schema doesn't exist (e.g. if the connector failed to create it) + // so we don't need to do any existence checks here. + bq.delete(TableId.of("airbyte", streamNamespace + "_" + streamName)); + bq.delete(DatasetId.of(streamNamespace), BigQuery.DatasetDeleteOption.deleteContents()); + } + + private static JsonNode toJson(LinkedHashMap map) { + ObjectNode o = (ObjectNode) Jsons.emptyObject(); + map.forEach((key, value) -> { + if (value == null) { + // If the value is null, do nothing. We don't want to insert it into the json at all. + } else if (value instanceof Instant i) { + // naively converting an Instant returns a DecimalNode with the unix epoch, so instead we manually stringify it + o.set(key, Jsons.jsonNode(i.toString())); + } else { + o.set(key, Jsons.jsonNode(value)); + } + }); + return o; + } +} diff --git a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryGcsTypingDedupingTest.java b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryGcsTypingDedupingTest.java new file mode 100644 index 000000000000..b79d783df3de --- /dev/null +++ b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryGcsTypingDedupingTest.java @@ -0,0 +1,12 @@ +package io.airbyte.integrations.destination.bigquery.typing_deduping; + +import java.io.IOException; +import org.junit.jupiter.api.BeforeAll; + +public class BigQueryGcsTypingDedupingTest extends 
AbstractBigQueryTypingDedupingTest { + + @BeforeAll + public static void buildConfig() throws IOException { + setConfig("secrets/credentials-1s1t-gcs.json"); + } +} diff --git a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryStandardInsertsTypingDedupingTest.java b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryStandardInsertsTypingDedupingTest.java index 0da1f2945c65..16ed7cabd58e 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryStandardInsertsTypingDedupingTest.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryStandardInsertsTypingDedupingTest.java @@ -1,75 +1,13 @@ package io.airbyte.integrations.destination.bigquery.typing_deduping; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.node.ObjectNode; -import com.google.cloud.bigquery.BigQuery; -import com.google.cloud.bigquery.DatasetId; -import com.google.cloud.bigquery.QueryJobConfiguration; -import com.google.cloud.bigquery.TableId; -import com.google.cloud.bigquery.TableResult; -import io.airbyte.commons.json.Jsons; -import io.airbyte.commons.string.Strings; -import io.airbyte.integrations.base.destination.typing_deduping.BaseTypingDedupingTest; -import io.airbyte.integrations.destination.bigquery.BigQueryDestination; -import io.airbyte.integrations.destination.bigquery.BigQueryDestinationTestUtils; import java.io.IOException; -import java.nio.file.Path; -import java.time.Instant; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; import org.junit.jupiter.api.BeforeAll; -public class BigQueryStandardInsertsTypingDedupingTest 
extends BaseTypingDedupingTest { - - private static BigQuery bq; +public class BigQueryStandardInsertsTypingDedupingTest extends AbstractBigQueryTypingDedupingTest { // Note that this is not an @Override, because it's a static method. I would love suggestions on how to do this better :) @BeforeAll public static void buildConfig() throws IOException { - final String datasetId = Strings.addRandomSuffix("typing_deduping_default_dataset", "_", 5); - config = BigQueryDestinationTestUtils.createConfig(Path.of("secrets/credentials-1s1t-standard.json"), datasetId); - bq = BigQueryDestination.getBigQuery(config); - } - - @Override - protected String getImageName() { - return "airbyte/destination-bigquery:dev"; - } - - @Override - protected List dumpRawTableRecords(String streamNamespace, String streamName) throws InterruptedException { - TableResult result = bq.query(QueryJobConfiguration.of("SELECT * FROM airbyte." + streamNamespace + "_" + streamName)); - List> rowsAsMaps = BigQuerySqlGeneratorIntegrationTest.toMaps(result); - return rowsAsMaps.stream().map(BigQueryStandardInsertsTypingDedupingTest::toJson).toList(); - } - - @Override - protected List dumpFinalTableRecords(String streamNamespace, String streamName) throws InterruptedException { - TableResult result = bq.query(QueryJobConfiguration.of("SELECT * FROM " + streamNamespace + "." + streamName)); - List> rowsAsMaps = BigQuerySqlGeneratorIntegrationTest.toMaps(result); - return rowsAsMaps.stream().map(BigQueryStandardInsertsTypingDedupingTest::toJson).toList(); - } - - @Override - protected void teardownStreamAndNamespace(String streamNamespace, String streamName) { - // bq.delete simply returns false if the table/schema doesn't exist (e.g. 
if the connector failed to create it) - bq.delete(TableId.of("airbyte", streamNamespace + "_" + streamName)); - bq.delete(DatasetId.of(streamNamespace), BigQuery.DatasetDeleteOption.deleteContents()); - } - - private static JsonNode toJson(LinkedHashMap map) { - ObjectNode o = (ObjectNode) Jsons.emptyObject(); - map.forEach((key, value) -> { - if (value == null) { - // If the value is null, do nothing. We don't want to insert it into the json at all. - } else if (value instanceof Instant i) { - // naively converting an Instant returns a DecimalNode with the unix epoch, so instead we manually stringify it - o.set(key, Jsons.jsonNode(i.toString())); - } else { - o.set(key, Jsons.jsonNode(value)); - } - }); - return o; + setConfig("secrets/credentials-1s1t-standard.json"); } } From 4da21d1866e85e22e42887c86cc54654a8dc192b Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Fri, 30 Jun 2023 14:21:12 -0700 Subject: [PATCH 18/46] remove static fields --- .../BaseTypingDedupingTest.java | 40 +++++++++++++------ .../destination-bigquery/build.gradle | 1 - .../AbstractBigQueryTypingDedupingTest.java | 16 ++++---- .../BigQueryGcsTypingDedupingTest.java | 9 ++--- ...ueryStandardInsertsTypingDedupingTest.java | 10 ++--- 5 files changed, 41 insertions(+), 35 deletions(-) diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java index 025758bb2179..8a3994ce1701 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java @@ -13,7 +13,6 @@ import 
io.airbyte.commons.json.Jsons; import io.airbyte.commons.lang.Exceptions; import io.airbyte.commons.resources.MoreResources; -import io.airbyte.commons.string.Strings; import io.airbyte.configoss.WorkerDestinationConfig; import io.airbyte.protocol.models.v0.AirbyteMessage; import io.airbyte.protocol.models.v0.AirbyteStream; @@ -38,6 +37,7 @@ import java.util.Objects; import java.util.UUID; import java.util.function.Function; +import org.apache.commons.lang3.RandomStringUtils; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -60,6 +60,7 @@ // Remember to set `'junit.jupiter.execution.parallel.enabled': 'true'` in your connector's // build.gradle. // See destination-bigquery for an example. +// If you're running from inside intellij, you must run your specific subclass to get concurrent execution. @Execution(ExecutionMode.CONCURRENT) public abstract class BaseTypingDedupingTest { @@ -79,15 +80,8 @@ public abstract class BaseTypingDedupingTest { private static final Comparator FINAL_RECORD_SORT_COMPARATOR = FINAL_RECORD_IDENTITY_COMPARATOR .thenComparing(record -> asString(record.get("_airbyte_raw_id"))); - /** - * Subclasses MUST implement a static {@link org.junit.jupiter.api.BeforeAll} method that sets this - * field. - *

- * That method should also start testcontainer(s), if you're using them. That test container will be - * used for all tests. This is safe because each test uses a randomized stream namespace+name. - */ - protected static JsonNode config; - + private String randomSuffix; + private JsonNode config; private String streamNamespace; private String streamName; @@ -96,6 +90,15 @@ public abstract class BaseTypingDedupingTest { */ protected abstract String getImageName(); + /** + * Get the destination connector config. Subclasses may use this method for other setup work, e.g. opening a connection + * to the destination. + *

+ * Subclasses should _not_ start testcontainers in this method; that belongs in a BeforeAll method. The tests in this + * class are intended to be run concurrently on a shared database and will not interfere with each other. + */ + protected abstract JsonNode getConfig() throws Exception; + /** * For a given stream, return the records that exist in the destination's raw table. Each record * must be in the format {"_airbyte_raw_id": "...", "_airbyte_extracted_at": "...", @@ -131,10 +134,21 @@ public abstract class BaseTypingDedupingTest { */ protected abstract void teardownStreamAndNamespace(String streamNamespace, String streamName) throws Exception; + /** + * @return A suffix which is different for each concurrent test run. + */ + protected synchronized String getUniqueSuffix() { + if (randomSuffix == null) { + randomSuffix = "_" + RandomStringUtils.randomAlphabetic(5).toLowerCase(); + } + return randomSuffix; + } + @BeforeEach - public void setup() { - streamNamespace = Strings.addRandomSuffix("typing_deduping_test", "_", 5); - streamName = Strings.addRandomSuffix("test_stream", "_", 5); + public void setup() throws Exception { + config = getConfig(); + streamNamespace = "typing_deduping_test" + getUniqueSuffix(); + streamName = "test_stream" + getUniqueSuffix(); LOGGER.info("Using stream namespace {} and name {}", streamNamespace, streamName); } diff --git a/airbyte-integrations/connectors/destination-bigquery/build.gradle b/airbyte-integrations/connectors/destination-bigquery/build.gradle index 3867ce22c210..2229ad250b72 100644 --- a/airbyte-integrations/connectors/destination-bigquery/build.gradle +++ b/airbyte-integrations/connectors/destination-bigquery/build.gradle @@ -60,4 +60,3 @@ integrationTestJava { // 'junit.jupiter.execution.parallel.mode.default': 'concurrent' ] } - diff --git 
a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/AbstractBigQueryTypingDedupingTest.java b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/AbstractBigQueryTypingDedupingTest.java index 2ec980e3cb60..ea0f99c0632c 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/AbstractBigQueryTypingDedupingTest.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/AbstractBigQueryTypingDedupingTest.java @@ -8,7 +8,6 @@ import com.google.cloud.bigquery.TableId; import com.google.cloud.bigquery.TableResult; import io.airbyte.commons.json.Jsons; -import io.airbyte.commons.string.Strings; import io.airbyte.integrations.base.destination.typing_deduping.BaseTypingDedupingTest; import io.airbyte.integrations.destination.bigquery.BigQueryDestination; import io.airbyte.integrations.destination.bigquery.BigQueryDestinationTestUtils; @@ -20,15 +19,16 @@ public abstract class AbstractBigQueryTypingDedupingTest extends BaseTypingDedupingTest { - private static BigQuery bq; + private BigQuery bq; - /** - * Subclasses should call this in an @BeforeAll block rather than directly setting {@see BaseTypingDedupingTest#config}. 
- */ - protected static void setConfig(String configPath) throws IOException { - final String datasetId = Strings.addRandomSuffix("typing_deduping_default_dataset", "_", 5); - config = BigQueryDestinationTestUtils.createConfig(Path.of(configPath), datasetId); + protected abstract String getConfigPath(); + + @Override + public JsonNode getConfig() throws IOException { + final String datasetId = "typing_deduping_default_dataset" + getUniqueSuffix(); + JsonNode config = BigQueryDestinationTestUtils.createConfig(Path.of(getConfigPath()), datasetId); bq = BigQueryDestination.getBigQuery(config); + return config; } @Override diff --git a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryGcsTypingDedupingTest.java b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryGcsTypingDedupingTest.java index b79d783df3de..df201d6c687b 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryGcsTypingDedupingTest.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryGcsTypingDedupingTest.java @@ -1,12 +1,9 @@ package io.airbyte.integrations.destination.bigquery.typing_deduping; -import java.io.IOException; -import org.junit.jupiter.api.BeforeAll; - public class BigQueryGcsTypingDedupingTest extends AbstractBigQueryTypingDedupingTest { - @BeforeAll - public static void buildConfig() throws IOException { - setConfig("secrets/credentials-1s1t-gcs.json"); + @Override + public String getConfigPath() { + return "secrets/credentials-1s1t-gcs.json"; } } diff --git 
a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryStandardInsertsTypingDedupingTest.java b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryStandardInsertsTypingDedupingTest.java index 16ed7cabd58e..be86379f2719 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryStandardInsertsTypingDedupingTest.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQueryStandardInsertsTypingDedupingTest.java @@ -1,13 +1,9 @@ package io.airbyte.integrations.destination.bigquery.typing_deduping; -import java.io.IOException; -import org.junit.jupiter.api.BeforeAll; - public class BigQueryStandardInsertsTypingDedupingTest extends AbstractBigQueryTypingDedupingTest { - // Note that this is not an @Override, because it's a static method. 
I would love suggestions on how to do this better :) - @BeforeAll - public static void buildConfig() throws IOException { - setConfig("secrets/credentials-1s1t-standard.json"); + @Override + public String getConfigPath() { + return "secrets/credentials-1s1t-standard.json"; } } From a8fa7d40bb2988391d218792f4bdfd1ed43f87a6 Mon Sep 17 00:00:00 2001 From: octavia-approvington Date: Fri, 30 Jun 2023 21:26:21 +0000 Subject: [PATCH 19/46] Automated Commit - Formatting Changes --- .../typing_deduping/BaseTypingDedupingTest.java | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java index 8a3994ce1701..9b4dd9b6409f 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java @@ -60,7 +60,8 @@ // Remember to set `'junit.jupiter.execution.parallel.enabled': 'true'` in your connector's // build.gradle. // See destination-bigquery for an example. -// If you're running from inside intellij, you must run your specific subclass to get concurrent execution. +// If you're running from inside intellij, you must run your specific subclass to get concurrent +// execution. @Execution(ExecutionMode.CONCURRENT) public abstract class BaseTypingDedupingTest { @@ -91,11 +92,12 @@ public abstract class BaseTypingDedupingTest { protected abstract String getImageName(); /** - * Get the destination connector config. Subclasses may use this method for other setup work, e.g. 
opening a connection - * to the destination. + * Get the destination connector config. Subclasses may use this method for other setup work, e.g. + * opening a connection to the destination. *

- * Subclasses should _not_ start testcontainers in this method; that belongs in a BeforeAll method. The tests in this - * class are intended to be run concurrently on a shared database and will not interfere with each other. + * Subclasses should _not_ start testcontainers in this method; that belongs in a BeforeAll method. + * The tests in this class are intended to be run concurrently on a shared database and will not + * interfere with each other. */ protected abstract JsonNode getConfig() throws Exception; From 067ee0db5496eb6c70a9fb92ad0dbadea77ef72b Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Fri, 30 Jun 2023 15:23:48 -0700 Subject: [PATCH 20/46] add more test cases, tweak test scaffold --- .../BaseTypingDedupingTest.java | 106 +++++++++++++++++- .../AbstractBigQueryTypingDedupingTest.java | 9 +- 2 files changed, 112 insertions(+), 3 deletions(-) diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java index 9b4dd9b6409f..72f3a88a0fec 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java @@ -8,6 +8,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.Streams; import io.airbyte.commons.features.EnvVariableFeatureFlags; import io.airbyte.commons.json.Jsons; @@ -40,6 +41,7 @@ import org.apache.commons.lang3.RandomStringUtils; import org.junit.jupiter.api.AfterEach; import 
org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.parallel.Execution; import org.junit.jupiter.api.parallel.ExecutionMode; @@ -99,7 +101,7 @@ public abstract class BaseTypingDedupingTest { * The tests in this class are intended to be run concurrently on a shared database and will not * interfere with each other. */ - protected abstract JsonNode getConfig() throws Exception; + protected abstract JsonNode generateConfig() throws Exception; /** * For a given stream, return the records that exist in the destination's raw table. Each record @@ -108,6 +110,8 @@ public abstract class BaseTypingDedupingTest { *

* The {@code _airbyte_data} column must be an * {@link com.fasterxml.jackson.databind.node.ObjectNode} (i.e. it cannot be a string value). + *

+ * streamNamespace may be null, in which case you should query from the default namespace. */ protected abstract List dumpRawTableRecords(String streamNamespace, String streamName) throws Exception; @@ -123,6 +127,8 @@ public abstract class BaseTypingDedupingTest { *

* The corresponding SQL looks like * {@code INSERT INTO ... (name, address) VALUES ('null' :: jsonb, NULL)}. + *

+ * streamNamespace may be null, in which case you should query from the default namespace. */ protected abstract List dumpFinalTableRecords(String streamNamespace, String streamName) throws Exception; @@ -146,9 +152,13 @@ protected synchronized String getUniqueSuffix() { return randomSuffix; } + protected JsonNode getConfig() { + return config; + } + @BeforeEach public void setup() throws Exception { - config = getConfig(); + config = generateConfig(); streamNamespace = "typing_deduping_test" + getUniqueSuffix(); streamName = "test_stream" + getUniqueSuffix(); LOGGER.info("Using stream namespace {} and name {}", streamNamespace, streamName); @@ -305,6 +315,98 @@ public void incrementalDedup() throws Exception { verifySyncResult(expectedRawRecords2, expectedFinalRecords2); } + @Test + @Disabled("Not yet implemented") + public void testLineBreakCharacters() throws Exception { + // TODO verify that we can handle strings with interesting characters + // build an airbyterecordmessage using something like this, and add it to the input messages: + Jsons.jsonNode(ImmutableMap.builder() + .put("id", 1) + .put("currency", "USD\u2028") + .put("date", "2020-03-\n31T00:00:00Z\r") + // TODO(sherifnada) hack: write decimals with sigfigs because Snowflake stores 10.1 as "10" which + // fails destination tests + .put("HKD", 10.1) + .put("NZD", 700.1) + .build()); + } + + @Test + @Disabled("Not yet implemented") + public void testIncrementalSyncDropOneColumn() throws Exception { + // TODO in incremental dedup mode: run a sync, remove a column from the schema, run another sync + // verify that the column is dropped from the destination table + } + + @Test + @Disabled("Not yet implemented") + public void testSyncUsesAirbyteStreamNamespaceIfNotNull() throws Exception { + // TODO duplicate this test for each sync mode. 
Run 1st+2nd syncs using a stream with null namespace: + ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams(List.of( + new ConfiguredAirbyteStream() + .withSyncMode(SyncMode.FULL_REFRESH) + .withCursorField(List.of("updated_at")) + .withDestinationSyncMode(DestinationSyncMode.OVERWRITE) + .withPrimaryKey(List.of(List.of("id1"), List.of("id2"))) + .withStream(new AirbyteStream() + .withNamespace(null) + .withName(streamName) + .withJsonSchema(getSchema())))); + } + + @Test + @Disabled("Not yet implemented") + public void testSyncWriteSameTableNameDifferentNamespace() throws Exception { + // TODO duplicate this test for each sync mode. Run 1st+2nd syncs using two streams with the same name but different namespace: + ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams(List.of( + new ConfiguredAirbyteStream() + .withSyncMode(SyncMode.FULL_REFRESH) + .withCursorField(List.of("updated_at")) + .withDestinationSyncMode(DestinationSyncMode.OVERWRITE) + .withPrimaryKey(List.of(List.of("id1"), List.of("id2"))) + .withStream(new AirbyteStream() + .withNamespace(streamNamespace + "_1") + .withName(streamName) + .withJsonSchema(getSchema())), + new ConfiguredAirbyteStream() + .withSyncMode(SyncMode.FULL_REFRESH) + .withCursorField(List.of("updated_at")) + .withDestinationSyncMode(DestinationSyncMode.OVERWRITE) + .withPrimaryKey(List.of(List.of("id1"), List.of("id2"))) + .withStream(new AirbyteStream() + .withNamespace(streamNamespace + "_2") + .withName(streamName) + .withJsonSchema(getSchema())))); + } + + @Test + @Disabled("Not yet implemented") + public void testSyncNotFailsWithNewFields() throws Exception { + // TODO duplicate this test for each sync mode. Run a sync, then add a new field to the schema, then run another sync + // We might want to write a test that verifies more general schema evolution (e.g. 
all valid evolutions) + } + + @Test + @Disabled("Not yet implemented") + public void testSyncWithLargeRecordBatch() throws Exception { + // TODO duplicate this test for each sync mode. Run a single sync with many records + /* + copied from DATs: + This serves to test MSSQL 2100 limit parameters in a single query. this means that for Airbyte + insert data need to limit to ~ 700 records (3 columns for the raw tables) = 2100 params + + this maybe needs configuration per destination to specify that limit? + */ + } + + @Test + @Disabled("Not yet implemented") + public void testDataTypes() throws Exception { + // TODO duplicate this test for each sync mode. See DataTypeTestArgumentProvider for what this test does in DAT-land + // we probably don't want to do the exact same thing, but the general spirit of testing a wide range of values for every data type is approximately correct + // this test probably needs some configuration per destination to specify what values are supported? + } + private static JsonNode getSchema() throws IOException { return Jsons.deserialize(MoreResources.readResource("schema.json")); } diff --git a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/AbstractBigQueryTypingDedupingTest.java b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/AbstractBigQueryTypingDedupingTest.java index ea0f99c0632c..912db29823be 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/AbstractBigQueryTypingDedupingTest.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/AbstractBigQueryTypingDedupingTest.java @@ -11,6 +11,7 @@ import 
io.airbyte.integrations.base.destination.typing_deduping.BaseTypingDedupingTest; import io.airbyte.integrations.destination.bigquery.BigQueryDestination; import io.airbyte.integrations.destination.bigquery.BigQueryDestinationTestUtils; +import io.airbyte.integrations.destination.bigquery.BigQueryUtils; import java.io.IOException; import java.nio.file.Path; import java.time.Instant; @@ -24,7 +25,7 @@ public abstract class AbstractBigQueryTypingDedupingTest extends BaseTypingDedup protected abstract String getConfigPath(); @Override - public JsonNode getConfig() throws IOException { + public JsonNode generateConfig() throws IOException { final String datasetId = "typing_deduping_default_dataset" + getUniqueSuffix(); JsonNode config = BigQueryDestinationTestUtils.createConfig(Path.of(getConfigPath()), datasetId); bq = BigQueryDestination.getBigQuery(config); @@ -38,6 +39,9 @@ protected String getImageName() { @Override protected List dumpRawTableRecords(String streamNamespace, String streamName) throws InterruptedException { + if (streamNamespace == null) { + streamNamespace = BigQueryUtils.getDatasetId(getConfig()); + } TableResult result = bq.query(QueryJobConfiguration.of("SELECT * FROM airbyte." + streamNamespace + "_" + streamName)); List> rowsAsMaps = BigQuerySqlGeneratorIntegrationTest.toMaps(result); return rowsAsMaps.stream().map(AbstractBigQueryTypingDedupingTest::toJson).toList(); @@ -45,6 +49,9 @@ protected List dumpRawTableRecords(String streamNamespace, String stre @Override protected List dumpFinalTableRecords(String streamNamespace, String streamName) throws InterruptedException { + if (streamNamespace == null) { + streamNamespace = BigQueryUtils.getDatasetId(getConfig()); + } TableResult result = bq.query(QueryJobConfiguration.of("SELECT * FROM " + streamNamespace + "." 
+ streamName)); List> rowsAsMaps = BigQuerySqlGeneratorIntegrationTest.toMaps(result); return rowsAsMaps.stream().map(AbstractBigQueryTypingDedupingTest::toJson).toList(); From c0089158d6f05ced366158ce4a0974f190dd3535 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Fri, 30 Jun 2023 15:26:56 -0700 Subject: [PATCH 21/46] cleanup --- .../typing_deduping/BigQuerySqlGenerator.java | 4 ++-- .../BigQuerySqlGeneratorIntegrationTest.java | 22 +++++++++---------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGenerator.java b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGenerator.java index 93f21b472f89..3de7c9395ac9 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGenerator.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGenerator.java @@ -69,7 +69,7 @@ public ColumnId buildColumnId(final String name) { return new ColumnId(nameTransformer.getIdentifier(quotedName), name, canonicalized); } - public static StandardSQLTypeName toDialectType(final AirbyteType type) { + public StandardSQLTypeName toDialectType(final AirbyteType type) { // switch pattern-matching is still in preview at language level 17 :( if (type instanceof final AirbyteProtocolType p) { return toDialectType(p); @@ -137,7 +137,7 @@ ELSE JSON_QUERY(`_airbyte_data`, '$.${column_name}') } } - public static StandardSQLTypeName toDialectType(final AirbyteProtocolType airbyteProtocolType) { + public StandardSQLTypeName toDialectType(final AirbyteProtocolType airbyteProtocolType) { return switch (airbyteProtocolType) { // TODO doublecheck these case STRING -> 
StandardSQLTypeName.STRING; diff --git a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGeneratorIntegrationTest.java b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGeneratorIntegrationTest.java index ac4f3bf71a32..4ec3b2876be9 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGeneratorIntegrationTest.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGeneratorIntegrationTest.java @@ -894,6 +894,17 @@ private static void logAndExecute(final String sql) throws InterruptedException bq.query(QueryJobConfiguration.newBuilder(sql).build()); } + /** + * TableResult contains records in a somewhat nonintuitive format (and it avoids loading them all into memory). + * That's annoying for us since we're working with small test data, so pull everything into a list, and convert them + * into maps of column name -> value. + *

+ * Note that the values have reasonable types; see {@link #toMap(Schema, FieldValueList)} for details. + */ + public static List> toMaps(TableResult result) { + return result.streamAll().map(row -> toMap(result.getSchema(), row)).toList(); + } + /** * FieldValueList stores everything internally as string (I think?) but provides conversions to more useful types. * This method does that conversion, using the schema to determine which type is most appropriate. @@ -981,17 +992,6 @@ private void assertQueryResult(final List>> expecte } } - /** - * TableResult contains records in a somewhat nonintuitive format (and it avoids loading them all into memory). - * That's annoying for us since we're working with small test data, so pull everything into a list, and convert them - * into maps of column name -> value. - *

- * Note that the values have reasonable types; see {@link #toMap(Schema, FieldValueList)} for details. - */ - public static List> toMaps(TableResult result) { - return result.streamAll().map(row -> toMap(result.getSchema(), row)).toList(); - } - private static String sortedToString(Map record) { return sortedToString(record, Function.identity()); } From 1b376a248b960673242201f5952426b3ccbaa408 Mon Sep 17 00:00:00 2001 From: edgao Date: Fri, 30 Jun 2023 22:30:34 +0000 Subject: [PATCH 22/46] Automated Commit - Formatting Changes --- .../BaseTypingDedupingTest.java | 28 +++++++++++-------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java index 72f3a88a0fec..af94737bc28f 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java @@ -341,7 +341,8 @@ public void testIncrementalSyncDropOneColumn() throws Exception { @Test @Disabled("Not yet implemented") public void testSyncUsesAirbyteStreamNamespaceIfNotNull() throws Exception { - // TODO duplicate this test for each sync mode. Run 1st+2nd syncs using a stream with null namespace: + // TODO duplicate this test for each sync mode. 
Run 1st+2nd syncs using a stream with null + // namespace: ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams(List.of( new ConfiguredAirbyteStream() .withSyncMode(SyncMode.FULL_REFRESH) @@ -357,7 +358,8 @@ public void testSyncUsesAirbyteStreamNamespaceIfNotNull() throws Exception { @Test @Disabled("Not yet implemented") public void testSyncWriteSameTableNameDifferentNamespace() throws Exception { - // TODO duplicate this test for each sync mode. Run 1st+2nd syncs using two streams with the same name but different namespace: + // TODO duplicate this test for each sync mode. Run 1st+2nd syncs using two streams with the same + // name but different namespace: ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams(List.of( new ConfiguredAirbyteStream() .withSyncMode(SyncMode.FULL_REFRESH) @@ -382,8 +384,10 @@ public void testSyncWriteSameTableNameDifferentNamespace() throws Exception { @Test @Disabled("Not yet implemented") public void testSyncNotFailsWithNewFields() throws Exception { - // TODO duplicate this test for each sync mode. Run a sync, then add a new field to the schema, then run another sync - // We might want to write a test that verifies more general schema evolution (e.g. all valid evolutions) + // TODO duplicate this test for each sync mode. Run a sync, then add a new field to the schema, then + // run another sync + // We might want to write a test that verifies more general schema evolution (e.g. all valid + // evolutions) } @Test @@ -391,19 +395,21 @@ public void testSyncNotFailsWithNewFields() throws Exception { public void testSyncWithLargeRecordBatch() throws Exception { // TODO duplicate this test for each sync mode. Run a single sync with many records /* - copied from DATs: - This serves to test MSSQL 2100 limit parameters in a single query. 
this means that for Airbyte - insert data need to limit to ~ 700 records (3 columns for the raw tables) = 2100 params - - this maybe needs configuration per destination to specify that limit? + * copied from DATs: This serves to test MSSQL 2100 limit parameters in a single query. this means + * that for Airbyte insert data need to limit to ~ 700 records (3 columns for the raw tables) = 2100 + * params + * + * this maybe needs configuration per destination to specify that limit? */ } @Test @Disabled("Not yet implemented") public void testDataTypes() throws Exception { - // TODO duplicate this test for each sync mode. See DataTypeTestArgumentProvider for what this test does in DAT-land - // we probably don't want to do the exact same thing, but the general spirit of testing a wide range of values for every data type is approximately correct + // TODO duplicate this test for each sync mode. See DataTypeTestArgumentProvider for what this test + // does in DAT-land + // we probably don't want to do the exact same thing, but the general spirit of testing a wide range + // of values for every data type is approximately correct // this test probably needs some configuration per destination to specify what values are supported? 
} From d86dd300a2961000ac9c8221d2da1efa3d1fabbe Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Mon, 3 Jul 2023 10:17:03 -0700 Subject: [PATCH 23/46] extract recorddiffer --- .../BaseTypingDedupingTest.java | 269 ++----------- .../typing_deduping/RecordDiffer.java | 365 ++++++++++++++++++ .../typing_deduping/AirbyteType.java | 19 + 3 files changed, 410 insertions(+), 243 deletions(-) create mode 100644 airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java index af94737bc28f..a8e82447f661 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java @@ -15,6 +15,7 @@ import io.airbyte.commons.lang.Exceptions; import io.airbyte.commons.resources.MoreResources; import io.airbyte.configoss.WorkerDestinationConfig; +import io.airbyte.integrations.base.destination.typing_deduping.AirbyteType.AirbyteProtocolType; import io.airbyte.protocol.models.v0.AirbyteMessage; import io.airbyte.protocol.models.v0.AirbyteStream; import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; @@ -39,6 +40,7 @@ import java.util.UUID; import java.util.function.Function; import org.apache.commons.lang3.RandomStringUtils; +import org.apache.commons.lang3.tuple.Pair; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Disabled; @@ -68,20 +70,19 @@ public abstract class 
BaseTypingDedupingTest { private static final Logger LOGGER = LoggerFactory.getLogger(BaseTypingDedupingTest.class); - private static final Comparator RAW_RECORD_IDENTITY_COMPARATOR = Comparator - .comparingLong((JsonNode record) -> asInt(record.get("_airbyte_data").get("id1"))) - .thenComparingLong(record -> asInt(record.get("_airbyte_data").get("id2"))) - .thenComparing(record -> asTimestamp(record.get("_airbyte_data").get("updated_at"))) - .thenComparing(record -> asTimestamp(record.get("_airbyte_extracted_at"))); - private static final Comparator RAW_RECORD_SORT_COMPARATOR = RAW_RECORD_IDENTITY_COMPARATOR - .thenComparing(record -> asString(record.get("_airbyte_raw_id"))); - private static final Comparator FINAL_RECORD_IDENTITY_COMPARATOR = Comparator - .comparingLong((JsonNode record) -> asInt(record.get("id1"))) - .thenComparingLong(record -> asInt(record.get("id2"))) - .thenComparing(record -> asTimestamp(record.get("updated_at"))) - .thenComparing(record -> asTimestamp(record.get("_airbyte_extracted_at"))); - private static final Comparator FINAL_RECORD_SORT_COMPARATOR = FINAL_RECORD_IDENTITY_COMPARATOR - .thenComparing(record -> asString(record.get("_airbyte_raw_id"))); + private static final JsonNode SCHEMA; + static { + try { + SCHEMA = Jsons.deserialize(MoreResources.readResource("schema.json")); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + private static final RecordDiffer DIFFER = new RecordDiffer( + Pair.of("id1", AirbyteProtocolType.INTEGER), + Pair.of("id2", AirbyteProtocolType.INTEGER), + Pair.of("updated_at", AirbyteProtocolType.TIMESTAMP_WITH_TIMEZONE) + ); private String randomSuffix; private JsonNode config; @@ -183,7 +184,7 @@ public void fullRefreshOverwrite() throws Exception { .withStream(new AirbyteStream() .withNamespace(streamNamespace) .withName(streamName) - .withJsonSchema(getSchema())))); + .withJsonSchema(SCHEMA)))); // First sync List messages1 = readMessages("sync1_messages.jsonl"); @@ -218,7 +219,7 @@ 
public void fullRefreshAppend() throws Exception { .withStream(new AirbyteStream() .withNamespace(streamNamespace) .withName(streamName) - .withJsonSchema(getSchema())))); + .withJsonSchema(SCHEMA)))); // First sync List messages1 = readMessages("sync1_messages.jsonl"); @@ -257,7 +258,7 @@ public void incrementalAppend() throws Exception { .withStream(new AirbyteStream() .withNamespace(streamNamespace) .withName(streamName) - .withJsonSchema(getSchema())))); + .withJsonSchema(SCHEMA)))); // First sync List messages1 = readMessages("sync1_messages.jsonl"); @@ -294,7 +295,7 @@ public void incrementalDedup() throws Exception { .withStream(new AirbyteStream() .withNamespace(streamNamespace) .withName(streamName) - .withJsonSchema(getSchema())))); + .withJsonSchema(SCHEMA)))); // First sync List messages1 = readMessages("sync1_messages.jsonl"); @@ -352,7 +353,7 @@ public void testSyncUsesAirbyteStreamNamespaceIfNotNull() throws Exception { .withStream(new AirbyteStream() .withNamespace(null) .withName(streamName) - .withJsonSchema(getSchema())))); + .withJsonSchema(SCHEMA)))); } @Test @@ -369,7 +370,7 @@ public void testSyncWriteSameTableNameDifferentNamespace() throws Exception { .withStream(new AirbyteStream() .withNamespace(streamNamespace + "_1") .withName(streamName) - .withJsonSchema(getSchema())), + .withJsonSchema(SCHEMA)), new ConfiguredAirbyteStream() .withSyncMode(SyncMode.FULL_REFRESH) .withCursorField(List.of("updated_at")) @@ -378,7 +379,7 @@ public void testSyncWriteSameTableNameDifferentNamespace() throws Exception { .withStream(new AirbyteStream() .withNamespace(streamNamespace + "_2") .withName(streamName) - .withJsonSchema(getSchema())))); + .withJsonSchema(SCHEMA)))); } @Test @@ -413,8 +414,10 @@ public void testDataTypes() throws Exception { // this test probably needs some configuration per destination to specify what values are supported? 
} - private static JsonNode getSchema() throws IOException { - return Jsons.deserialize(MoreResources.readResource("schema.json")); + private void verifySyncResult(List expectedRawRecords, List expectedFinalRecords) throws Exception { + List actualRawRecords = dumpRawTableRecords(streamNamespace, streamName); + List actualFinalRecords = dumpFinalTableRecords(streamNamespace, streamName); + DIFFER.verifySyncResult(expectedRawRecords, actualRawRecords, expectedFinalRecords, actualFinalRecords); } private List readRecords(String filename) throws IOException { @@ -435,226 +438,6 @@ private List readMessages(String filename) throws IOException { }).toList(); } - private void verifySyncResult(List expectedRawRecords, List expectedFinalRecords) throws Exception { - List actualRawRecords = dumpRawTableRecords(streamNamespace, streamName); - String rawDiff = diffRawTableRecords(expectedRawRecords, actualRawRecords); - List actualFinalRecords = dumpFinalTableRecords(streamNamespace, streamName); - String finalDiff = diffFinalTableRecords(expectedFinalRecords, actualFinalRecords); - - assertAll( - () -> assertTrue(rawDiff.isEmpty(), "Raw table was incorrect.\n" + rawDiff), - () -> assertTrue(finalDiff.isEmpty(), "Final table was incorrect.\n" + finalDiff)); - } - - private static String diffRawTableRecords(List expectedRecords, List actualRecords) { - return diffRecords( - expectedRecords, - actualRecords, - RAW_RECORD_IDENTITY_COMPARATOR, - RAW_RECORD_SORT_COMPARATOR, - record -> getFieldIfPresent(record.get("_airbyte_data"), "id1") - + getFieldIfPresent(record.get("_airbyte_data"), "id2") - + getFieldIfPresent(record.get("_airbyte_data"), "updated_at") - + getFieldIfPresent(record, "_airbyte_extracted_at"), - true); - } - - private static String diffFinalTableRecords(List expectedRecords, List actualRecords) { - return diffRecords( - expectedRecords, - actualRecords, - FINAL_RECORD_IDENTITY_COMPARATOR, - FINAL_RECORD_SORT_COMPARATOR, - record -> getFieldIfPresent(record, 
"id1") - + getFieldIfPresent(record, "id2") - + getFieldIfPresent(record, "updated_at") - + getFieldIfPresent(record, "_airbyte_extracted_at"), - false); - } - - private static String getFieldIfPresent(JsonNode record, String field) { - if (record.has(field)) { - return field + "=" + record.get(field) + "; "; - } else { - return ""; - } - } - - /** - * Generate a human-readable diff between the two lists. Only checks the keys specified in - * expectedRecords. Assumes (in general) that two records with the same PK, cursor, and extracted_at - * are the same record. - * - * @param identityComparator Returns 0 iff two records are the "same" record (i.e. have the same - * PK+cursor+extracted_at) - * @param sortComparator Behaves identically to identityComparator, but if two records are the same, - * breaks that tie using _airbyte_raw_id - * @param recordIdExtractor Dump the record's PK+cursor+extracted_at into a human-readable string - * @param extractRawData Whether to look inside the _airbyte_data column and diff its subfields - * @return The diff, or empty string if there were no differences - */ - private static String diffRecords(List originalExpectedRecords, - List originalActualRecords, - Comparator identityComparator, - Comparator sortComparator, - Function recordIdExtractor, - boolean extractRawData) { - List expectedRecords = originalExpectedRecords.stream().sorted(sortComparator).toList(); - List actualRecords = originalActualRecords.stream().sorted(sortComparator).toList(); - - // Iterate through both lists in parallel and compare each record. - // Build up an error message listing any incorrect, missing, or unexpected records. 
- String message = ""; - int expectedRecordIndex = 0; - int actualRecordIndex = 0; - while (expectedRecordIndex < expectedRecords.size() && actualRecordIndex < actualRecords.size()) { - JsonNode expectedRecord = expectedRecords.get(expectedRecordIndex); - JsonNode actualRecord = actualRecords.get(actualRecordIndex); - int compare = identityComparator.compare(expectedRecord, actualRecord); - if (compare == 0) { - // These records should be the same. Find the specific fields that are different. - boolean foundMismatch = false; - String mismatchedRecordMessage = "Row had incorrect data:" + recordIdExtractor.apply(expectedRecord) + "\n"; - // Iterate through each column in the expected record and compare it to the actual record's value. - for (String column : Streams.stream(expectedRecord.fieldNames()).sorted().toList()) { - if (extractRawData && "_airbyte_data".equals(column)) { - // For the raw data in particular, we should also diff the fields inside _airbyte_data. - JsonNode expectedRawData = expectedRecord.get("_airbyte_data"); - JsonNode actualRawData = actualRecord.get("_airbyte_data"); - // Iterate through all the subfields of the expected raw data and check that they match the actual - // record... - for (String field : Streams.stream(expectedRawData.fieldNames()).sorted().toList()) { - JsonNode expectedValue = expectedRawData.get(field); - JsonNode actualValue = actualRawData.get(field); - if (jsonNodesNotEquivalent(expectedValue, actualValue)) { - mismatchedRecordMessage += generateFieldError("_airbyte_data." + field, expectedValue, actualValue); - foundMismatch = true; - } - } - // ... and then check the actual raw data for any subfields that we weren't expecting. - LinkedHashMap extraColumns = checkForExtraOrNonNullFields(expectedRawData, actualRawData); - if (extraColumns.size() > 0) { - for (Map.Entry extraColumn : extraColumns.entrySet()) { - mismatchedRecordMessage += generateFieldError("_airbyte_data." 
+ extraColumn.getKey(), null, extraColumn.getValue()); - foundMismatch = true; - } - } - } else { - // For all other columns, we can just compare their values directly. - JsonNode expectedValue = expectedRecord.get(column); - JsonNode actualValue = actualRecord.get(column); - if (jsonNodesNotEquivalent(expectedValue, actualValue)) { - mismatchedRecordMessage += generateFieldError("column " + column, expectedValue, actualValue); - foundMismatch = true; - } - } - } - // Then check the entire actual record for any columns that we weren't expecting. - LinkedHashMap extraColumns = checkForExtraOrNonNullFields(expectedRecord, actualRecord); - if (extraColumns.size() > 0) { - for (Map.Entry extraColumn : extraColumns.entrySet()) { - mismatchedRecordMessage += generateFieldError("column " + extraColumn.getKey(), null, extraColumn.getValue()); - foundMismatch = true; - } - } - if (foundMismatch) { - message += mismatchedRecordMessage; - } - - expectedRecordIndex++; - actualRecordIndex++; - } else if (compare < 0) { - // The expected record is missing from the actual records. Print it and move on to the next expected - // record. - message += "Row was expected but missing: " + expectedRecord + "\n"; - expectedRecordIndex++; - } else { - // There's an actual record which isn't present in the expected records. Print it and move on to the - // next actual record. - message += "Row was not expected but present: " + actualRecord + "\n"; - actualRecordIndex++; - } - } - // Tail loops in case we reached the end of one list before the other. 
- while (expectedRecordIndex < expectedRecords.size()) { - message += "Row was expected but missing: " + expectedRecords.get(expectedRecordIndex) + "\n"; - expectedRecordIndex++; - } - while (actualRecordIndex < actualRecords.size()) { - message += "Row was not expected but present: " + actualRecords.get(actualRecordIndex) + "\n"; - actualRecordIndex++; - } - - return message; - } - - private static boolean jsonNodesNotEquivalent(JsonNode expectedValue, JsonNode actualValue) { - // This is kind of sketchy, but seems to work fine for the data we have in our test cases. - return !Objects.equals(expectedValue, actualValue) - // Objects.equals expects the two values to be the same class. - // We need to handle comparisons between e.g. LongNode and IntNode. - && !(expectedValue.isIntegralNumber() && actualValue.isIntegralNumber() && expectedValue.asLong() == actualValue.asLong()) - && !(expectedValue.isNumber() && actualValue.isNumber() && expectedValue.asDouble() == actualValue.asDouble()); - } - - /** - * Verify that all fields in the actual record are present in the expected record. This is primarily - * relevant for detecting fields that we expected to be null, but actually were not. See - * {@link #dumpFinalTableRecords(String, String)} for an explanation of how SQL/JSON nulls are - * represented in the expected record. - *

- * This has the side benefit of detecting completely unexpected columns, which would be a very weird - * bug but is probably still useful to catch. - */ - private static LinkedHashMap checkForExtraOrNonNullFields(JsonNode expectedRecord, JsonNode actualRecord) { - LinkedHashMap extraFields = new LinkedHashMap<>(); - for (String column : Streams.stream(actualRecord.fieldNames()).sorted().toList()) { - // loaded_at and raw_id are generated dynamically, so we just ignore them. - if (!"_airbyte_loaded_at".equals(column) && !"_airbyte_raw_id".equals(column) && !expectedRecord.has(column)) { - extraFields.put(column, actualRecord.get(column)); - } - } - return extraFields; - } - - /** - * Produce a pretty-printed error message, e.g. " For column foo, expected 1 but got 2". It's - * indented intentionally. - */ - private static String generateFieldError(String fieldname, JsonNode expectedValue, JsonNode actualValue) { - String expectedString = expectedValue == null ? "SQL NULL (i.e. no value)" : expectedValue.toString(); - String actualString = actualValue == null ? "SQL NULL (i.e. no value)" : actualValue.toString(); - return " For " + fieldname + ", expected " + expectedString + " but got " + actualString + "\n"; - } - - // These asFoo methods are used for sorting records, so their defaults are intended to make broken - // records stand out. - private static long asInt(JsonNode node) { - if (node == null || !node.isIntegralNumber()) { - return Long.MIN_VALUE; - } else { - return node.longValue(); - } - } - - private static String asString(JsonNode node) { - if (node == null || node.isNull()) { - return ""; - } else if (node.isTextual()) { - return node.asText(); - } else { - return Jsons.serialize(node); - } - } - - private static Instant asTimestamp(JsonNode node) { - if (node == null || !node.isTextual()) { - return Instant.ofEpochMilli(Long.MIN_VALUE); - } else { - return Instant.parse(node.asText()); - } - } - /* * !!!!!! WARNING !!!!!! 
The code below was mostly copypasted from DestinationAcceptanceTest. If you * make edits here, you probably want to also edit there. diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java new file mode 100644 index 000000000000..c62a1bcfa706 --- /dev/null +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java @@ -0,0 +1,365 @@ +package io.airbyte.integrations.base.destination.typing_deduping; + +import static org.junit.jupiter.api.Assertions.assertAll; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.Streams; +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.base.destination.typing_deduping.AirbyteType.AirbyteProtocolType; +import java.time.Instant; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.LocalTime; +import java.time.OffsetTime; +import java.time.ZoneOffset; +import java.util.Arrays; +import java.util.Comparator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.function.Function; +import java.util.stream.Collectors; +import org.apache.commons.lang3.tuple.Pair; + +/** + * Utility class to generate human-readable diffs between expected and actual records. Assumes 1s1t output format. 
+ */ +public class RecordDiffer { + + private final Comparator rawRecordIdentityComparator; + private final Comparator rawRecordSortComparator; + private final Function rawRecordIdentityExtractor; + private final Comparator finalRecordIdentityComparator; + private final Comparator finalRecordSortComparator; + private final Function finalRecordIdentityExtractor; + + public RecordDiffer(Pair... columns) { + // Start with a noop comparator for convenience + Comparator rawIdComp = Comparator.comparing(record -> 0); + Comparator finalIdComp = Comparator.comparing(record -> 0); + for (Pair column : columns) { + rawIdComp = rawIdComp.thenComparing(record -> extract(record.get("_airbyte_data"), column.getKey(), column.getValue())); + finalIdComp = finalIdComp.thenComparing(record -> extract(record, column.getKey(), column.getValue())); + } + this.rawRecordIdentityComparator = rawIdComp.thenComparing(record -> asTimestampWithTimezone(record.get("_airbyte_extracted_at"))); + this.rawRecordSortComparator = rawRecordIdentityComparator.thenComparing(record -> asString(record.get("_airbyte_raw_id"))); + this.finalRecordIdentityComparator = finalIdComp.thenComparing(record -> asTimestampWithTimezone(record.get("_airbyte_extracted_at"))); + this.finalRecordSortComparator = finalRecordIdentityComparator.thenComparing(record -> asString(record.get("_airbyte_raw_id"))); + + rawRecordIdentityExtractor = record -> Arrays.stream(columns) + .map(column -> getPrintableFieldIfPresent(record.get("_airbyte_data"), column.getKey())) + .collect(Collectors.joining("; ")) + + getPrintableFieldIfPresent(record, "_airbyte_extracted_at"); + finalRecordIdentityExtractor = record -> Arrays.stream(columns) + .map(column -> getPrintableFieldIfPresent(record, column.getKey())) + .collect(Collectors.joining("; ")) + + getPrintableFieldIfPresent(record, "_airbyte_extracted_at"); + } + + /** + * In the expected records, a SQL null is represented as a JsonNode without that field at all, and a JSON null is + 
* represented as a NullNode. For example, in the JSON blob {"name": null}, the `name` field is a JSON null, and the + * `address` field is a SQL null. + */ + public void verifySyncResult(List expectedRawRecords, + List actualRawRecords, + List expectedFinalRecords, + List actualFinalRecords) { + assertAll( + () -> diffRawTableRecords(expectedRawRecords, actualRawRecords), + () -> diffFinalTableRecords(expectedFinalRecords, actualFinalRecords) + ); + } + + private void diffRawTableRecords(List expectedRecords, List actualRecords) { + String diff = diffRecords( + expectedRecords, + actualRecords, + rawRecordIdentityComparator, + rawRecordSortComparator, + rawRecordIdentityExtractor, + true); + + assertTrue(diff.isEmpty(), "Raw table was incorrect.\n" + diff); + } + + private void diffFinalTableRecords(List expectedRecords, List actualRecords) { + String diff = diffRecords( + expectedRecords, + actualRecords, + finalRecordIdentityComparator, + finalRecordSortComparator, + finalRecordIdentityExtractor, + false); + + assertTrue(diff.isEmpty(), "Final table was incorrect.\n" + diff); + } + + private static String getPrintableFieldIfPresent(JsonNode record, String field) { + if (record.has(field)) { + return field + "=" + record.get(field) + "; "; + } else { + return ""; + } + } + + /** + * Generate a human-readable diff between the two lists. Only checks the keys specified in + * expectedRecords. Assumes (in general) that two records with the same PK, cursor, and extracted_at + * are the same record. + * + * @param identityComparator Returns 0 iff two records are the "same" record (i.e. 
have the same + * PK+cursor+extracted_at) + * @param sortComparator Behaves identically to identityComparator, but if two records are the same, + * breaks that tie using _airbyte_raw_id + * @param recordIdExtractor Dump the record's PK+cursor+extracted_at into a human-readable string + * @param extractRawData Whether to look inside the _airbyte_data column and diff its subfields + * @return The diff, or empty string if there were no differences + */ + private static String diffRecords(List originalExpectedRecords, + List originalActualRecords, + Comparator identityComparator, + Comparator sortComparator, + Function recordIdExtractor, + boolean extractRawData) { + List expectedRecords = originalExpectedRecords.stream().sorted(sortComparator).toList(); + List actualRecords = originalActualRecords.stream().sorted(sortComparator).toList(); + + // Iterate through both lists in parallel and compare each record. + // Build up an error message listing any incorrect, missing, or unexpected records. + String message = ""; + int expectedRecordIndex = 0; + int actualRecordIndex = 0; + while (expectedRecordIndex < expectedRecords.size() && actualRecordIndex < actualRecords.size()) { + JsonNode expectedRecord = expectedRecords.get(expectedRecordIndex); + JsonNode actualRecord = actualRecords.get(actualRecordIndex); + int compare = identityComparator.compare(expectedRecord, actualRecord); + if (compare == 0) { + // These records should be the same. Find the specific fields that are different. + boolean foundMismatch = false; + String mismatchedRecordMessage = "Row had incorrect data:" + recordIdExtractor.apply(expectedRecord) + "\n"; + // Iterate through each column in the expected record and compare it to the actual record's value. + for (String column : Streams.stream(expectedRecord.fieldNames()).sorted().toList()) { + if (extractRawData && "_airbyte_data".equals(column)) { + // For the raw data in particular, we should also diff the fields inside _airbyte_data. 
+ JsonNode expectedRawData = expectedRecord.get("_airbyte_data"); + JsonNode actualRawData = actualRecord.get("_airbyte_data"); + // Iterate through all the subfields of the expected raw data and check that they match the actual + // record... + for (String field : Streams.stream(expectedRawData.fieldNames()).sorted().toList()) { + JsonNode expectedValue = expectedRawData.get(field); + JsonNode actualValue = actualRawData.get(field); + if (jsonNodesNotEquivalent(expectedValue, actualValue)) { + mismatchedRecordMessage += generateFieldError("_airbyte_data." + field, expectedValue, actualValue); + foundMismatch = true; + } + } + // ... and then check the actual raw data for any subfields that we weren't expecting. + LinkedHashMap extraColumns = checkForExtraOrNonNullFields(expectedRawData, actualRawData); + if (extraColumns.size() > 0) { + for (Map.Entry extraColumn : extraColumns.entrySet()) { + mismatchedRecordMessage += generateFieldError("_airbyte_data." + extraColumn.getKey(), null, extraColumn.getValue()); + foundMismatch = true; + } + } + } else { + // For all other columns, we can just compare their values directly. + JsonNode expectedValue = expectedRecord.get(column); + JsonNode actualValue = actualRecord.get(column); + if (jsonNodesNotEquivalent(expectedValue, actualValue)) { + mismatchedRecordMessage += generateFieldError("column " + column, expectedValue, actualValue); + foundMismatch = true; + } + } + } + // Then check the entire actual record for any columns that we weren't expecting. 
+ LinkedHashMap extraColumns = checkForExtraOrNonNullFields(expectedRecord, actualRecord); + if (extraColumns.size() > 0) { + for (Map.Entry extraColumn : extraColumns.entrySet()) { + mismatchedRecordMessage += generateFieldError("column " + extraColumn.getKey(), null, extraColumn.getValue()); + foundMismatch = true; + } + } + if (foundMismatch) { + message += mismatchedRecordMessage; + } + + expectedRecordIndex++; + actualRecordIndex++; + } else if (compare < 0) { + // The expected record is missing from the actual records. Print it and move on to the next expected + // record. + message += "Row was expected but missing: " + expectedRecord + "\n"; + expectedRecordIndex++; + } else { + // There's an actual record which isn't present in the expected records. Print it and move on to the + // next actual record. + message += "Row was not expected but present: " + actualRecord + "\n"; + actualRecordIndex++; + } + } + // Tail loops in case we reached the end of one list before the other. + while (expectedRecordIndex < expectedRecords.size()) { + message += "Row was expected but missing: " + expectedRecords.get(expectedRecordIndex) + "\n"; + expectedRecordIndex++; + } + while (actualRecordIndex < actualRecords.size()) { + message += "Row was not expected but present: " + actualRecords.get(actualRecordIndex) + "\n"; + actualRecordIndex++; + } + + return message; + } + + private static boolean jsonNodesNotEquivalent(JsonNode expectedValue, JsonNode actualValue) { + // This is kind of sketchy, but seems to work fine for the data we have in our test cases. + return !Objects.equals(expectedValue, actualValue) + // Objects.equals expects the two values to be the same class. + // We need to handle comparisons between e.g. LongNode and IntNode. 
+ && !(expectedValue.isIntegralNumber() && actualValue.isIntegralNumber() && expectedValue.asLong() == actualValue.asLong()) + && !(expectedValue.isNumber() && actualValue.isNumber() && expectedValue.asDouble() == actualValue.asDouble()); + } + + /** + * Verify that all fields in the actual record are present in the expected record. This is primarily + * relevant for detecting fields that we expected to be null, but actually were not. See + * {@link BaseTypingDedupingTest#dumpFinalTableRecords(String, String)} for an explanation of how SQL/JSON nulls are + * represented in the expected record. + *

+ * This has the side benefit of detecting completely unexpected columns, which would be a very weird + * bug but is probably still useful to catch. + */ + private static LinkedHashMap checkForExtraOrNonNullFields(JsonNode expectedRecord, JsonNode actualRecord) { + LinkedHashMap extraFields = new LinkedHashMap<>(); + for (String column : Streams.stream(actualRecord.fieldNames()).sorted().toList()) { + // loaded_at and raw_id are generated dynamically, so we just ignore them. + if (!"_airbyte_loaded_at".equals(column) && !"_airbyte_raw_id".equals(column) && !expectedRecord.has(column)) { + extraFields.put(column, actualRecord.get(column)); + } + } + return extraFields; + } + + /** + * Produce a pretty-printed error message, e.g. " For column foo, expected 1 but got 2". The leading spaces are + * intentional, to make the message easier to read when it's embedded in a larger stacktrace. + */ + private static String generateFieldError(String fieldname, JsonNode expectedValue, JsonNode actualValue) { + String expectedString = expectedValue == null ? "SQL NULL (i.e. no value)" : expectedValue.toString(); + String actualString = actualValue == null ? "SQL NULL (i.e. no value)" : actualValue.toString(); + return " For " + fieldname + ", expected " + expectedString + " but got " + actualString + "\n"; + } + + // These asFoo methods are used for sorting records, so their defaults are intended to make broken + // records stand out. 
+ private static String asString(JsonNode node) { + if (node == null || node.isNull()) { + return ""; + } else if (node.isTextual()) { + return node.asText(); + } else { + return Jsons.serialize(node); + } + } + + private static double asDouble(JsonNode node) { + if (node == null || !node.isNumber()) { + return Double.MIN_VALUE; + } else { + return node.longValue(); + } + } + + private static long asInt(JsonNode node) { + if (node == null || !node.isIntegralNumber()) { + return Long.MIN_VALUE; + } else { + return node.longValue(); + } + } + + private static boolean asBoolean(JsonNode node) { + if (node == null || !node.isBoolean()) { + return false; + } else { + return node.asBoolean(); + } + } + + private static Instant asTimestampWithTimezone(JsonNode node) { + if (node == null || !node.isTextual()) { + return Instant.ofEpochMilli(Long.MIN_VALUE); + } else { + try { + return Instant.parse(node.asText()); + } catch (Exception e) { + return Instant.ofEpochMilli(Long.MIN_VALUE); + } + } + } + + private static LocalDateTime asTimestampWithoutTimezone(JsonNode node) { + if (node == null || !node.isTextual()) { + return LocalDateTime.ofInstant(Instant.ofEpochMilli(Long.MIN_VALUE), ZoneOffset.UTC); + } else { + try { + return LocalDateTime.parse(node.asText()); + } catch (Exception e) { + return LocalDateTime.ofInstant(Instant.ofEpochMilli(Long.MIN_VALUE), ZoneOffset.UTC); + } + } + } + + private static OffsetTime asTimeWithTimezone(JsonNode node) { + if (node == null || !node.isTextual()) { + return OffsetTime.of(0, 0, 0, 0, ZoneOffset.UTC); + } else { + return OffsetTime.parse(node.asText()); + } + } + + private static LocalTime asTimeWithoutTimezone(JsonNode node) { + if (node == null || !node.isTextual()) { + return LocalTime.of(0, 0, 0); + } else { + try { + return LocalTime.parse(node.asText()); + } catch (Exception e) { + return LocalTime.of(0, 0, 0); + } + } + } + + private static LocalDate asDate(JsonNode node) { + if (node == null || !node.isTextual()) { + 
return LocalDate.ofInstant(Instant.ofEpochMilli(Long.MIN_VALUE), ZoneOffset.UTC); + } else { + try { + return LocalDate.parse(node.asText()); + } catch (Exception e) { + return LocalDate.ofInstant(Instant.ofEpochMilli(Long.MIN_VALUE), ZoneOffset.UTC); + } + } + } + + private static Comparable extract(JsonNode node, String field, AirbyteType type) { + if (type instanceof AirbyteProtocolType t) { + return switch (t) { + case STRING -> asString(node.get(field)); + case NUMBER -> asDouble(node.get(field)); + case INTEGER -> asInt(node.get(field)); + case BOOLEAN -> asBoolean(node.get(field)); + case TIMESTAMP_WITH_TIMEZONE -> asTimestampWithTimezone(node.get(field)); + case TIMESTAMP_WITHOUT_TIMEZONE -> asTimestampWithoutTimezone(node.get(field)); + case TIME_WITH_TIMEZONE -> asTimeWithTimezone(node.get(field)); + case TIME_WITHOUT_TIMEZONE -> asTimeWithoutTimezone(node.get(field)); + case DATE -> asDate(node.get(field)); + case UNKNOWN -> node.toString(); + }; + } else { + return node.toString(); + } + } +} diff --git a/airbyte-integrations/bases/base-typing-deduping/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/AirbyteType.java b/airbyte-integrations/bases/base-typing-deduping/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/AirbyteType.java index 7c6dcc28597d..2d3b0628e8c4 100644 --- a/airbyte-integrations/bases/base-typing-deduping/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/AirbyteType.java +++ b/airbyte-integrations/bases/base-typing-deduping/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/AirbyteType.java @@ -74,6 +74,8 @@ static AirbyteType fromJsonSchema(final JsonNode schema) { return AirbyteTypeUtils.getAirbyteProtocolType(schema); } + public LinkedHashMap asColumns(); + private static Struct getStruct(final JsonNode schema) { final LinkedHashMap propertiesMap = new LinkedHashMap<>(); final JsonNode properties = schema.get("properties"); @@ -107,6 +109,11 @@ 
public static AirbyteProtocolType matches(final String type) { } } + @Override + public LinkedHashMap asColumns() { + throw new UnsupportedOperationException("Basic types cannot be converted to columns."); + } + } /** @@ -114,10 +121,18 @@ public static AirbyteProtocolType matches(final String type) { */ record Struct(LinkedHashMap properties) implements AirbyteType { + @Override + public LinkedHashMap asColumns() { + return properties; + } } record Array(AirbyteType items) implements AirbyteType { + @Override + public LinkedHashMap asColumns() { + throw new UnsupportedOperationException("Arrays cannot be converted to columns."); + } } /** @@ -127,6 +142,10 @@ record Array(AirbyteType items) implements AirbyteType { */ record UnsupportedOneOf(List options) implements AirbyteType { + @Override + public LinkedHashMap asColumns() { + throw new UnsupportedOperationException("OneOf cannot be converted to columns."); + } } /** From 9c136f7ab616b3640a23cb6d1420ebd2f3d785b5 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Mon, 3 Jul 2023 10:46:50 -0700 Subject: [PATCH 24/46] and use it in the sql generator test --- .../typing_deduping/RecordDiffer.java | 4 +- .../AbstractBigQueryTypingDedupingTest.java | 21 +- .../BigQuerySqlGeneratorIntegrationTest.java | 919 +++++++----------- 3 files changed, 358 insertions(+), 586 deletions(-) diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java index c62a1bcfa706..5e4385c0f4e9 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java @@ -73,7 +73,7 @@ 
public void verifySyncResult(List expectedRawRecords, ); } - private void diffRawTableRecords(List expectedRecords, List actualRecords) { + public void diffRawTableRecords(List expectedRecords, List actualRecords) { String diff = diffRecords( expectedRecords, actualRecords, @@ -85,7 +85,7 @@ private void diffRawTableRecords(List expectedRecords, List assertTrue(diff.isEmpty(), "Raw table was incorrect.\n" + diff); } - private void diffFinalTableRecords(List expectedRecords, List actualRecords) { + public void diffFinalTableRecords(List expectedRecords, List actualRecords) { String diff = diffRecords( expectedRecords, actualRecords, diff --git a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/AbstractBigQueryTypingDedupingTest.java b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/AbstractBigQueryTypingDedupingTest.java index 912db29823be..4fa25ee9b73f 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/AbstractBigQueryTypingDedupingTest.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/AbstractBigQueryTypingDedupingTest.java @@ -43,8 +43,7 @@ protected List dumpRawTableRecords(String streamNamespace, String stre streamNamespace = BigQueryUtils.getDatasetId(getConfig()); } TableResult result = bq.query(QueryJobConfiguration.of("SELECT * FROM airbyte." 
+ streamNamespace + "_" + streamName)); - List> rowsAsMaps = BigQuerySqlGeneratorIntegrationTest.toMaps(result); - return rowsAsMaps.stream().map(AbstractBigQueryTypingDedupingTest::toJson).toList(); + return BigQuerySqlGeneratorIntegrationTest.toJsonRecords(result); } @Override @@ -53,8 +52,7 @@ protected List dumpFinalTableRecords(String streamNamespace, String st streamNamespace = BigQueryUtils.getDatasetId(getConfig()); } TableResult result = bq.query(QueryJobConfiguration.of("SELECT * FROM " + streamNamespace + "." + streamName)); - List> rowsAsMaps = BigQuerySqlGeneratorIntegrationTest.toMaps(result); - return rowsAsMaps.stream().map(AbstractBigQueryTypingDedupingTest::toJson).toList(); + return BigQuerySqlGeneratorIntegrationTest.toJsonRecords(result); } @Override @@ -64,19 +62,4 @@ protected void teardownStreamAndNamespace(String streamNamespace, String streamN bq.delete(TableId.of("airbyte", streamNamespace + "_" + streamName)); bq.delete(DatasetId.of(streamNamespace), BigQuery.DatasetDeleteOption.deleteContents()); } - - private static JsonNode toJson(LinkedHashMap map) { - ObjectNode o = (ObjectNode) Jsons.emptyObject(); - map.forEach((key, value) -> { - if (value == null) { - // If the value is null, do nothing. We don't want to insert it into the json at all. 
- } else if (value instanceof Instant i) { - // naively converting an Instant returns a DecimalNode with the unix epoch, so instead we manually stringify it - o.set(key, Jsons.jsonNode(i.toString())); - } else { - o.set(key, Jsons.jsonNode(value)); - } - }); - return o; - } } diff --git a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGeneratorIntegrationTest.java b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGeneratorIntegrationTest.java index 4ec3b2876be9..2c88a54b74c9 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGeneratorIntegrationTest.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGeneratorIntegrationTest.java @@ -5,33 +5,43 @@ package io.airbyte.integrations.destination.bigquery.typing_deduping; import static com.google.cloud.bigquery.LegacySQLTypeName.legacySQLTypeName; -import static java.util.stream.Collectors.toSet; import static org.junit.jupiter.api.Assertions.*; import com.fasterxml.jackson.databind.JsonNode; -import com.google.cloud.bigquery.*; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.google.cloud.bigquery.BigQuery; +import com.google.cloud.bigquery.BigQueryException; +import com.google.cloud.bigquery.DatasetInfo; +import com.google.cloud.bigquery.Field; import com.google.cloud.bigquery.Field.Mode; -import com.google.common.collect.ImmutableMap; +import com.google.cloud.bigquery.FieldValue; +import com.google.cloud.bigquery.FieldValueList; +import com.google.cloud.bigquery.QueryJobConfiguration; +import com.google.cloud.bigquery.Schema; +import 
com.google.cloud.bigquery.StandardSQLTypeName; +import com.google.cloud.bigquery.Table; +import com.google.cloud.bigquery.TableResult; import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.base.destination.typing_deduping.AirbyteType; import io.airbyte.integrations.base.destination.typing_deduping.AirbyteType.AirbyteProtocolType; import io.airbyte.integrations.base.destination.typing_deduping.AirbyteType.Array; import io.airbyte.integrations.base.destination.typing_deduping.AirbyteType.Struct; import io.airbyte.integrations.base.destination.typing_deduping.CatalogParser.StreamConfig; +import io.airbyte.integrations.base.destination.typing_deduping.RecordDiffer; import io.airbyte.integrations.base.destination.typing_deduping.SqlGenerator.ColumnId; import io.airbyte.integrations.base.destination.typing_deduping.SqlGenerator.StreamId; import io.airbyte.integrations.destination.bigquery.BigQueryDestination; import io.airbyte.protocol.models.v0.DestinationSyncMode; import io.airbyte.protocol.models.v0.SyncMode; -import java.math.BigDecimal; import java.nio.file.Files; import java.nio.file.Path; import java.time.Duration; -import java.time.Instant; -import java.util.*; -import java.util.Map.Entry; -import java.util.function.Function; -import java.util.stream.Collectors; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.UUID; +import org.apache.commons.lang3.tuple.Pair; import org.apache.commons.text.StringSubstitutor; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeAll; @@ -52,22 +62,11 @@ public class BigQuerySqlGeneratorIntegrationTest { public static final List PRIMARY_KEY = List.of(ID_COLUMN); public static final ColumnId CURSOR = GENERATOR.buildColumnId("updated_at"); public static final ColumnId CDC_CURSOR = GENERATOR.buildColumnId("_ab_cdc_lsn"); - /** - * Super hacky way to sort rows represented as {@code Map} - */ - public static final 
Comparator> ROW_COMPARATOR = (row1, row2) -> { - int cmp; - cmp = compareRowsOnColumn(ID_COLUMN.name(), row1, row2); - if (cmp != 0) { - return cmp; - } - cmp = compareRowsOnColumn(CURSOR.name(), row1, row2); - if (cmp != 0) { - return cmp; - } - cmp = compareRowsOnColumn(CDC_CURSOR.name(), row1, row2); - return cmp; - }; + public static final RecordDiffer DIFFER = new RecordDiffer( + Pair.of("id", AirbyteProtocolType.INTEGER), + Pair.of("updated_at", AirbyteProtocolType.TIMESTAMP_WITH_TIMEZONE), + Pair.of("_ab_cdc_lsn", AirbyteProtocolType.INTEGER) + ); public static final String QUOTE = "`"; private static final LinkedHashMap COLUMNS; private static final LinkedHashMap CDC_COLUMNS; @@ -182,13 +181,13 @@ public void testCreateTableIncremental() throws InterruptedException { public void testVerifyPrimaryKeysIncremental() throws InterruptedException { createRawTable(); bq.query(QueryJobConfiguration.newBuilder( - new StringSubstitutor(Map.of( - "dataset", testDataset)).replace( + new StringSubstitutor(Map.of( + "dataset", testDataset)).replace( """ - INSERT INTO ${dataset}.users_raw (`_airbyte_data`, `_airbyte_raw_id`, `_airbyte_extracted_at`) VALUES - (JSON'{}', '10d6e27d-ae7a-41b5-baf8-c4c277ef9c11', '2023-01-01T00:00:00Z'), - (JSON'{"id": 1}', '5ce60e70-98aa-4fe3-8159-67207352c4f0', '2023-01-01T00:00:00Z'); - """)) + INSERT INTO ${dataset}.users_raw (`_airbyte_data`, `_airbyte_raw_id`, `_airbyte_extracted_at`) VALUES + (JSON'{}', '10d6e27d-ae7a-41b5-baf8-c4c277ef9c11', '2023-01-01T00:00:00Z'), + (JSON'{"id": 1}', '5ce60e70-98aa-4fe3-8159-67207352c4f0', '2023-01-01T00:00:00Z'); + """)) .build()); // This variable is declared outside of the transaction, so we need to do it manually here @@ -206,60 +205,58 @@ public void testInsertNewRecordsIncremental() throws InterruptedException { createRawTable(); createFinalTable(); bq.query(QueryJobConfiguration.newBuilder( - new StringSubstitutor(Map.of( - "dataset", testDataset)).replace( + new StringSubstitutor(Map.of( + 
"dataset", testDataset)).replace( """ - INSERT INTO ${dataset}.users_raw (`_airbyte_data`, `_airbyte_raw_id`, `_airbyte_extracted_at`) VALUES - (JSON'{"id": 1, "updated_at": "2023-01-01T01:00:00Z", "string": "Alice", "struct": {"city": "San Francisco", "state": "CA"}}', '972fa08a-aa06-4b91-a6af-a371aee4cb1c', '2023-01-01T00:00:00Z'), - (JSON'{"id": 1, "updated_at": "2023-01-01T02:00:00Z", "string": "Alice", "struct": {"city": "San Diego", "state": "CA"}}', '233ad43d-de50-4a47-bbe6-7a417ce60d9d', '2023-01-01T00:00:00Z'), - (JSON'{"id": 2, "updated_at": "2023-01-01T03:00:00Z", "string": "Bob", "integer": "oops"}', 'd4aeb036-2d95-4880-acd2-dc69b42b03c6', '2023-01-01T00:00:00Z'); - """)) + INSERT INTO ${dataset}.users_raw (`_airbyte_data`, `_airbyte_raw_id`, `_airbyte_extracted_at`) VALUES + (JSON'{"id": 1, "updated_at": "2023-01-01T01:00:00Z", "string": "Alice", "struct": {"city": "San Francisco", "state": "CA"}}', '972fa08a-aa06-4b91-a6af-a371aee4cb1c', '2023-01-01T00:00:00Z'), + (JSON'{"id": 1, "updated_at": "2023-01-01T02:00:00Z", "string": "Alice", "struct": {"city": "San Diego", "state": "CA"}}', '233ad43d-de50-4a47-bbe6-7a417ce60d9d', '2023-01-01T00:00:00Z'), + (JSON'{"id": 2, "updated_at": "2023-01-01T03:00:00Z", "string": "Bob", "integer": "oops"}', 'd4aeb036-2d95-4880-acd2-dc69b42b03c6', '2023-01-01T00:00:00Z'); + """)) .build()); final String sql = GENERATOR.insertNewRecords(streamId, "", COLUMNS, DestinationSyncMode.OVERWRITE); logAndExecute(sql); final TableResult result = bq.query(QueryJobConfiguration.newBuilder("SELECT * FROM " + streamId.finalTableId(QUOTE)).build()); - assertQueryResult( + DIFFER.diffFinalTableRecords( List.of( - Map.of( - "id", Optional.of(1L), - "updated_at", Optional.of(Instant.parse("2023-01-01T01:00:00Z")), - "string", Optional.of("Alice"), - "struct", Optional.of(Jsons.deserialize( - """ - {"city": "San Francisco", "state": "CA"} - """)), - "_airbyte_extracted_at", Optional.of(Instant.parse("2023-01-01T00:00:00Z")), - 
"_airbyte_meta", Optional.of(Jsons.deserialize( - """ - {"errors":[]} - """))), - Map.of( - "id", Optional.of(1L), - "updated_at", Optional.of(Instant.parse("2023-01-01T02:00:00Z")), - "string", Optional.of("Alice"), - "struct", Optional.of(Jsons.deserialize( + Jsons.deserialize( + """ + { + "id": 1, + "updated_at": "2023-01-01T01:00:00Z", + "string": "Alice", + "struct": {"city": "San Francisco", "state": "CA"}, + "_airbyte_extracted_at": "2023-01-01T00:00:00Z", + "_airbyte_meta": {"errors":[]} + } """ - {"city": "San Diego", "state": "CA"} - """)), - "_airbyte_extracted_at", Optional.of(Instant.parse("2023-01-01T00:00:00Z")), - "_airbyte_meta", Optional.of(Jsons.deserialize( + ), + Jsons.deserialize( + """ + { + "id": 1, + "updated_at": "2023-01-01T02:00:00Z", + "string": "Alice", + "struct": {"city": "San Diego", "state": "CA"}, + "_airbyte_extracted_at": "2023-01-01T00:00:00Z", + "_airbyte_meta": {"errors":[]} + } """ - {"errors":[]} - """))), - Map.of( - "id", Optional.of(2L), - "updated_at", Optional.of(Instant.parse("2023-01-01T03:00:00Z")), - "string", Optional.of("Bob"), - "struct", Optional.empty(), - "integer", Optional.empty(), - "_airbyte_extracted_at", Optional.of(Instant.parse("2023-01-01T00:00:00Z")), - "_airbyte_meta", Optional.of(Jsons.deserialize( + ), + Jsons.deserialize( + """ + { + "id": 2, + "updated_at": "2023-01-01T03:00:00Z", + "string": "Bob", + "_airbyte_extracted_at": "2023-01-01T00:00:00Z", + "_airbyte_meta": {"errors":["Problem with `integer`"]} + } """ - {"errors":["Problem with `integer`"]} - """)))), - result); + )), + toJsonRecords(result)); } @Test @@ -267,52 +264,50 @@ public void testDedupFinalTable() throws InterruptedException { createRawTable(); createFinalTable(); bq.query(QueryJobConfiguration.newBuilder( - new StringSubstitutor(Map.of( - "dataset", testDataset)).replace( + new StringSubstitutor(Map.of( + "dataset", testDataset)).replace( """ - INSERT INTO ${dataset}.users_raw (`_airbyte_data`, `_airbyte_raw_id`, 
`_airbyte_extracted_at`) VALUES - (JSON'{"id": 1, "updated_at": "2023-01-01T01:00:00Z", "string": "Alice", "struct": {"city": "San Francisco", "state": "CA"}, "integer": 42}', 'd7b81af0-01da-4846-a650-cc398986bc99', '2023-01-01T00:00:00Z'), - (JSON'{"id": 1, "updated_at": "2023-01-01T02:00:00Z", "string": "Alice", "struct": {"city": "San Diego", "state": "CA"}, "integer": 84}', '80c99b54-54b4-43bd-b51b-1f67dafa2c52', '2023-01-01T00:00:00Z'), - (JSON'{"id": 2, "updated_at": "2023-01-01T03:00:00Z", "string": "Bob", "integer": "oops"}', 'ad690bfb-c2c2-4172-bd73-a16c86ccbb67', '2023-01-01T00:00:00Z'); - - INSERT INTO ${dataset}.users_final (_airbyte_raw_id, _airbyte_extracted_at, _airbyte_meta, `id`, `updated_at`, `string`, `struct`, `integer`) values - ('d7b81af0-01da-4846-a650-cc398986bc99', '2023-01-01T00:00:00Z', JSON'{"errors":[]}', 1, '2023-01-01T01:00:00Z', 'Alice', JSON'{"city": "San Francisco", "state": "CA"}', 42), - ('80c99b54-54b4-43bd-b51b-1f67dafa2c52', '2023-01-01T00:00:00Z', JSON'{"errors":[]}', 1, '2023-01-01T02:00:00Z', 'Alice', JSON'{"city": "San Diego", "state": "CA"}', 84), - ('ad690bfb-c2c2-4172-bd73-a16c86ccbb67', '2023-01-01T00:00:00Z', JSON'{"errors": ["blah blah integer"]}', 2, '2023-01-01T03:00:00Z', 'Bob', NULL, NULL); - """)) + INSERT INTO ${dataset}.users_raw (`_airbyte_data`, `_airbyte_raw_id`, `_airbyte_extracted_at`) VALUES + (JSON'{"id": 1, "updated_at": "2023-01-01T01:00:00Z", "string": "Alice", "struct": {"city": "San Francisco", "state": "CA"}, "integer": 42}', 'd7b81af0-01da-4846-a650-cc398986bc99', '2023-01-01T00:00:00Z'), + (JSON'{"id": 1, "updated_at": "2023-01-01T02:00:00Z", "string": "Alice", "struct": {"city": "San Diego", "state": "CA"}, "integer": 84}', '80c99b54-54b4-43bd-b51b-1f67dafa2c52', '2023-01-01T00:00:00Z'), + (JSON'{"id": 2, "updated_at": "2023-01-01T03:00:00Z", "string": "Bob", "integer": "oops"}', 'ad690bfb-c2c2-4172-bd73-a16c86ccbb67', '2023-01-01T00:00:00Z'); + + INSERT INTO ${dataset}.users_final 
(_airbyte_raw_id, _airbyte_extracted_at, _airbyte_meta, `id`, `updated_at`, `string`, `struct`, `integer`) values + ('d7b81af0-01da-4846-a650-cc398986bc99', '2023-01-01T00:00:00Z', JSON'{"errors":[]}', 1, '2023-01-01T01:00:00Z', 'Alice', JSON'{"city": "San Francisco", "state": "CA"}', 42), + ('80c99b54-54b4-43bd-b51b-1f67dafa2c52', '2023-01-01T00:00:00Z', JSON'{"errors":[]}', 1, '2023-01-01T02:00:00Z', 'Alice', JSON'{"city": "San Diego", "state": "CA"}', 84), + ('ad690bfb-c2c2-4172-bd73-a16c86ccbb67', '2023-01-01T00:00:00Z', JSON'{"errors": ["blah blah integer"]}', 2, '2023-01-01T03:00:00Z', 'Bob', NULL, NULL); + """)) .build()); final String sql = GENERATOR.dedupFinalTable(streamId, "", PRIMARY_KEY, CURSOR, COLUMNS); logAndExecute(sql); final TableResult result = bq.query(QueryJobConfiguration.newBuilder("SELECT * FROM " + streamId.finalTableId(QUOTE)).build()); - assertQueryResult( + DIFFER.diffFinalTableRecords( List.of( - Map.of( - "id", Optional.of(1L), - "updated_at", Optional.of(Instant.parse("2023-01-01T02:00:00Z")), - "string", Optional.of("Alice"), - "struct", Optional.of(Jsons.deserialize( - """ - {"city": "San Diego", "state": "CA"} + Jsons.deserialize( + """ + { + "id": 1, + "updated_at": "2023-01-01T02:00:00Z", + "string": "Alice", + "struct": {"city": "San Diego", "state": "CA"}, + "integer": 84, + "_airbyte_extracted_at": "2023-01-01T00:00:00Z", + "_airbyte_meta": {"errors":[]} + } + """), + Jsons.deserialize( + """ + { + "id": 2, + "updated_at": "2023-01-01T03:00:00Z", + "string": "Bob", + "_airbyte_extracted_at": "2023-01-01T00:00:00Z", + "_airbyte_meta": {"errors":["blah blah integer"]} + } """)), - "integer", Optional.of(84L), - "_airbyte_extracted_at", Optional.of(Instant.parse("2023-01-01T00:00:00Z")), - "_airbyte_meta", Optional.of(Jsons.deserialize( - """ - {"errors":[]} - """))), - Map.of( - "id", Optional.of(2L), - "updated_at", Optional.of(Instant.parse("2023-01-01T03:00:00Z")), - "string", Optional.of("Bob"), - "struct", 
Optional.empty(), - "_airbyte_extracted_at", Optional.of(Instant.parse("2023-01-01T00:00:00Z")), - "_airbyte_meta", Optional.of(Jsons.deserialize( - """ - {"errors":["blah blah integer"]} - """)))), - result); + toJsonRecords(result)); } @Test @@ -320,54 +315,58 @@ public void testDedupRawTable() throws InterruptedException { createRawTable(); createFinalTable(); bq.query(QueryJobConfiguration.newBuilder( - new StringSubstitutor(Map.of( - "dataset", testDataset)).replace( + new StringSubstitutor(Map.of( + "dataset", testDataset)).replace( """ - INSERT INTO ${dataset}.users_raw (`_airbyte_data`, `_airbyte_raw_id`, `_airbyte_extracted_at`) VALUES - (JSON'{"id": 1, "updated_at": "2023-01-01T01:00:00Z", "string": "Alice", "struct": {"city": "San Francisco", "state": "CA"}, "integer": 42}', 'd7b81af0-01da-4846-a650-cc398986bc99', '2023-01-01T00:00:00Z'), - (JSON'{"id": 1, "updated_at": "2023-01-01T02:00:00Z", "string": "Alice", "struct": {"city": "San Diego", "state": "CA"}, "integer": 84}', '80c99b54-54b4-43bd-b51b-1f67dafa2c52', '2023-01-01T00:00:00Z'), - (JSON'{"id": 2, "updated_at": "2023-01-01T03:00:00Z", "string": "Bob", "integer": "oops"}', 'ad690bfb-c2c2-4172-bd73-a16c86ccbb67', '2023-01-01T00:00:00Z'); - - INSERT INTO ${dataset}.users_final (_airbyte_raw_id, _airbyte_extracted_at, _airbyte_meta, `id`, `updated_at`, `string`, `struct`, `integer`) values - ('80c99b54-54b4-43bd-b51b-1f67dafa2c52', '2023-01-01T00:00:00Z', JSON'{"errors":[]}', 1, '2023-01-01T02:00:00Z', 'Alice', JSON'{"city": "San Diego", "state": "CA"}', 84), - ('ad690bfb-c2c2-4172-bd73-a16c86ccbb67', '2023-01-01T00:00:00Z', JSON'{"errors": ["blah blah integer"]}', 2, '2023-01-01T03:00:00Z', 'Bob', NULL, NULL); - """)) + INSERT INTO ${dataset}.users_raw (`_airbyte_data`, `_airbyte_raw_id`, `_airbyte_extracted_at`) VALUES + (JSON'{"id": 1, "updated_at": "2023-01-01T01:00:00Z", "string": "Alice", "struct": {"city": "San Francisco", "state": "CA"}, "integer": 42}', 
'd7b81af0-01da-4846-a650-cc398986bc99', '2023-01-01T00:00:00Z'), + (JSON'{"id": 1, "updated_at": "2023-01-01T02:00:00Z", "string": "Alice", "struct": {"city": "San Diego", "state": "CA"}, "integer": 84}', '80c99b54-54b4-43bd-b51b-1f67dafa2c52', '2023-01-01T00:00:00Z'), + (JSON'{"id": 2, "updated_at": "2023-01-01T03:00:00Z", "string": "Bob", "integer": "oops"}', 'ad690bfb-c2c2-4172-bd73-a16c86ccbb67', '2023-01-01T00:00:00Z'); + + INSERT INTO ${dataset}.users_final (_airbyte_raw_id, _airbyte_extracted_at, _airbyte_meta, `id`, `updated_at`, `string`, `struct`, `integer`) values + ('80c99b54-54b4-43bd-b51b-1f67dafa2c52', '2023-01-01T00:00:00Z', JSON'{"errors":[]}', 1, '2023-01-01T02:00:00Z', 'Alice', JSON'{"city": "San Diego", "state": "CA"}', 84), + ('ad690bfb-c2c2-4172-bd73-a16c86ccbb67', '2023-01-01T00:00:00Z', JSON'{"errors": ["blah blah integer"]}', 2, '2023-01-01T03:00:00Z', 'Bob', NULL, NULL); + """)) .build()); final String sql = GENERATOR.dedupRawTable(streamId, "", CDC_COLUMNS); logAndExecute(sql); final TableResult result = bq.query(QueryJobConfiguration.newBuilder("SELECT * FROM " + streamId.rawTableId(QUOTE)).build()); - assertQueryResult( + DIFFER.diffFinalTableRecords( List.of( - Map.of( - "_airbyte_raw_id", Optional.of("80c99b54-54b4-43bd-b51b-1f67dafa2c52"), - "_airbyte_extracted_at", Optional.of(Instant.parse("2023-01-01T00:00:00Z")), - "_airbyte_data", Optional.of(Jsons.deserialize( + Jsons.deserialize( + """ + { + "_airbyte_raw_id": "80c99b54-54b4-43bd-b51b-1f67dafa2c52", + "_airbyte_extracted_at": "2023-01-01T00:00:00Z", + "_airbyte_data": {"id": 1, "updated_at": "2023-01-01T02:00:00Z", "string": "Alice", "struct": {"city": "San Diego", "state": "CA"}, "integer": 84} + } """ - {"id": 1, "updated_at": "2023-01-01T02:00:00Z", "string": "Alice", "struct": {"city": "San Diego", "state": "CA"}, "integer": 84} - """))), - Map.of( - "_airbyte_raw_id", Optional.of("ad690bfb-c2c2-4172-bd73-a16c86ccbb67"), - "_airbyte_extracted_at", 
Optional.of(Instant.parse("2023-01-01T00:00:00Z")), - "_airbyte_data", Optional.of(Jsons.deserialize( + ), + Jsons.deserialize( + """ + { + "_airbyte_raw_id": "ad690bfb-c2c2-4172-bd73-a16c86ccbb67", + "_airbyte_extracted_at": "2023-01-01T00:00:00Z", + "_airbyte_data": {"id": 2, "updated_at": "2023-01-01T03:00:00Z", "string": "Bob", "integer": "oops"} + } """ - {"id": 2, "updated_at": "2023-01-01T03:00:00Z", "string": "Bob", "integer": "oops"} - """)))), - result); + )), + toJsonRecords(result)); } @Test public void testCommitRawTable() throws InterruptedException { createRawTable(); bq.query(QueryJobConfiguration.newBuilder( - new StringSubstitutor(Map.of( - "dataset", testDataset)).replace( + new StringSubstitutor(Map.of( + "dataset", testDataset)).replace( """ - INSERT INTO ${dataset}.users_raw (`_airbyte_data`, `_airbyte_raw_id`, `_airbyte_extracted_at`) VALUES - (JSON'{"id": 1, "updated_at": "2023-01-01T02:00:00Z", "string": "Alice", "struct": {"city": "San Diego", "state": "CA"}, "integer": 84}', '80c99b54-54b4-43bd-b51b-1f67dafa2c52', '2023-01-01T00:00:00Z'), - (JSON'{"id": 2, "updated_at": "2023-01-01T03:00:00Z", "string": "Bob", "integer": "oops"}', 'ad690bfb-c2c2-4172-bd73-a16c86ccbb67', '2023-01-01T00:00:00Z'); - """)) + INSERT INTO ${dataset}.users_raw (`_airbyte_data`, `_airbyte_raw_id`, `_airbyte_extracted_at`) VALUES + (JSON'{"id": 1, "updated_at": "2023-01-01T02:00:00Z", "string": "Alice", "struct": {"city": "San Diego", "state": "CA"}, "integer": 84}', '80c99b54-54b4-43bd-b51b-1f67dafa2c52', '2023-01-01T00:00:00Z'), + (JSON'{"id": 2, "updated_at": "2023-01-01T03:00:00Z", "string": "Bob", "integer": "oops"}', 'ad690bfb-c2c2-4172-bd73-a16c86ccbb67', '2023-01-01T00:00:00Z'); + """)) .build()); final String sql = GENERATOR.commitRawTable(streamId); @@ -383,107 +382,72 @@ public void testFullUpdateAllTypes() throws InterruptedException { createRawTable(); createFinalTable("_foo"); bq.query(QueryJobConfiguration.newBuilder( - new StringSubstitutor(Map.of( 
- "dataset", testDataset)).replace( + new StringSubstitutor(Map.of( + "dataset", testDataset)).replace( """ - INSERT INTO ${dataset}.users_raw (`_airbyte_raw_id`, `_airbyte_extracted_at`, `_airbyte_data`) VALUES - (generate_uuid(), '2023-01-01T00:00:00Z', JSON'{"id": 1, "updated_at": "2023-01-01T01:00:00Z", "array": ["foo"], "struct": {"foo": "bar"}, "string": "foo", "number": 42.1, "integer": 42, "boolean": true, "timestamp_with_timezone": "2023-01-23T12:34:56Z", "timestamp_without_timezone": "2023-01-23T12:34:56", "time_with_timezone": "12:34:56Z", "time_without_timezone": "12:34:56", "date": "2023-01-23", "unknown": {}}'), - (generate_uuid(), '2023-01-01T00:00:00Z', JSON'{"id": 2, "updated_at": "2023-01-01T01:00:00Z", "array": null, "struct": null, "string": null, "number": null, "integer": null, "boolean": null, "timestamp_with_timezone": null, "timestamp_without_timezone": null, "time_with_timezone": null, "time_without_timezone": null, "date": null, "unknown": null}'), - (generate_uuid(), '2023-01-01T00:00:00Z', JSON'{"id": 3, "updated_at": "2023-01-01T01:00:00Z"}'), - (generate_uuid(), '2023-01-01T00:00:00Z', JSON'{"id": 4, "updated_at": "2023-01-01T01:00:00Z", "array": {}, "struct": [], "string": {}, "number": {}, "integer": {}, "boolean": {}, "timestamp_with_timezone": {}, "timestamp_without_timezone": {}, "time_with_timezone": {}, "time_without_timezone": {}, "date": {}, "unknown": null}'); - """)) + INSERT INTO ${dataset}.users_raw (`_airbyte_raw_id`, `_airbyte_extracted_at`, `_airbyte_data`) VALUES + (generate_uuid(), '2023-01-01T00:00:00Z', JSON'{"id": 1, "updated_at": "2023-01-01T01:00:00Z", "array": ["foo"], "struct": {"foo": "bar"}, "string": "foo", "number": 42.1, "integer": 42, "boolean": true, "timestamp_with_timezone": "2023-01-23T12:34:56Z", "timestamp_without_timezone": "2023-01-23T12:34:56", "time_with_timezone": "12:34:56Z", "time_without_timezone": "12:34:56", "date": "2023-01-23", "unknown": {}}'), + (generate_uuid(), 
'2023-01-01T00:00:00Z', JSON'{"id": 2, "updated_at": "2023-01-01T01:00:00Z", "array": null, "struct": null, "string": null, "number": null, "integer": null, "boolean": null, "timestamp_with_timezone": null, "timestamp_without_timezone": null, "time_with_timezone": null, "time_without_timezone": null, "date": null, "unknown": null}'), + (generate_uuid(), '2023-01-01T00:00:00Z', JSON'{"id": 3, "updated_at": "2023-01-01T01:00:00Z"}'), + (generate_uuid(), '2023-01-01T00:00:00Z', JSON'{"id": 4, "updated_at": "2023-01-01T01:00:00Z", "array": {}, "struct": [], "string": {}, "number": {}, "integer": {}, "boolean": {}, "timestamp_with_timezone": {}, "timestamp_without_timezone": {}, "time_with_timezone": {}, "time_without_timezone": {}, "date": {}, "unknown": null}'); + """)) .build()); final String sql = GENERATOR.updateTable("_foo", incrementalDedupStreamConfig()); logAndExecute(sql); final TableResult finalTable = bq.query(QueryJobConfiguration.newBuilder("SELECT * FROM " + streamId.finalTableId("_foo", QUOTE)).build()); - assertQueryResult( + DIFFER.diffFinalTableRecords( List.of( - new ImmutableMap.Builder>() - .put("id", Optional.of(1L)) - .put("updated_at", Optional.of(Instant.parse("2023-01-01T01:00:00Z"))) - .put("array", Optional.of(Jsons.deserialize( - """ - ["foo"] - """))) - .put("struct", Optional.of(Jsons.deserialize( - """ - {"foo": "bar"} - """))) - .put("string", Optional.of("foo")) - .put("number", Optional.of(new BigDecimal("42.1"))) - .put("integer", Optional.of(42L)) - .put("boolean", Optional.of(true)) - .put("timestamp_with_timezone", Optional.of(Instant.parse("2023-01-23T12:34:56Z"))) - .put("timestamp_without_timezone", Optional.of("2023-01-23T12:34:56")) - .put("time_with_timezone", Optional.of("12:34:56Z")) - .put("time_without_timezone", Optional.of("12:34:56")) - .put("date", Optional.of("2023-01-23")) - .put("_airbyte_extracted_at", Optional.of(Instant.parse("2023-01-01T00:00:00Z"))) - .put("_airbyte_meta", Optional.of(Jsons.deserialize( - """ 
- {"errors":[]} - """))) - .build(), - new ImmutableMap.Builder>() - .put("id", Optional.of(2L)) - .put("updated_at", Optional.of(Instant.parse("2023-01-01T01:00:00Z"))) - .put("array", Optional.empty()) - .put("struct", Optional.empty()) - .put("string", Optional.empty()) - .put("number", Optional.empty()) - .put("integer", Optional.empty()) - .put("boolean", Optional.empty()) - .put("timestamp_with_timezone", Optional.empty()) - .put("timestamp_without_timezone", Optional.empty()) - .put("time_with_timezone", Optional.empty()) - .put("time_without_timezone", Optional.empty()) - .put("date", Optional.empty()) - .put("_airbyte_extracted_at", Optional.of(Instant.parse("2023-01-01T00:00:00Z"))) - .put("_airbyte_meta", Optional.of(Jsons.deserialize( - """ - {"errors":[]} - """))) - .build(), - new ImmutableMap.Builder>() - .put("id", Optional.of(3L)) - .put("updated_at", Optional.of(Instant.parse("2023-01-01T01:00:00Z"))) - .put("array", Optional.empty()) - .put("struct", Optional.empty()) - .put("string", Optional.empty()) - .put("number", Optional.empty()) - .put("integer", Optional.empty()) - .put("boolean", Optional.empty()) - .put("timestamp_with_timezone", Optional.empty()) - .put("timestamp_without_timezone", Optional.empty()) - .put("time_with_timezone", Optional.empty()) - .put("time_without_timezone", Optional.empty()) - .put("date", Optional.empty()) - .put("_airbyte_extracted_at", Optional.of(Instant.parse("2023-01-01T00:00:00Z"))) - .put("_airbyte_meta", Optional.of(Jsons.deserialize( - """ - {"errors":[]} - """))) - .build(), - new ImmutableMap.Builder>() - .put("id", Optional.of(4L)) - .put("updated_at", Optional.of(Instant.parse("2023-01-01T01:00:00Z"))) - .put("array", Optional.empty()) - .put("struct", Optional.empty()) - .put("string", Optional.empty()) - .put("number", Optional.empty()) - .put("integer", Optional.empty()) - .put("boolean", Optional.empty()) - .put("timestamp_with_timezone", Optional.empty()) - .put("timestamp_without_timezone", 
Optional.empty()) - .put("time_with_timezone", Optional.empty()) - .put("time_without_timezone", Optional.empty()) - .put("date", Optional.empty()) - .put("_airbyte_extracted_at", Optional.of(Instant.parse("2023-01-01T00:00:00Z"))) - .put("_airbyte_meta", Optional.of(Jsons.deserialize( - """ - {"errors":[ + Jsons.deserialize( + """ + { + "id": 1, + "updated_at": "2023-01-01T01:00:00Z", + "array": ["foo"], + "struct": {"foo": "bar"}, + "string": "foo", + "number": 42.1, + "integer": 42, + "boolean": true, + "timestamp_with_timezone": "2023-01-23T12:34:56Z", + "timestamp_without_timezone": "2023-01-23T12:34:56", + "time_with_timezone": "12:34:56Z", + "time_without_timezone": "12:34:56", + "date": "2023-01-23", + "unknown": {}, + "_airbyte_extracted_at": "2023-01-01T00:00:00Z", + "_airbyte_meta": {"errors": []} + } + """), + Jsons.deserialize( + """ + { + "id": 2, + "updated_at": "2023-01-01T01:00:00Z", + "unknown": null, + "_airbyte_extracted_at": "2023-01-01T00:00:00Z", + "_airbyte_meta": {"errors": []} + } + """), + Jsons.deserialize( + """ + { + "id": 3, + "updated_at": "2023-01-01T01:00:00Z", + "_airbyte_extracted_at": "2023-01-01T00:00:00Z", + "_airbyte_meta": {"errors": []} + } + """), + Jsons.deserialize( + """ + { + "id": 4, + "updated_at": "2023-01-01T01:00:00Z", + "unknown": null, + "_airbyte_extracted_at": "2023-01-01T00:00:00Z", + "_airbyte_meta": { + "errors": [ "Problem with `struct`", "Problem with `array`", "Problem with `string`", @@ -495,10 +459,11 @@ public void testFullUpdateAllTypes() throws InterruptedException { "Problem with `time_with_timezone`", "Problem with `time_without_timezone`", "Problem with `date`" - ]} - """))) - .build()), - finalTable); + ] + } + } + """)), + toJsonRecords(finalTable)); final long rawRows = bq.query(QueryJobConfiguration.newBuilder("SELECT * FROM " + streamId.rawTableId(QUOTE)).build()).getTotalRows(); assertEquals(4, rawRows); @@ -512,14 +477,14 @@ public void testFullUpdateIncrementalDedup() throws 
InterruptedException { createRawTable(); createFinalTable(); bq.query(QueryJobConfiguration.newBuilder( - new StringSubstitutor(Map.of( - "dataset", testDataset)).replace( + new StringSubstitutor(Map.of( + "dataset", testDataset)).replace( """ - INSERT INTO ${dataset}.users_raw (`_airbyte_data`, `_airbyte_raw_id`, `_airbyte_extracted_at`) VALUES - (JSON'{"id": 1, "updated_at": "2023-01-01T01:00:00Z", "string": "Alice", "struct": {"city": "San Francisco", "state": "CA"}, "integer": 42}', 'd7b81af0-01da-4846-a650-cc398986bc99', '2023-01-01T00:00:00Z'), - (JSON'{"id": 1, "updated_at": "2023-01-01T02:00:00Z", "string": "Alice", "struct": {"city": "San Diego", "state": "CA"}, "integer": 84}', '80c99b54-54b4-43bd-b51b-1f67dafa2c52', '2023-01-01T00:00:00Z'), - (JSON'{"id": 2, "updated_at": "2023-01-01T03:00:00Z", "string": "Bob", "integer": "oops"}', 'ad690bfb-c2c2-4172-bd73-a16c86ccbb67', '2023-01-01T00:00:00Z'); - """)) + INSERT INTO ${dataset}.users_raw (`_airbyte_data`, `_airbyte_raw_id`, `_airbyte_extracted_at`) VALUES + (JSON'{"id": 1, "updated_at": "2023-01-01T01:00:00Z", "string": "Alice", "struct": {"city": "San Francisco", "state": "CA"}, "integer": 42}', 'd7b81af0-01da-4846-a650-cc398986bc99', '2023-01-01T00:00:00Z'), + (JSON'{"id": 1, "updated_at": "2023-01-01T02:00:00Z", "string": "Alice", "struct": {"city": "San Diego", "state": "CA"}, "integer": 84}', '80c99b54-54b4-43bd-b51b-1f67dafa2c52', '2023-01-01T00:00:00Z'), + (JSON'{"id": 2, "updated_at": "2023-01-01T03:00:00Z", "string": "Bob", "integer": "oops"}', 'ad690bfb-c2c2-4172-bd73-a16c86ccbb67', '2023-01-01T00:00:00Z'); + """)) .build()); final String sql = GENERATOR.updateTable("", incrementalDedupStreamConfig()); @@ -540,14 +505,14 @@ public void testFullUpdateIncrementalAppend() throws InterruptedException { createRawTable(); createFinalTable("_foo"); bq.query(QueryJobConfiguration.newBuilder( - new StringSubstitutor(Map.of( - "dataset", testDataset)).replace( + new StringSubstitutor(Map.of( + 
"dataset", testDataset)).replace( """ - INSERT INTO ${dataset}.users_raw (`_airbyte_data`, `_airbyte_raw_id`, `_airbyte_extracted_at`) VALUES - (JSON'{"id": 1, "updated_at": "2023-01-01T01:00:00Z", "string": "Alice", "struct": {"city": "San Francisco", "state": "CA"}, "integer": 42}', 'd7b81af0-01da-4846-a650-cc398986bc99', '2023-01-01T00:00:00Z'), - (JSON'{"id": 1, "updated_at": "2023-01-01T02:00:00Z", "string": "Alice", "struct": {"city": "San Diego", "state": "CA"}, "integer": 84}', '80c99b54-54b4-43bd-b51b-1f67dafa2c52', '2023-01-01T00:00:00Z'), - (JSON'{"id": 2, "updated_at": "2023-01-01T03:00:00Z", "string": "Bob", "integer": "oops"}', 'ad690bfb-c2c2-4172-bd73-a16c86ccbb67', '2023-01-01T00:00:00Z'); - """)) + INSERT INTO ${dataset}.users_raw (`_airbyte_data`, `_airbyte_raw_id`, `_airbyte_extracted_at`) VALUES + (JSON'{"id": 1, "updated_at": "2023-01-01T01:00:00Z", "string": "Alice", "struct": {"city": "San Francisco", "state": "CA"}, "integer": 42}', 'd7b81af0-01da-4846-a650-cc398986bc99', '2023-01-01T00:00:00Z'), + (JSON'{"id": 1, "updated_at": "2023-01-01T02:00:00Z", "string": "Alice", "struct": {"city": "San Diego", "state": "CA"}, "integer": 84}', '80c99b54-54b4-43bd-b51b-1f67dafa2c52', '2023-01-01T00:00:00Z'), + (JSON'{"id": 2, "updated_at": "2023-01-01T03:00:00Z", "string": "Bob", "integer": "oops"}', 'ad690bfb-c2c2-4172-bd73-a16c86ccbb67', '2023-01-01T00:00:00Z'); + """)) .build()); final String sql = GENERATOR.updateTable("_foo", incrementalAppendStreamConfig()); @@ -571,17 +536,17 @@ public void testFullUpdateFullRefreshAppend() throws InterruptedException { createRawTable(); createFinalTable("_foo"); bq.query(QueryJobConfiguration.newBuilder( - new StringSubstitutor(Map.of( - "dataset", testDataset)).replace( + new StringSubstitutor(Map.of( + "dataset", testDataset)).replace( """ - INSERT INTO ${dataset}.users_raw (`_airbyte_data`, `_airbyte_raw_id`, `_airbyte_extracted_at`) VALUES - (JSON'{"id": 1, "updated_at": "2023-01-01T01:00:00Z", "string": 
"Alice", "struct": {"city": "San Francisco", "state": "CA"}, "integer": 42}', 'd7b81af0-01da-4846-a650-cc398986bc99', '2023-01-01T00:00:00Z'), - (JSON'{"id": 1, "updated_at": "2023-01-01T02:00:00Z", "string": "Alice", "struct": {"city": "San Diego", "state": "CA"}, "integer": 84}', '80c99b54-54b4-43bd-b51b-1f67dafa2c52', '2023-01-01T00:00:00Z'), - (JSON'{"id": 2, "updated_at": "2023-01-01T03:00:00Z", "string": "Bob", "integer": "oops"}', 'ad690bfb-c2c2-4172-bd73-a16c86ccbb67', '2023-01-01T00:00:00Z'); - - INSERT INTO ${dataset}.users_final_foo (_airbyte_raw_id, _airbyte_extracted_at, _airbyte_meta, `id`, `updated_at`, `string`, `struct`, `integer`) values - ('64f4390f-3da1-4b65-b64a-a6c67497f18d', '2022-12-31T00:00:00Z', JSON'{"errors": []}', 1, '2022-12-31T00:00:00Z', 'Alice', NULL, NULL); - """)) + INSERT INTO ${dataset}.users_raw (`_airbyte_data`, `_airbyte_raw_id`, `_airbyte_extracted_at`) VALUES + (JSON'{"id": 1, "updated_at": "2023-01-01T01:00:00Z", "string": "Alice", "struct": {"city": "San Francisco", "state": "CA"}, "integer": 42}', 'd7b81af0-01da-4846-a650-cc398986bc99', '2023-01-01T00:00:00Z'), + (JSON'{"id": 1, "updated_at": "2023-01-01T02:00:00Z", "string": "Alice", "struct": {"city": "San Diego", "state": "CA"}, "integer": 84}', '80c99b54-54b4-43bd-b51b-1f67dafa2c52', '2023-01-01T00:00:00Z'), + (JSON'{"id": 2, "updated_at": "2023-01-01T03:00:00Z", "string": "Bob", "integer": "oops"}', 'ad690bfb-c2c2-4172-bd73-a16c86ccbb67', '2023-01-01T00:00:00Z'); + + INSERT INTO ${dataset}.users_final_foo (_airbyte_raw_id, _airbyte_extracted_at, _airbyte_meta, `id`, `updated_at`, `string`, `struct`, `integer`) values + ('64f4390f-3da1-4b65-b64a-a6c67497f18d', '2022-12-31T00:00:00Z', JSON'{"errors": []}', 1, '2022-12-31T00:00:00Z', 'Alice', NULL, NULL); + """)) .build()); final String sql = GENERATOR.updateTable("_foo", fullRefreshAppendStreamConfig()); @@ -614,30 +579,30 @@ public void testCdcUpdate() throws InterruptedException { createRawTable(); 
createFinalTableCdc(); bq.query(QueryJobConfiguration.newBuilder( - new StringSubstitutor(Map.of( - "dataset", testDataset)).replace( + new StringSubstitutor(Map.of( + "dataset", testDataset)).replace( """ - -- records from a previous sync - INSERT INTO ${dataset}.users_raw (`_airbyte_data`, `_airbyte_raw_id`, `_airbyte_extracted_at`, `_airbyte_loaded_at`) VALUES - (JSON'{"id": 1, "_ab_cdc_lsn": 900, "string": "spooky ghost", "_ab_cdc_deleted_at": null}', '64f4390f-3da1-4b65-b64a-a6c67497f18d', '2022-12-31T00:00:00Z', '2022-12-31T00:00:01Z'), - (JSON'{"id": 0, "_ab_cdc_lsn": 901, "string": "zombie", "_ab_cdc_deleted_at": "2022-12-31T00:O0:00Z"}', generate_uuid(), '2022-12-31T00:00:00Z', '2022-12-31T00:00:01Z'), - (JSON'{"id": 5, "_ab_cdc_lsn": 902, "string": "will be deleted", "_ab_cdc_deleted_at": null}', 'b6139181-a42c-45c3-89f2-c4b4bb3a8c9d', '2022-12-31T00:00:00Z', '2022-12-31T00:00:01Z'); - INSERT INTO ${dataset}.users_final (_airbyte_raw_id, _airbyte_extracted_at, _airbyte_meta, `id`, `_ab_cdc_lsn`, `string`, `struct`, `integer`) values - ('64f4390f-3da1-4b65-b64a-a6c67497f18d', '2022-12-31T00:00:00Z', JSON'{}', 1, 900, 'spooky ghost', NULL, NULL), - ('b6139181-a42c-45c3-89f2-c4b4bb3a8c9d', '2022-12-31T00:00:00Z', JSON'{}', 5, 901, 'will be deleted', NULL, NULL); - - -- new records from the current sync - INSERT INTO ${dataset}.users_raw (`_airbyte_data`, `_airbyte_raw_id`, `_airbyte_extracted_at`) VALUES - (JSON'{"id": 2, "_ab_cdc_lsn": 10001, "_ab_cdc_deleted_at": null, "string": "alice"}', generate_uuid(), '2023-01-01T00:00:00Z'), - (JSON'{"id": 2, "_ab_cdc_lsn": 10002, "_ab_cdc_deleted_at": null, "string": "alice2"}', generate_uuid(), '2023-01-01T00:00:00Z'), - (JSON'{"id": 3, "_ab_cdc_lsn": 10003, "_ab_cdc_deleted_at": null, "string": "bob"}', generate_uuid(), '2023-01-01T00:00:00Z'), - (JSON'{"id": 1, "_ab_cdc_lsn": 10004, "_ab_cdc_deleted_at": "2022-12-31T23:59:59Z"}', generate_uuid(), '2023-01-01T00:00:00Z'), - (JSON'{"id": 0, "_ab_cdc_lsn": 10005, 
"_ab_cdc_deleted_at": null, "string": "zombie_returned"}', generate_uuid(), '2023-01-01T00:00:00Z'), - -- CDC generally outputs an explicit null for deleted_at, but verify that we can also handle the case where deleted_at is unset. - (JSON'{"id": 4, "_ab_cdc_lsn": 10006, "string": "charlie"}', generate_uuid(), '2023-01-01T00:00:00Z'), - -- Verify that we can handle weird values in deleted_at - (JSON'{"id": 5, "_ab_cdc_lsn": 10007, "_ab_cdc_deleted_at": {}, "string": "david"}', generate_uuid(), '2023-01-01T00:00:00Z'); - """)) + -- records from a previous sync + INSERT INTO ${dataset}.users_raw (`_airbyte_data`, `_airbyte_raw_id`, `_airbyte_extracted_at`, `_airbyte_loaded_at`) VALUES + (JSON'{"id": 1, "_ab_cdc_lsn": 900, "string": "spooky ghost", "_ab_cdc_deleted_at": null}', '64f4390f-3da1-4b65-b64a-a6c67497f18d', '2022-12-31T00:00:00Z', '2022-12-31T00:00:01Z'), + (JSON'{"id": 0, "_ab_cdc_lsn": 901, "string": "zombie", "_ab_cdc_deleted_at": "2022-12-31T00:O0:00Z"}', generate_uuid(), '2022-12-31T00:00:00Z', '2022-12-31T00:00:01Z'), + (JSON'{"id": 5, "_ab_cdc_lsn": 902, "string": "will be deleted", "_ab_cdc_deleted_at": null}', 'b6139181-a42c-45c3-89f2-c4b4bb3a8c9d', '2022-12-31T00:00:00Z', '2022-12-31T00:00:01Z'); + INSERT INTO ${dataset}.users_final (_airbyte_raw_id, _airbyte_extracted_at, _airbyte_meta, `id`, `_ab_cdc_lsn`, `string`, `struct`, `integer`) values + ('64f4390f-3da1-4b65-b64a-a6c67497f18d', '2022-12-31T00:00:00Z', JSON'{}', 1, 900, 'spooky ghost', NULL, NULL), + ('b6139181-a42c-45c3-89f2-c4b4bb3a8c9d', '2022-12-31T00:00:00Z', JSON'{}', 5, 901, 'will be deleted', NULL, NULL); + + -- new records from the current sync + INSERT INTO ${dataset}.users_raw (`_airbyte_data`, `_airbyte_raw_id`, `_airbyte_extracted_at`) VALUES + (JSON'{"id": 2, "_ab_cdc_lsn": 10001, "_ab_cdc_deleted_at": null, "string": "alice"}', generate_uuid(), '2023-01-01T00:00:00Z'), + (JSON'{"id": 2, "_ab_cdc_lsn": 10002, "_ab_cdc_deleted_at": null, "string": "alice2"}', generate_uuid(), 
'2023-01-01T00:00:00Z'), + (JSON'{"id": 3, "_ab_cdc_lsn": 10003, "_ab_cdc_deleted_at": null, "string": "bob"}', generate_uuid(), '2023-01-01T00:00:00Z'), + (JSON'{"id": 1, "_ab_cdc_lsn": 10004, "_ab_cdc_deleted_at": "2022-12-31T23:59:59Z"}', generate_uuid(), '2023-01-01T00:00:00Z'), + (JSON'{"id": 0, "_ab_cdc_lsn": 10005, "_ab_cdc_deleted_at": null, "string": "zombie_returned"}', generate_uuid(), '2023-01-01T00:00:00Z'), + -- CDC generally outputs an explicit null for deleted_at, but verify that we can also handle the case where deleted_at is unset. + (JSON'{"id": 4, "_ab_cdc_lsn": 10006, "string": "charlie"}', generate_uuid(), '2023-01-01T00:00:00Z'), + -- Verify that we can handle weird values in deleted_at + (JSON'{"id": 5, "_ab_cdc_lsn": 10007, "_ab_cdc_deleted_at": {}, "string": "david"}', generate_uuid(), '2023-01-01T00:00:00Z'); + """)) .build()); final String sql = GENERATOR.updateTable("", cdcStreamConfig()); @@ -678,18 +643,18 @@ public void testCdcOrdering_updateAfterDelete() throws InterruptedException { createRawTable(); createFinalTableCdc(); bq.query(QueryJobConfiguration.newBuilder( - new StringSubstitutor(Map.of( - "dataset", testDataset)).replace( + new StringSubstitutor(Map.of( + "dataset", testDataset)).replace( """ - -- Write raw deletion record from the first batch, which resulted in an empty final table. - -- Note the non-null loaded_at - this is to simulate that we previously ran T+D on this record. - INSERT INTO ${dataset}.users_raw (`_airbyte_data`, `_airbyte_raw_id`, `_airbyte_extracted_at`, `_airbyte_loaded_at`) VALUES - (JSON'{"id": 1, "_ab_cdc_lsn": 10001, "_ab_cdc_deleted_at": "2023-01-01T00:01:00Z"}', generate_uuid(), '2023-01-01T00:00:00Z', '2023-01-01T00:00:01Z'); - - -- insert raw record from the second record batch - this is an outdated record that should be ignored. 
- INSERT INTO ${dataset}.users_raw (`_airbyte_data`, `_airbyte_raw_id`, `_airbyte_extracted_at`) VALUES - (JSON'{"id": 1, "_ab_cdc_lsn": 10000, "string": "alice"}', generate_uuid(), '2023-01-01T00:00:00Z'); - """)) + -- Write raw deletion record from the first batch, which resulted in an empty final table. + -- Note the non-null loaded_at - this is to simulate that we previously ran T+D on this record. + INSERT INTO ${dataset}.users_raw (`_airbyte_data`, `_airbyte_raw_id`, `_airbyte_extracted_at`, `_airbyte_loaded_at`) VALUES + (JSON'{"id": 1, "_ab_cdc_lsn": 10001, "_ab_cdc_deleted_at": "2023-01-01T00:01:00Z"}', generate_uuid(), '2023-01-01T00:00:00Z', '2023-01-01T00:00:01Z'); + + -- insert raw record from the second record batch - this is an outdated record that should be ignored. + INSERT INTO ${dataset}.users_raw (`_airbyte_data`, `_airbyte_raw_id`, `_airbyte_extracted_at`) VALUES + (JSON'{"id": 1, "_ab_cdc_lsn": 10000, "string": "alice"}', generate_uuid(), '2023-01-01T00:00:00Z'); + """)) .build()); final String sql = GENERATOR.updateTable("", cdcStreamConfig()); @@ -724,19 +689,19 @@ public void testCdcOrdering_insertAfterDelete() throws InterruptedException { createRawTable(); createFinalTableCdc(); bq.query(QueryJobConfiguration.newBuilder( - new StringSubstitutor(Map.of( - "dataset", testDataset)).replace( + new StringSubstitutor(Map.of( + "dataset", testDataset)).replace( """ - -- records from the first batch - INSERT INTO ${dataset}.users_raw (`_airbyte_data`, `_airbyte_raw_id`, `_airbyte_extracted_at`, `_airbyte_loaded_at`) VALUES - (JSON'{"id": 1, "_ab_cdc_lsn": 10002, "string": "alice_reinsert"}', '64f4390f-3da1-4b65-b64a-a6c67497f18d', '2023-01-01T00:00:00Z', '2023-01-01T00:00:01Z'); - INSERT INTO ${dataset}.users_final (_airbyte_raw_id, _airbyte_extracted_at, _airbyte_meta, `id`, `_ab_cdc_lsn`, `string`) values - ('64f4390f-3da1-4b65-b64a-a6c67497f18d', '2023-01-01T00:00:00Z', JSON'{}', 1, 10002, 'alice_reinsert'); - - -- second record batch - INSERT 
INTO ${dataset}.users_raw (`_airbyte_data`, `_airbyte_raw_id`, `_airbyte_extracted_at`) VALUES - (JSON'{"id": 1, "_ab_cdc_lsn": 10001, "_ab_cdc_deleted_at": "2023-01-01T00:01:00Z"}', generate_uuid(), '2023-01-01T00:00:00Z'); - """)) + -- records from the first batch + INSERT INTO ${dataset}.users_raw (`_airbyte_data`, `_airbyte_raw_id`, `_airbyte_extracted_at`, `_airbyte_loaded_at`) VALUES + (JSON'{"id": 1, "_ab_cdc_lsn": 10002, "string": "alice_reinsert"}', '64f4390f-3da1-4b65-b64a-a6c67497f18d', '2023-01-01T00:00:00Z', '2023-01-01T00:00:01Z'); + INSERT INTO ${dataset}.users_final (_airbyte_raw_id, _airbyte_extracted_at, _airbyte_meta, `id`, `_ab_cdc_lsn`, `string`) values + ('64f4390f-3da1-4b65-b64a-a6c67497f18d', '2023-01-01T00:00:00Z', JSON'{}', 1, 10002, 'alice_reinsert'); + + -- second record batch + INSERT INTO ${dataset}.users_raw (`_airbyte_data`, `_airbyte_raw_id`, `_airbyte_extracted_at`) VALUES + (JSON'{"id": 1, "_ab_cdc_lsn": 10001, "_ab_cdc_deleted_at": "2023-01-01T00:01:00Z"}', generate_uuid(), '2023-01-01T00:00:00Z'); + """)) .build()); // Run the second round of typing and deduping. This should do nothing to the final table, because // the delete is outdated. @@ -808,18 +773,18 @@ private StreamConfig fullRefreshOverwriteStreamConfig() { // Some of them are identical to what the sql generator does, and that's intentional. 
private void createRawTable() throws InterruptedException { bq.query(QueryJobConfiguration.newBuilder( - new StringSubstitutor(Map.of( - "dataset", testDataset)).replace( + new StringSubstitutor(Map.of( + "dataset", testDataset)).replace( """ - CREATE TABLE ${dataset}.users_raw ( - _airbyte_raw_id STRING NOT NULL, - _airbyte_data JSON NOT NULL, - _airbyte_extracted_at TIMESTAMP NOT NULL, - _airbyte_loaded_at TIMESTAMP - ) PARTITION BY ( - DATE_TRUNC(_airbyte_extracted_at, DAY) - ) CLUSTER BY _airbyte_loaded_at; - """)) + CREATE TABLE ${dataset}.users_raw ( + _airbyte_raw_id STRING NOT NULL, + _airbyte_data JSON NOT NULL, + _airbyte_extracted_at TIMESTAMP NOT NULL, + _airbyte_loaded_at TIMESTAMP + ) PARTITION BY ( + DATE_TRUNC(_airbyte_extracted_at, DAY) + ) CLUSTER BY _airbyte_loaded_at; + """)) .build()); } @@ -829,63 +794,63 @@ private void createFinalTable() throws InterruptedException { private void createFinalTable(String suffix) throws InterruptedException { bq.query(QueryJobConfiguration.newBuilder( - new StringSubstitutor(Map.of( - "dataset", testDataset, - "suffix", suffix)).replace( + new StringSubstitutor(Map.of( + "dataset", testDataset, + "suffix", suffix)).replace( """ - CREATE TABLE ${dataset}.users_final${suffix} ( - _airbyte_raw_id STRING NOT NULL, - _airbyte_extracted_at TIMESTAMP NOT NULL, - _airbyte_meta JSON NOT NULL, - `id` INT64, - `updated_at` TIMESTAMP, - `struct` JSON, - `array` JSON, - `string` STRING, - `number` NUMERIC, - `integer` INT64, - `boolean` BOOL, - `timestamp_with_timezone` TIMESTAMP, - `timestamp_without_timezone` DATETIME, - `time_with_timezone` STRING, - `time_without_timezone` TIME, - `date` DATE, - `unknown` JSON - ) - PARTITION BY (DATE_TRUNC(_airbyte_extracted_at, DAY)) - CLUSTER BY id, _airbyte_extracted_at; - """)) + CREATE TABLE ${dataset}.users_final${suffix} ( + _airbyte_raw_id STRING NOT NULL, + _airbyte_extracted_at TIMESTAMP NOT NULL, + _airbyte_meta JSON NOT NULL, + `id` INT64, + `updated_at` TIMESTAMP, + 
`struct` JSON, + `array` JSON, + `string` STRING, + `number` NUMERIC, + `integer` INT64, + `boolean` BOOL, + `timestamp_with_timezone` TIMESTAMP, + `timestamp_without_timezone` DATETIME, + `time_with_timezone` STRING, + `time_without_timezone` TIME, + `date` DATE, + `unknown` JSON + ) + PARTITION BY (DATE_TRUNC(_airbyte_extracted_at, DAY)) + CLUSTER BY id, _airbyte_extracted_at; + """)) .build()); } private void createFinalTableCdc() throws InterruptedException { bq.query(QueryJobConfiguration.newBuilder( - new StringSubstitutor(Map.of( - "dataset", testDataset)).replace( + new StringSubstitutor(Map.of( + "dataset", testDataset)).replace( """ - CREATE TABLE ${dataset}.users_final ( - _airbyte_raw_id STRING NOT NULL, - _airbyte_extracted_at TIMESTAMP NOT NULL, - _airbyte_meta JSON NOT NULL, - `id` INT64, - `_ab_cdc_deleted_at` TIMESTAMP, - `_ab_cdc_lsn` INT64, - `struct` JSON, - `array` JSON, - `string` STRING, - `number` NUMERIC, - `integer` INT64, - `boolean` BOOL, - `timestamp_with_timezone` TIMESTAMP, - `timestamp_without_timezone` DATETIME, - `time_with_timezone` STRING, - `time_without_timezone` TIME, - `date` DATE, - `unknown` JSON - ) - PARTITION BY (DATE_TRUNC(_airbyte_extracted_at, DAY)) - CLUSTER BY id, _airbyte_extracted_at; - """)) + CREATE TABLE ${dataset}.users_final ( + _airbyte_raw_id STRING NOT NULL, + _airbyte_extracted_at TIMESTAMP NOT NULL, + _airbyte_meta JSON NOT NULL, + `id` INT64, + `_ab_cdc_deleted_at` TIMESTAMP, + `_ab_cdc_lsn` INT64, + `struct` JSON, + `array` JSON, + `string` STRING, + `number` NUMERIC, + `integer` INT64, + `boolean` BOOL, + `timestamp_with_timezone` TIMESTAMP, + `timestamp_without_timezone` DATETIME, + `time_with_timezone` STRING, + `time_without_timezone` TIME, + `date` DATE, + `unknown` JSON + ) + PARTITION BY (DATE_TRUNC(_airbyte_extracted_at, DAY)) + CLUSTER BY id, _airbyte_extracted_at; + """)) .build()); } @@ -899,10 +864,10 @@ private static void logAndExecute(final String sql) throws InterruptedException * 
That's annoying for us since we're working with small test data, so pull everything into a list, and convert them * into maps of column name -> value. *

- * Note that the values have reasonable types; see {@link #toMap(Schema, FieldValueList)} for details. + * Note that the values have reasonable types; see {@link #toJson(Schema, FieldValueList)} for details. */ - public static List> toMaps(TableResult result) { - return result.streamAll().map(row -> toMap(result.getSchema(), row)).toList(); + public static List toJsonRecords(TableResult result) { + return result.streamAll().map(row -> toJson(result.getSchema(), row)).toList(); } /** @@ -911,209 +876,33 @@ public static List> toMaps(TableResult result) { *

* SQL nulls are represented as explicit null values. JSON nulls are represented as {@link com.fasterxml.jackson.databind.node.NullNode}. */ - private static LinkedHashMap toMap(Schema schema, FieldValueList row) { - final LinkedHashMap map = new LinkedHashMap<>(); + private static JsonNode toJson(Schema schema, FieldValueList row) { + final ObjectNode json = (ObjectNode) Jsons.emptyObject(); for (int i = 0; i < schema.getFields().size(); i++) { final Field field = schema.getFields().get(i); final FieldValue value = row.get(i); - Object typedValue; - if (value.isNull()) { - typedValue = null; - } else { + JsonNode typedValue; + if (!value.isNull()) { typedValue = switch (field.getType().getStandardType()) { - case BOOL -> value.getBooleanValue(); - case INT64 -> value.getLongValue(); - case FLOAT64 -> value.getDoubleValue(); - case NUMERIC, BIGNUMERIC -> value.getNumericValue(); - case STRING -> value.getStringValue(); - case BYTES -> value.getBytesValue(); - case TIMESTAMP -> value.getTimestampInstant(); + case BOOL -> Jsons.jsonNode(value.getBooleanValue()); + case INT64 -> Jsons.jsonNode(value.getLongValue()); + case FLOAT64 -> Jsons.jsonNode(value.getDoubleValue()); + case NUMERIC, BIGNUMERIC -> Jsons.jsonNode(value.getNumericValue()); + case STRING -> Jsons.jsonNode(value.getStringValue()); + // naively converting an Instant returns a DecimalNode with the unix epoch, so instead we manually stringify it + case TIMESTAMP -> Jsons.jsonNode(value.getTimestampInstant().toString()); // value.getTimestampInstant() fails to parse these types - case DATE, DATETIME, TIME -> value.getStringValue(); + case DATE, DATETIME, TIME -> Jsons.jsonNode(value.getStringValue()); // bigquery returns JSON columns as string; manually parse it into a JsonNode - case JSON -> Jsons.deserialize(value.getStringValue()); + case JSON -> Jsons.jsonNode(Jsons.deserialize(value.getStringValue())); // Default case for weird types (struct, array, geography, interval) - default -> 
value.getStringValue(); + default -> Jsons.jsonNode(value.getStringValue()); }; + json.set(field.getName(), typedValue); } - map.put(field.getName(), typedValue); - } - return map; - } - - /** - * Asserts that the expected rows match the query result. Please don't read this code. Trust the - * logs. - */ - private void assertQueryResult(final List>> expectedRows, final TableResult result) { - List> actualRows = toMaps(result); - List>> missingRows = new ArrayList<>(); - Set> matchedRows = new HashSet<>(); - boolean foundMultiMatch = false; - // For each expected row, iterate through all actual rows to find a match. - for (Map> expectedRow : expectedRows) { - final List> matchingRows = actualRows.stream().filter(actualRow -> { - // We only want to check the fields that are specified in the expected row. - // E.g.we shouldn't assert against randomized UUIDs. - for (Entry> expectedEntry : expectedRow.entrySet()) { - // If the expected value is empty, we just check that the actual value is null. - if (expectedEntry.getValue().isEmpty()) { - if (actualRow.get(expectedEntry.getKey()) != null) { - // It wasn't null, so this actualRow doesn't match the expected row - return false; - } else { - // It _was_ null, so we can move on the next key. - continue; - } - } - // If the expected value is non-empty, we check that the actual value matches. - if (!expectedEntry.getValue().get().equals(actualRow.get(expectedEntry.getKey()))) { - return false; - } - } - return true; - }).toList(); - - if (matchingRows.size() == 0) { - missingRows.add(expectedRow); - } else if (matchingRows.size() > 1) { - foundMultiMatch = true; - } - matchedRows.addAll(matchingRows); - } - - // TODO is the foundMultiMatch condition correct? E.g. what if we try to write the same row twice - // (because of a retry)? Are we - // guaranteed to have some differentiator? 
- if (foundMultiMatch || !missingRows.isEmpty() || matchedRows.size() != actualRows.size()) { - Set> extraRows = actualRows.stream().filter(row -> !matchedRows.contains(row)).collect(toSet()); - fail(diff(missingRows, extraRows)); - } - } - - private static String sortedToString(Map record) { - return sortedToString(record, Function.identity()); - } - - private static String sortedToString(Map record, Function valueMapper) { - return "{" - + record.entrySet().stream() - .sorted(Entry.comparingByKey()) - .map(entry -> entry.getKey() + "=" + valueMapper.apply(entry.getValue())) - .collect(Collectors.joining(", ")) - + "}"; - } - - /** - * Attempts to generate a pretty-print diff of the rows. Output will look something like: - * {@code Missing row: {id=1} Extra row: {id=2} Mismatched row: id=3; foo_column expected String - * arst, got Long 42 } - * - * Assumes that rows with the same id and cursor are the same row. - */ - private static String diff(List>> missingRowsRaw, Set> extraRowsRaw) { - List> missingRows = missingRowsRaw.stream() - .map(row -> { - // Extract everything from inside the optionals. 
- Map newRow = new HashMap<>(); - for (Entry> entry : row.entrySet()) { - newRow.put(entry.getKey(), entry.getValue().orElse(null)); - } - return newRow; - }).sorted(ROW_COMPARATOR) - .toList(); - - List> extraRows = extraRowsRaw.stream().sorted(ROW_COMPARATOR).toList(); - - String output = ""; - int missingIndex = 0; - int extraIndex = 0; - while (missingIndex < missingRows.size() && extraIndex < extraRows.size()) { - Map missingRow = missingRows.get(missingIndex); - Map extraRow = extraRows.get(extraIndex); - int compare = ROW_COMPARATOR.compare(missingRow, extraRow); - if (compare < 0) { - // missing row is too low - we should print missing rows until we catch up - output += "Missing row: " + sortedToString(missingRow) + "\n"; - missingIndex++; - } else if (compare == 0) { - // rows match - we should print the diff between them - output += "Mismatched row: "; - if (missingRow.containsKey(ID_COLUMN.name())) { - output += "id=" + missingRow.get(ID_COLUMN.name()) + "; "; - } - if (missingRow.containsKey(CURSOR.name())) { - output += "updated_at=" + missingRow.get(CURSOR.name()) + "; "; - } - if (missingRow.containsKey(CDC_CURSOR.name())) { - output += "_ab_cdc_lsn=" + missingRow.get(CDC_CURSOR.name()) + "; "; - } - output += "\n"; - for (String key : missingRow.keySet().stream().sorted().toList()) { - Object missingValue = missingRow.get(key); - Object extraValue = extraRow.get(key); - if (!Objects.equals(missingValue, extraValue)) { - output += " " + key + " expected " + getClassAndValue(missingValue) + ", got " + getClassAndValue(extraValue) + "\n"; - } - } - - missingIndex++; - extraIndex++; - } else { - // extra row is too low - we should print extra rows until we catch up - output += "Extra row: " + sortedToString(extraRow) + "\n"; - extraIndex++; - } - } - while (missingIndex < missingRows.size()) { - Map missingRow = missingRows.get(missingIndex); - output += "Missing row: " + sortedToString(missingRow) + "\n"; - missingIndex++; - } - while (extraIndex < 
extraRows.size()) { - Map extraRow = extraRows.get(extraIndex); - output += "Extra row: " + sortedToString(extraRow) + "\n"; - extraIndex++; - } - return output; - } - - /** - * Compare two rows on the given column. Sorts nulls first. If the values are not the same type, - * assumes the left value is smaller. - */ - private static int compareRowsOnColumn(String column, Map row1, Map row2) { - Comparable r1id = (Comparable) row1.get(column); - Comparable r2id = (Comparable) row2.get(column); - if (r1id == null) { - if (r2id == null) { - return 0; - } else { - return -1; - } - } else { - if (r2id == null) { - return 1; - } else { - if (r1id.getClass().equals(r2id.getClass())) { - // We're doing some very sketchy type-casting nonsense here, but it's guarded by the class equality - // check. - return ((Comparable) r1id).compareTo(r2id); - } else { - // Both values are non-null, but they're not the same type. Assume left is smaller. - return -1; - } - } - } - } - - private static String getClassAndValue(Object o) { - if (o == null) { - return null; - } else { - return o.getClass().getSimpleName() + " " + o; } + return json; } } From 844bba66f733a6e92cb46570acc4e04b7641a474 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Mon, 3 Jul 2023 10:56:52 -0700 Subject: [PATCH 25/46] fix --- .../base/destination/typing_deduping/RecordDiffer.java | 8 ++++---- .../BigQuerySqlGeneratorIntegrationTest.java | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java index 5e4385c0f4e9..96607adde3de 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java +++ 
b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java @@ -50,11 +50,11 @@ public RecordDiffer(Pair... columns) { rawRecordIdentityExtractor = record -> Arrays.stream(columns) .map(column -> getPrintableFieldIfPresent(record.get("_airbyte_data"), column.getKey())) - .collect(Collectors.joining("; ")) + .collect(Collectors.joining(", ")) + getPrintableFieldIfPresent(record, "_airbyte_extracted_at"); finalRecordIdentityExtractor = record -> Arrays.stream(columns) .map(column -> getPrintableFieldIfPresent(record, column.getKey())) - .collect(Collectors.joining("; ")) + .collect(Collectors.joining(", ")) + getPrintableFieldIfPresent(record, "_airbyte_extracted_at"); } @@ -99,7 +99,7 @@ public void diffFinalTableRecords(List expectedRecords, List private static String getPrintableFieldIfPresent(JsonNode record, String field) { if (record.has(field)) { - return field + "=" + record.get(field) + "; "; + return field + "=" + record.get(field); } else { return ""; } @@ -139,7 +139,7 @@ private static String diffRecords(List originalExpectedRecords, if (compare == 0) { // These records should be the same. Find the specific fields that are different. boolean foundMismatch = false; - String mismatchedRecordMessage = "Row had incorrect data:" + recordIdExtractor.apply(expectedRecord) + "\n"; + String mismatchedRecordMessage = "Row had incorrect data: " + recordIdExtractor.apply(expectedRecord) + "\n"; // Iterate through each column in the expected record and compare it to the actual record's value. 
for (String column : Streams.stream(expectedRecord.fieldNames()).sorted().toList()) { if (extractRawData && "_airbyte_data".equals(column)) { diff --git a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGeneratorIntegrationTest.java b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGeneratorIntegrationTest.java index 2c88a54b74c9..e04a963c2fec 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGeneratorIntegrationTest.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGeneratorIntegrationTest.java @@ -333,7 +333,7 @@ public void testDedupRawTable() throws InterruptedException { logAndExecute(sql); final TableResult result = bq.query(QueryJobConfiguration.newBuilder("SELECT * FROM " + streamId.rawTableId(QUOTE)).build()); - DIFFER.diffFinalTableRecords( + DIFFER.diffRawTableRecords( List.of( Jsons.deserialize( """ From 0f1e7ab62de8c359958ee0eb892216213f7e6100 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Mon, 3 Jul 2023 10:59:47 -0700 Subject: [PATCH 26/46] comment --- .../base/destination/typing_deduping/RecordDiffer.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java index 96607adde3de..daed5a4f2bbc 100644 --- 
a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java @@ -106,9 +106,13 @@ private static String getPrintableFieldIfPresent(JsonNode record, String field) } /** - * Generate a human-readable diff between the two lists. Only checks the keys specified in - * expectedRecords. Assumes (in general) that two records with the same PK, cursor, and extracted_at - * are the same record. + * Generate a human-readable diff between the two lists. Assumes (in general) that two records with + * the same PK, cursor, and extracted_at are the same record. + *

+ * Verifies that all values specified in the expected records are correct (_including_ raw_id), and + * that no other fields are present (except for loaded_at and raw_id). We assume that it's impossible + * to verify loaded_at, since it's generated dynamically; however, we do provide the ability to assert + * on the exact raw_id if desired; we simply assume that raw_id is always expected to be present. * * @param identityComparator Returns 0 iff two records are the "same" record (i.e. have the same * PK+cursor+extracted_at) From dc5ab242f700e426f9436060db429b21df6b1921 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Mon, 3 Jul 2023 11:04:12 -0700 Subject: [PATCH 27/46] naming+comment --- .../destination/typing_deduping/RecordDiffer.java | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java index daed5a4f2bbc..c3ee5bba7234 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java @@ -35,11 +35,16 @@ public class RecordDiffer { private final Comparator finalRecordSortComparator; private final Function finalRecordIdentityExtractor; - public RecordDiffer(Pair... columns) { + /** + * @param identifyingColumns Which fields constitute a unique record (typically PK+cursor). Do _not_ + * include extracted_at; it is handled automatically. + */ + public RecordDiffer(Pair... 
identifyingColumns) { // Start with a noop comparator for convenience + // The raw and final stuff are almost identical, except the raw version has to extract _airbyte_data first. Comparator rawIdComp = Comparator.comparing(record -> 0); Comparator finalIdComp = Comparator.comparing(record -> 0); - for (Pair column : columns) { + for (Pair column : identifyingColumns) { rawIdComp = rawIdComp.thenComparing(record -> extract(record.get("_airbyte_data"), column.getKey(), column.getValue())); finalIdComp = finalIdComp.thenComparing(record -> extract(record, column.getKey(), column.getValue())); } @@ -48,11 +53,11 @@ public RecordDiffer(Pair... columns) { this.finalRecordIdentityComparator = finalIdComp.thenComparing(record -> asTimestampWithTimezone(record.get("_airbyte_extracted_at"))); this.finalRecordSortComparator = finalRecordIdentityComparator.thenComparing(record -> asString(record.get("_airbyte_raw_id"))); - rawRecordIdentityExtractor = record -> Arrays.stream(columns) + rawRecordIdentityExtractor = record -> Arrays.stream(identifyingColumns) .map(column -> getPrintableFieldIfPresent(record.get("_airbyte_data"), column.getKey())) .collect(Collectors.joining(", ")) + getPrintableFieldIfPresent(record, "_airbyte_extracted_at"); - finalRecordIdentityExtractor = record -> Arrays.stream(columns) + finalRecordIdentityExtractor = record -> Arrays.stream(identifyingColumns) .map(column -> getPrintableFieldIfPresent(record, column.getKey())) .collect(Collectors.joining(", ")) + getPrintableFieldIfPresent(record, "_airbyte_extracted_at"); From a607793dc05ed7b59e182af6c2a35d1870953cc0 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Mon, 3 Jul 2023 11:09:43 -0700 Subject: [PATCH 28/46] one more comment --- .../base/destination/typing_deduping/RecordDiffer.java | 1 + 1 file changed, 1 insertion(+) diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java 
b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java index c3ee5bba7234..38d772d9e5cc 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java @@ -353,6 +353,7 @@ private static LocalDate asDate(JsonNode node) { } } + // Generics? Never heard of 'em. (I'm sorry) private static Comparable extract(JsonNode node, String field, AirbyteType type) { if (type instanceof AirbyteProtocolType t) { return switch (t) { From 84c387da95ab03b77b1ab7018ad78ea8d69f07e1 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Mon, 3 Jul 2023 11:18:18 -0700 Subject: [PATCH 29/46] better assert --- .../destination/typing_deduping/RecordDiffer.java | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java index 38d772d9e5cc..88e4feb751b5 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java @@ -1,7 +1,6 @@ package io.airbyte.integrations.base.destination.typing_deduping; -import static org.junit.jupiter.api.Assertions.assertAll; -import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.*; import com.fasterxml.jackson.databind.JsonNode; import 
com.google.common.collect.Streams; @@ -87,7 +86,9 @@ public void diffRawTableRecords(List expectedRecords, List a rawRecordIdentityExtractor, true); - assertTrue(diff.isEmpty(), "Raw table was incorrect.\n" + diff); + if (!diff.isEmpty()) { + fail("Raw table was incorrect.\n" + diff); + } } public void diffFinalTableRecords(List expectedRecords, List actualRecords) { @@ -99,7 +100,9 @@ public void diffFinalTableRecords(List expectedRecords, List finalRecordIdentityExtractor, false); - assertTrue(diff.isEmpty(), "Final table was incorrect.\n" + diff); + if (!diff.isEmpty()) { + fail("Final table was incorrect.\n" + diff); + } } private static String getPrintableFieldIfPresent(JsonNode record, String field) { From ffa9df0afc2f7604e2f5b409995c7ee2fb7a87e2 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Mon, 3 Jul 2023 11:19:47 -0700 Subject: [PATCH 30/46] remove unnecessary thing --- .../typing_deduping/AirbyteType.java | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/airbyte-integrations/bases/base-typing-deduping/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/AirbyteType.java b/airbyte-integrations/bases/base-typing-deduping/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/AirbyteType.java index 2d3b0628e8c4..7c6dcc28597d 100644 --- a/airbyte-integrations/bases/base-typing-deduping/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/AirbyteType.java +++ b/airbyte-integrations/bases/base-typing-deduping/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/AirbyteType.java @@ -74,8 +74,6 @@ static AirbyteType fromJsonSchema(final JsonNode schema) { return AirbyteTypeUtils.getAirbyteProtocolType(schema); } - public LinkedHashMap asColumns(); - private static Struct getStruct(final JsonNode schema) { final LinkedHashMap propertiesMap = new LinkedHashMap<>(); final JsonNode properties = schema.get("properties"); @@ -109,11 +107,6 @@ public static AirbyteProtocolType 
matches(final String type) { } } - @Override - public LinkedHashMap asColumns() { - throw new UnsupportedOperationException("Basic types cannot be converted to columns."); - } - } /** @@ -121,18 +114,10 @@ public LinkedHashMap asColumns() { */ record Struct(LinkedHashMap properties) implements AirbyteType { - @Override - public LinkedHashMap asColumns() { - return properties; - } } record Array(AirbyteType items) implements AirbyteType { - @Override - public LinkedHashMap asColumns() { - throw new UnsupportedOperationException("Arrays cannot be converted to columns."); - } } /** @@ -142,10 +127,6 @@ public LinkedHashMap asColumns() { */ record UnsupportedOneOf(List options) implements AirbyteType { - @Override - public LinkedHashMap asColumns() { - throw new UnsupportedOperationException("OneOf cannot be converted to columns."); - } } /** From ffd3e3f28e7cd47a98ef146010878b9a985d8cf4 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Mon, 3 Jul 2023 11:24:17 -0700 Subject: [PATCH 31/46] one last thing --- .../BigQuerySqlGeneratorIntegrationTest.java | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGeneratorIntegrationTest.java b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGeneratorIntegrationTest.java index e04a963c2fec..fd3c9bb20088 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGeneratorIntegrationTest.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/BigQuerySqlGeneratorIntegrationTest.java @@ -861,10 +861,7 @@ private static void logAndExecute(final String sql) throws 
InterruptedException /** * TableResult contains records in a somewhat nonintuitive format (and it avoids loading them all into memory). - * That's annoying for us since we're working with small test data, so pull everything into a list, and convert them - * into maps of column name -> value. - *

- * Note that the values have reasonable types; see {@link #toJson(Schema, FieldValueList)} for details. + * That's annoying for us since we're working with small test data, so just pull everything into a list. */ public static List toJsonRecords(TableResult result) { return result.streamAll().map(row -> toJson(result.getSchema(), row)).toList(); @@ -872,9 +869,8 @@ public static List toJsonRecords(TableResult result) { /** * FieldValueList stores everything internally as string (I think?) but provides conversions to more useful types. - * This method does that conversion, using the schema to determine which type is most appropriate. - *

- * SQL nulls are represented as explicit null values. JSON nulls are represented as {@link com.fasterxml.jackson.databind.node.NullNode}. + * This method does that conversion, using the schema to determine which type is most appropriate. Then we just dump + * everything into a jsonnode for interop with RecordDiffer. */ private static JsonNode toJson(Schema schema, FieldValueList row) { final ObjectNode json = (ObjectNode) Jsons.emptyObject(); @@ -896,7 +892,7 @@ private static JsonNode toJson(Schema schema, FieldValueList row) { // bigquery returns JSON columns as string; manually parse it into a JsonNode case JSON -> Jsons.jsonNode(Jsons.deserialize(value.getStringValue())); - // Default case for weird types (struct, array, geography, interval) + // Default case for weird types (struct, array, geography, interval, bytes) default -> Jsons.jsonNode(value.getStringValue()); }; json.set(field.getName(), typedValue); From 3dbaa160f0367ad2eb0e8dc43d09d71a7dbf4622 Mon Sep 17 00:00:00 2001 From: edgao Date: Mon, 3 Jul 2023 23:08:13 +0000 Subject: [PATCH 32/46] Automated Commit - Formatting Changes --- .../BaseTypingDedupingTest.java | 13 +------ .../typing_deduping/RecordDiffer.java | 38 +++++++++++-------- 2 files changed, 24 insertions(+), 27 deletions(-) diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java index a8e82447f661..c8051c58cad9 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java @@ -4,12 +4,8 @@ package 
io.airbyte.integrations.base.destination.typing_deduping; -import static org.junit.jupiter.api.Assertions.assertAll; -import static org.junit.jupiter.api.Assertions.assertTrue; - import com.fasterxml.jackson.databind.JsonNode; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Streams; import io.airbyte.commons.features.EnvVariableFeatureFlags; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.lang.Exceptions; @@ -30,15 +26,9 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.time.Instant; import java.util.Collections; -import java.util.Comparator; -import java.util.LinkedHashMap; import java.util.List; -import java.util.Map; -import java.util.Objects; import java.util.UUID; -import java.util.function.Function; import org.apache.commons.lang3.RandomStringUtils; import org.apache.commons.lang3.tuple.Pair; import org.junit.jupiter.api.AfterEach; @@ -81,8 +71,7 @@ public abstract class BaseTypingDedupingTest { private static final RecordDiffer DIFFER = new RecordDiffer( Pair.of("id1", AirbyteProtocolType.INTEGER), Pair.of("id2", AirbyteProtocolType.INTEGER), - Pair.of("updated_at", AirbyteProtocolType.TIMESTAMP_WITH_TIMEZONE) - ); + Pair.of("updated_at", AirbyteProtocolType.TIMESTAMP_WITH_TIMEZONE)); private String randomSuffix; private JsonNode config; diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java index 88e4feb751b5..0bc8543a3c24 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java 
@@ -1,3 +1,7 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + package io.airbyte.integrations.base.destination.typing_deduping; import static org.junit.jupiter.api.Assertions.*; @@ -23,7 +27,8 @@ import org.apache.commons.lang3.tuple.Pair; /** - * Utility class to generate human-readable diffs between expected and actual records. Assumes 1s1t output format. + * Utility class to generate human-readable diffs between expected and actual records. Assumes 1s1t + * output format. */ public class RecordDiffer { @@ -36,11 +41,12 @@ public class RecordDiffer { /** * @param identifyingColumns Which fields constitute a unique record (typically PK+cursor). Do _not_ - * include extracted_at; it is handled automatically. + * include extracted_at; it is handled automatically. */ public RecordDiffer(Pair... identifyingColumns) { // Start with a noop comparator for convenience - // The raw and final stuff are almost identical, except the raw version has to extract _airbyte_data first. + // The raw and final stuff are almost identical, except the raw version has to extract _airbyte_data + // first. Comparator rawIdComp = Comparator.comparing(record -> 0); Comparator finalIdComp = Comparator.comparing(record -> 0); for (Pair column : identifyingColumns) { @@ -63,9 +69,9 @@ public RecordDiffer(Pair... identifyingColumns) { } /** - * In the expected records, a SQL null is represented as a JsonNode without that field at all, and a JSON null is - * represented as a NullNode. For example, in the JSON blob {"name": null}, the `name` field is a JSON null, and the - * `address` field is a SQL null. + * In the expected records, a SQL null is represented as a JsonNode without that field at all, and a + * JSON null is represented as a NullNode. For example, in the JSON blob {"name": null}, the `name` + * field is a JSON null, and the `address` field is a SQL null. 
*/ public void verifySyncResult(List expectedRawRecords, List actualRawRecords, @@ -73,8 +79,7 @@ public void verifySyncResult(List expectedRawRecords, List actualFinalRecords) { assertAll( () -> diffRawTableRecords(expectedRawRecords, actualRawRecords), - () -> diffFinalTableRecords(expectedFinalRecords, actualFinalRecords) - ); + () -> diffFinalTableRecords(expectedFinalRecords, actualFinalRecords)); } public void diffRawTableRecords(List expectedRecords, List actualRecords) { @@ -118,9 +123,10 @@ private static String getPrintableFieldIfPresent(JsonNode record, String field) * the same PK, cursor, and extracted_at are the same record. *

* Verifies that all values specified in the expected records are correct (_including_ raw_id), and - * that no other fields are present (except for loaded_at and raw_id). We assume that it's impossible - * to verify loaded_at, since it's generated dynamically; however, we do provide the ability to assert - * on the exact raw_id if desired; we simply assume that raw_id is always expected to be present. + * that no other fields are present (except for loaded_at and raw_id). We assume that it's + * impossible to verify loaded_at, since it's generated dynamically; however, we do provide the + * ability to assert on the exact raw_id if desired; we simply assume that raw_id is always expected + * to be present. * * @param identityComparator Returns 0 iff two records are the "same" record (i.e. have the same * PK+cursor+extracted_at) @@ -237,8 +243,8 @@ private static boolean jsonNodesNotEquivalent(JsonNode expectedValue, JsonNode a /** * Verify that all fields in the actual record are present in the expected record. This is primarily * relevant for detecting fields that we expected to be null, but actually were not. See - * {@link BaseTypingDedupingTest#dumpFinalTableRecords(String, String)} for an explanation of how SQL/JSON nulls are - * represented in the expected record. + * {@link BaseTypingDedupingTest#dumpFinalTableRecords(String, String)} for an explanation of how + * SQL/JSON nulls are represented in the expected record. *

* This has the side benefit of detecting completely unexpected columns, which would be a very weird * bug but is probably still useful to catch. @@ -255,8 +261,9 @@ private static LinkedHashMap checkForExtraOrNonNullFields(Json } /** - * Produce a pretty-printed error message, e.g. " For column foo, expected 1 but got 2". The leading spaces are - * intentional, to make the message easier to read when it's embedded in a larger stacktrace. + * Produce a pretty-printed error message, e.g. " For column foo, expected 1 but got 2". The leading + * spaces are intentional, to make the message easier to read when it's embedded in a larger + * stacktrace. */ private static String generateFieldError(String fieldname, JsonNode expectedValue, JsonNode actualValue) { String expectedString = expectedValue == null ? "SQL NULL (i.e. no value)" : expectedValue.toString(); @@ -375,4 +382,5 @@ private static Comparable extract(JsonNode node, String field, AirbyteType type) return node.toString(); } } + } From 97f8c199083dedc6e1ca5ec761201877797f1cd3 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Wed, 5 Jul 2023 10:06:55 -0700 Subject: [PATCH 33/46] enable concurrent execution on all java integration tests --- .../typing_deduping/BaseTypingDedupingTest.java | 3 --- .../connectors/destination-bigquery/build.gradle | 8 -------- .../src/main/groovy/airbyte-integration-test-java.gradle | 5 +++++ 3 files changed, 5 insertions(+), 11 deletions(-) diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java index c8051c58cad9..8d264a5cd2e5 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java +++ 
b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java @@ -51,9 +51,6 @@ * sync modes which use a cursor, the stream provides an updated_at field. The stream also has an * _ab_cdc_deleted_at field. */ -// Remember to set `'junit.jupiter.execution.parallel.enabled': 'true'` in your connector's -// build.gradle. -// See destination-bigquery for an example. // If you're running from inside intellij, you must run your specific subclass to get concurrent // execution. @Execution(ExecutionMode.CONCURRENT) diff --git a/airbyte-integrations/connectors/destination-bigquery/build.gradle b/airbyte-integrations/connectors/destination-bigquery/build.gradle index 2229ad250b72..a413759fed0f 100644 --- a/airbyte-integrations/connectors/destination-bigquery/build.gradle +++ b/airbyte-integrations/connectors/destination-bigquery/build.gradle @@ -52,11 +52,3 @@ configurations.all { force 'com.google.api-client:google-api-client:1.31.5' } } - -integrationTestJava { - systemProperties = [ - 'junit.jupiter.execution.parallel.enabled': 'true' - // TODO what's preventing us from turning this on? 
(probably a lot of things) - // 'junit.jupiter.execution.parallel.mode.default': 'concurrent' - ] -} diff --git a/buildSrc/src/main/groovy/airbyte-integration-test-java.gradle b/buildSrc/src/main/groovy/airbyte-integration-test-java.gradle index a6938bc96791..e650889c417c 100644 --- a/buildSrc/src/main/groovy/airbyte-integration-test-java.gradle +++ b/buildSrc/src/main/groovy/airbyte-integration-test-java.gradle @@ -53,6 +53,11 @@ class AirbyteIntegrationTestJavaPlugin implements Plugin { // This is needed to make the destination-snowflake tests succeed - https://github.com/snowflakedb/snowflake-jdbc/issues/589#issuecomment-983944767 jvmArgs = ["--add-opens=java.base/java.nio=ALL-UNNAMED"] + + systemProperties = [ + // Allow tests to set @Execution(ExecutionMode.CONCURRENT) + 'junit.jupiter.execution.parallel.enabled': 'true' + ] } // make sure we create the integrationTest task once in case a standard source test was already initialized From 9934901454a8ddf9877f1a027941fc82254c38fd Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Thu, 6 Jul 2023 09:44:19 -0700 Subject: [PATCH 34/46] add test for default namespace --- .../BaseTypingDedupingTest.java | 50 ++++++++++++++++++- .../AbstractBigQueryTypingDedupingTest.java | 3 ++ 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java index 8d264a5cd2e5..75763214272a 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java @@ -87,6 +87,8 @@ public 
abstract class BaseTypingDedupingTest { * Subclasses should _not_ start testcontainers in this method; that belongs in a BeforeAll method. * The tests in this class are intended to be run concurrently on a shared database and will not * interfere with each other. + *

+ Subclasses which need access to the config may use {@link #getConfig()}. */ protected abstract JsonNode generateConfig() throws Exception; @@ -302,6 +304,41 @@ public void incrementalDedup() throws Exception { verifySyncResult(expectedRawRecords2, expectedFinalRecords2); } + /** + * Identical to {@link #incrementalDedup()}, except that the stream has no namespace. + */ + @Test + public void incrementalDedupDefaultNamespace() throws Exception { + ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams(List.of( + new ConfiguredAirbyteStream() + .withSyncMode(SyncMode.INCREMENTAL) + .withCursorField(List.of("updated_at")) + .withDestinationSyncMode(DestinationSyncMode.APPEND_DEDUP) + .withPrimaryKey(List.of(List.of("id1"), List.of("id2"))) + .withStream(new AirbyteStream() + // NB: we don't call `withNamespace` here + .withName(streamName) + .withJsonSchema(SCHEMA)))); + + // First sync + List messages1 = readMessages("sync1_messages.jsonl", null, streamName); + + runSync(catalog, messages1); + + List expectedRawRecords1 = readRecords("sync1_expectedrecords_dedup_raw.jsonl"); + List expectedFinalRecords1 = readRecords("sync1_expectedrecords_dedup_final.jsonl"); + verifySyncResult(expectedRawRecords1, expectedFinalRecords1, null, streamName); + + // Second sync + List messages2 = readMessages("sync2_messages.jsonl", null, streamName); + + runSync(catalog, messages2); + + List expectedRawRecords2 = readRecords("sync2_expectedrecords_incremental_dedup_raw.jsonl"); + List expectedFinalRecords2 = readRecords("sync2_expectedrecords_incremental_dedup_final.jsonl"); + verifySyncResult(expectedRawRecords2, expectedFinalRecords2, null, streamName); + } + @Test @Disabled("Not yet implemented") public void testLineBreakCharacters() throws Exception { @@ -401,12 +438,19 @@ public void testDataTypes() throws Exception { } private void verifySyncResult(List expectedRawRecords, List expectedFinalRecords) throws Exception { +
verifySyncResult(expectedRawRecords, expectedFinalRecords, streamNamespace, streamName); + } + + private void verifySyncResult(List expectedRawRecords, + List expectedFinalRecords, + String streamNamespace, + String streamName) throws Exception { List actualRawRecords = dumpRawTableRecords(streamNamespace, streamName); List actualFinalRecords = dumpFinalTableRecords(streamNamespace, streamName); DIFFER.verifySyncResult(expectedRawRecords, actualRawRecords, expectedFinalRecords, actualFinalRecords); } - private List readRecords(String filename) throws IOException { + private static List readRecords(String filename) throws IOException { return MoreResources.readResource(filename).lines() .map(String::trim) .filter(line -> !line.isEmpty()) @@ -416,6 +460,10 @@ private List readRecords(String filename) throws IOException { } private List readMessages(String filename) throws IOException { + return readMessages(filename, streamNamespace, streamName); + } + + private static List readMessages(String filename, String streamNamespace, String streamName) throws IOException { return readRecords(filename).stream() .map(record -> Jsons.convertValue(record, AirbyteMessage.class)) .peek(message -> { diff --git a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/AbstractBigQueryTypingDedupingTest.java b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/AbstractBigQueryTypingDedupingTest.java index 4fa25ee9b73f..15fc029302b3 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/AbstractBigQueryTypingDedupingTest.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/typing_deduping/AbstractBigQueryTypingDedupingTest.java @@ -57,6 
+57,9 @@ protected List dumpFinalTableRecords(String streamNamespace, String st @Override protected void teardownStreamAndNamespace(String streamNamespace, String streamName) { + if (streamNamespace == null) { + streamNamespace = BigQueryUtils.getDatasetId(getConfig()); + } // bq.delete simply returns false if the table/schema doesn't exist (e.g. if the connector failed to create it) // so we don't need to do any existence checks here. bq.delete(TableId.of("airbyte", streamNamespace + "_" + streamName)); From 760f8298069610b5d7ff7f0913da97c8f8d7af1f Mon Sep 17 00:00:00 2001 From: edgao Date: Thu, 6 Jul 2023 16:47:45 +0000 Subject: [PATCH 35/46] Automated Commit - Formatting Changes --- .../destination/typing_deduping/BaseTypingDedupingTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java index 75763214272a..d850fbe204f4 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java @@ -444,7 +444,8 @@ private void verifySyncResult(List expectedRawRecords, List private void verifySyncResult(List expectedRawRecords, List expectedFinalRecords, String streamNamespace, - String streamName) throws Exception { + String streamName) + throws Exception { List actualRawRecords = dumpRawTableRecords(streamNamespace, streamName); List actualFinalRecords = dumpFinalTableRecords(streamNamespace, streamName); DIFFER.verifySyncResult(expectedRawRecords, actualRawRecords, expectedFinalRecords, 
actualFinalRecords); From c82cadc1b54469b27ac1ef9e1c3d3567442f7e9e Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Thu, 6 Jul 2023 18:44:14 -0700 Subject: [PATCH 36/46] implement a 2-stream test --- .../BaseTypingDedupingTest.java | 71 +++++++++++++++---- 1 file changed, 57 insertions(+), 14 deletions(-) diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java index d850fbe204f4..42793ff0ec97 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java @@ -14,6 +14,7 @@ import io.airbyte.integrations.base.destination.typing_deduping.AirbyteType.AirbyteProtocolType; import io.airbyte.protocol.models.v0.AirbyteMessage; import io.airbyte.protocol.models.v0.AirbyteStream; +import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair; import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; import io.airbyte.protocol.models.v0.DestinationSyncMode; @@ -26,9 +27,11 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.UUID; +import java.util.stream.Stream; import org.apache.commons.lang3.RandomStringUtils; import org.apache.commons.lang3.tuple.Pair; import org.junit.jupiter.api.AfterEach; @@ -74,6 +77,7 @@ public abstract class BaseTypingDedupingTest { private JsonNode config; private String streamNamespace; private String streamName; + private List 
streamsToTearDown; /** * @return the docker image to run, e.g. {@code "airbyte/destination-bigquery:dev"}. @@ -124,10 +128,11 @@ public abstract class BaseTypingDedupingTest { /** * Delete any resources in the destination associated with this stream AND its namespace. We need * this because we write raw tables to a shared {@code airbyte} namespace, which we can't drop - * wholesale. + * wholesale. Must handle the case where the table/namespace doesn't exist (e.g. if the connector + * crashed without writing any data). *

* In general, this should resemble - * {@code DROP TABLE IF EXISTS airbyte.namespace_name; DROP SCHEMA IF EXISTS namespace}. + * {@code DROP TABLE IF EXISTS airbyte.<namespace>_<name>; DROP SCHEMA IF EXISTS <namespace>}. */ protected abstract void teardownStreamAndNamespace(String streamNamespace, String streamName) throws Exception; @@ -150,12 +155,15 @@ public void setup() throws Exception { config = generateConfig(); streamNamespace = "typing_deduping_test" + getUniqueSuffix(); streamName = "test_stream" + getUniqueSuffix(); + streamsToTearDown = new ArrayList<>(); LOGGER.info("Using stream namespace {} and name {}", streamNamespace, streamName); } @AfterEach public void teardown() throws Exception { - teardownStreamAndNamespace(streamNamespace, streamName); + for (AirbyteStreamNameNamespacePair streamId : streamsToTearDown) { + teardownStreamAndNamespace(streamId.getNamespace(), streamId.getName()); + } } /** @@ -379,30 +387,63 @@ public void testSyncUsesAirbyteStreamNamespaceIfNotNull() throws Exception { .withJsonSchema(SCHEMA)))); } + // TODO duplicate this test for each sync mode. Run 1st+2nd syncs using two streams with the same + // name but different namespace + // TODO maybe we don't even need the single-stream versions... + /** + * Identical to {@link #incrementalDedup()}, except there are two streams with the same name and different namespace. + */ @Test - @Disabled("Not yet implemented") - public void testSyncWriteSameTableNameDifferentNamespace() throws Exception { - // TODO duplicate this test for each sync mode.
Run 1st+2nd syncs using two streams with the same - // name but different namespace: + public void incrementalDedupIdenticalName() throws Exception { + String namespace1 = streamNamespace + "_1"; + String namespace2 = streamNamespace + "_2"; ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams(List.of( new ConfiguredAirbyteStream() - .withSyncMode(SyncMode.FULL_REFRESH) + .withSyncMode(SyncMode.INCREMENTAL) .withCursorField(List.of("updated_at")) - .withDestinationSyncMode(DestinationSyncMode.OVERWRITE) + .withDestinationSyncMode(DestinationSyncMode.APPEND_DEDUP) .withPrimaryKey(List.of(List.of("id1"), List.of("id2"))) .withStream(new AirbyteStream() - .withNamespace(streamNamespace + "_1") + .withNamespace(namespace1) .withName(streamName) .withJsonSchema(SCHEMA)), new ConfiguredAirbyteStream() - .withSyncMode(SyncMode.FULL_REFRESH) + .withSyncMode(SyncMode.INCREMENTAL) .withCursorField(List.of("updated_at")) - .withDestinationSyncMode(DestinationSyncMode.OVERWRITE) + .withDestinationSyncMode(DestinationSyncMode.APPEND_DEDUP) .withPrimaryKey(List.of(List.of("id1"), List.of("id2"))) .withStream(new AirbyteStream() - .withNamespace(streamNamespace + "_2") + .withNamespace(namespace2) .withName(streamName) - .withJsonSchema(SCHEMA)))); + .withJsonSchema(SCHEMA)) + )); + + // First sync + // Read the same set of messages for both streams + List messages1 = Stream.concat( + readMessages("sync1_messages.jsonl", namespace1, streamName).stream(), + readMessages("sync1_messages.jsonl", namespace2, streamName).stream() + ).toList(); + + runSync(catalog, messages1); + + List expectedRawRecords1 = readRecords("sync1_expectedrecords_dedup_raw.jsonl"); + List expectedFinalRecords1 = readRecords("sync1_expectedrecords_dedup_final.jsonl"); + verifySyncResult(expectedRawRecords1, expectedFinalRecords1, namespace1, streamName); + verifySyncResult(expectedRawRecords1, expectedFinalRecords1, namespace2, streamName); + + // Second sync + List messages2 = 
Stream.concat( + readMessages("sync2_messages.jsonl", namespace1, streamName).stream(), + readMessages("sync2_messages.jsonl", namespace2, streamName).stream() + ).toList(); + + runSync(catalog, messages2); + + List expectedRawRecords2 = readRecords("sync2_expectedrecords_incremental_dedup_raw.jsonl"); + List expectedFinalRecords2 = readRecords("sync2_expectedrecords_incremental_dedup_final.jsonl"); + verifySyncResult(expectedRawRecords2, expectedFinalRecords2, namespace1, streamName); + verifySyncResult(expectedRawRecords2, expectedFinalRecords2, namespace2, streamName); } @Test @@ -498,6 +539,8 @@ public void setupProcessFactory() throws IOException { } private void runSync(ConfiguredAirbyteCatalog catalog, List messages) throws Exception { + catalog.getStreams().forEach(s -> streamsToTearDown.add(AirbyteStreamNameNamespacePair.fromAirbyteStream(s.getStream()))); + final WorkerDestinationConfig destinationConfig = new WorkerDestinationConfig() .withConnectionId(UUID.randomUUID()) .withCatalog(convertProtocolObject(catalog, io.airbyte.protocol.models.ConfiguredAirbyteCatalog.class)) From 0fdea845fcf70c8f194038ae17e5866617e4fe04 Mon Sep 17 00:00:00 2001 From: edgao Date: Fri, 7 Jul 2023 01:48:31 +0000 Subject: [PATCH 37/46] Automated Commit - Formatting Changes --- .../typing_deduping/BaseTypingDedupingTest.java | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java index 42793ff0ec97..85ca77f81b69 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java +++ 
b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/BaseTypingDedupingTest.java @@ -391,7 +391,8 @@ public void testSyncUsesAirbyteStreamNamespaceIfNotNull() throws Exception { // name but different namespace // TODO maybe we don't even need the single-stream versions... /** - * Identical to {@link #incrementalDedup()}, except there are two streams with the same name and different namespace. + * Identical to {@link #incrementalDedup()}, except there are two streams with the same name and + * different namespace. */ @Test public void incrementalDedupIdenticalName() throws Exception { @@ -415,15 +416,13 @@ public void incrementalDedupIdenticalName() throws Exception { .withStream(new AirbyteStream() .withNamespace(namespace2) .withName(streamName) - .withJsonSchema(SCHEMA)) - )); + .withJsonSchema(SCHEMA)))); // First sync // Read the same set of messages for both streams List messages1 = Stream.concat( readMessages("sync1_messages.jsonl", namespace1, streamName).stream(), - readMessages("sync1_messages.jsonl", namespace2, streamName).stream() - ).toList(); + readMessages("sync1_messages.jsonl", namespace2, streamName).stream()).toList(); runSync(catalog, messages1); @@ -435,8 +434,7 @@ public void incrementalDedupIdenticalName() throws Exception { // Second sync List messages2 = Stream.concat( readMessages("sync2_messages.jsonl", namespace1, streamName).stream(), - readMessages("sync2_messages.jsonl", namespace2, streamName).stream() - ).toList(); + readMessages("sync2_messages.jsonl", namespace2, streamName).stream()).toList(); runSync(catalog, messages2); From 4442b065118a2543cc70e28a8ec91b5fafebce12 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Mon, 10 Jul 2023 16:16:37 -0700 Subject: [PATCH 38/46] extract methods --- .../typing_deduping/RecordDiffer.java | 55 ++++++++++++------- 1 file changed, 35 insertions(+), 20 deletions(-) diff --git 
a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java index 0bc8543a3c24..23b97f7ca139 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java @@ -43,29 +43,16 @@ public class RecordDiffer { * @param identifyingColumns Which fields constitute a unique record (typically PK+cursor). Do _not_ * include extracted_at; it is handled automatically. */ - public RecordDiffer(Pair... identifyingColumns) { - // Start with a noop comparator for convenience - // The raw and final stuff are almost identical, except the raw version has to extract _airbyte_data - // first. - Comparator rawIdComp = Comparator.comparing(record -> 0); - Comparator finalIdComp = Comparator.comparing(record -> 0); - for (Pair column : identifyingColumns) { - rawIdComp = rawIdComp.thenComparing(record -> extract(record.get("_airbyte_data"), column.getKey(), column.getValue())); - finalIdComp = finalIdComp.thenComparing(record -> extract(record, column.getKey(), column.getValue())); - } - this.rawRecordIdentityComparator = rawIdComp.thenComparing(record -> asTimestampWithTimezone(record.get("_airbyte_extracted_at"))); + @SafeVarargs + public RecordDiffer(final Pair... 
identifyingColumns) { + this.rawRecordIdentityComparator = buildIdentityComparator(record -> record.get("_airbyte_data"), identifyingColumns); + this.finalRecordIdentityComparator = buildIdentityComparator(record -> record, identifyingColumns); + this.rawRecordSortComparator = rawRecordIdentityComparator.thenComparing(record -> asString(record.get("_airbyte_raw_id"))); - this.finalRecordIdentityComparator = finalIdComp.thenComparing(record -> asTimestampWithTimezone(record.get("_airbyte_extracted_at"))); this.finalRecordSortComparator = finalRecordIdentityComparator.thenComparing(record -> asString(record.get("_airbyte_raw_id"))); - rawRecordIdentityExtractor = record -> Arrays.stream(identifyingColumns) - .map(column -> getPrintableFieldIfPresent(record.get("_airbyte_data"), column.getKey())) - .collect(Collectors.joining(", ")) - + getPrintableFieldIfPresent(record, "_airbyte_extracted_at"); - finalRecordIdentityExtractor = record -> Arrays.stream(identifyingColumns) - .map(column -> getPrintableFieldIfPresent(record, column.getKey())) - .collect(Collectors.joining(", ")) - + getPrintableFieldIfPresent(record, "_airbyte_extracted_at"); + this.rawRecordIdentityExtractor = buildIdentityExtractor(record -> record.get("_airbyte_data"), identifyingColumns); + this.finalRecordIdentityExtractor = buildIdentityExtractor(record -> record, identifyingColumns); } /** @@ -110,6 +97,34 @@ public void diffFinalTableRecords(List expectedRecords, List } } + /** + * Build a Comparator to detect equality between two records. It first compares all the identifying + * columns in order, and breaks ties using extracted_at. + * + * @param dataExtractor A function that extracts the data from a record. For raw records, this should + * return the _airbyte_data field; for final records, this should return the + * record itself. 
+ */ + private Comparator buildIdentityComparator(Function dataExtractor, Pair[] identifyingColumns) { + // Start with a noop comparator for convenience + Comparator comp = Comparator.comparing(record -> 0); + for (Pair column : identifyingColumns) { + comp = comp.thenComparing(record -> extract(dataExtractor.apply(record), column.getKey(), column.getValue())); + } + comp = comp.thenComparing(record -> asTimestampWithTimezone(record.get("_airbyte_extracted_at"))); + return comp; + } + + /** + * See {@link #buildIdentityComparator(Function, Pair[])} for an explanation of dataExtractor. + */ + private Function buildIdentityExtractor(Function dataExtractor, Pair[] identifyingColumns) { + return record -> Arrays.stream(identifyingColumns) + .map(column -> getPrintableFieldIfPresent(dataExtractor.apply(record), column.getKey())) + .collect(Collectors.joining(", ")) + + getPrintableFieldIfPresent(record, "_airbyte_extracted_at"); + } + private static String getPrintableFieldIfPresent(JsonNode record, String field) { if (record.has(field)) { return field + "=" + record.get(field); From 6d437da026aff24db93ad94e043469746b0fb3c4 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Mon, 10 Jul 2023 16:18:11 -0700 Subject: [PATCH 39/46] invert jsonNodesNotEquivalent --- .../base/destination/typing_deduping/RecordDiffer.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java index 23b97f7ca139..5088140b1973 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java +++ 
b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java @@ -184,7 +184,7 @@ private static String diffRecords(List originalExpectedRecords, for (String field : Streams.stream(expectedRawData.fieldNames()).sorted().toList()) { JsonNode expectedValue = expectedRawData.get(field); JsonNode actualValue = actualRawData.get(field); - if (jsonNodesNotEquivalent(expectedValue, actualValue)) { + if (!areJsonNodesEquivalent(expectedValue, actualValue)) { mismatchedRecordMessage += generateFieldError("_airbyte_data." + field, expectedValue, actualValue); foundMismatch = true; } @@ -201,7 +201,7 @@ private static String diffRecords(List originalExpectedRecords, // For all other columns, we can just compare their values directly. JsonNode expectedValue = expectedRecord.get(column); JsonNode actualValue = actualRecord.get(column); - if (jsonNodesNotEquivalent(expectedValue, actualValue)) { + if (!areJsonNodesEquivalent(expectedValue, actualValue)) { mismatchedRecordMessage += generateFieldError("column " + column, expectedValue, actualValue); foundMismatch = true; } @@ -246,7 +246,7 @@ private static String diffRecords(List originalExpectedRecords, return message; } - private static boolean jsonNodesNotEquivalent(JsonNode expectedValue, JsonNode actualValue) { + private static boolean areJsonNodesEquivalent(JsonNode expectedValue, JsonNode actualValue) { // This is kind of sketchy, but seems to work fine for the data we have in our test cases. return !Objects.equals(expectedValue, actualValue) // Objects.equals expects the two values to be the same class. 
From de3c2e902e839ef44ba9a5e795b37235d4c63be5 Mon Sep 17 00:00:00 2001 From: edgao Date: Mon, 10 Jul 2023 23:27:38 +0000 Subject: [PATCH 40/46] Automated Commit - Formatting Changes --- .../base/destination/typing_deduping/RecordDiffer.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java index 5088140b1973..3df9e167d110 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java @@ -101,9 +101,9 @@ public void diffFinalTableRecords(List expectedRecords, List * Build a Comparator to detect equality between two records. It first compares all the identifying * columns in order, and breaks ties using extracted_at. * - * @param dataExtractor A function that extracts the data from a record. For raw records, this should - * return the _airbyte_data field; for final records, this should return the - * record itself. + * @param dataExtractor A function that extracts the data from a record. For raw records, this + * should return the _airbyte_data field; for final records, this should return the record + * itself. 
*/ private Comparator buildIdentityComparator(Function dataExtractor, Pair[] identifyingColumns) { // Start with a noop comparator for convenience @@ -118,7 +118,8 @@ private Comparator buildIdentityComparator(Function buildIdentityExtractor(Function dataExtractor, Pair[] identifyingColumns) { + private Function buildIdentityExtractor(Function dataExtractor, + Pair[] identifyingColumns) { return record -> Arrays.stream(identifyingColumns) .map(column -> getPrintableFieldIfPresent(dataExtractor.apply(record), column.getKey())) .collect(Collectors.joining(", ")) From 58e5d10338667f2625879380d9da9b07bcc3b7d8 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Mon, 10 Jul 2023 16:48:54 -0700 Subject: [PATCH 41/46] fix conditional --- .../base/destination/typing_deduping/RecordDiffer.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java index 3df9e167d110..0f494d26b481 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java @@ -249,11 +249,11 @@ private static String diffRecords(List originalExpectedRecords, private static boolean areJsonNodesEquivalent(JsonNode expectedValue, JsonNode actualValue) { // This is kind of sketchy, but seems to work fine for the data we have in our test cases. - return !Objects.equals(expectedValue, actualValue) + return Objects.equals(expectedValue, actualValue) // Objects.equals expects the two values to be the same class. // We need to handle comparisons between e.g. LongNode and IntNode. 
- && !(expectedValue.isIntegralNumber() && actualValue.isIntegralNumber() && expectedValue.asLong() == actualValue.asLong()) - && !(expectedValue.isNumber() && actualValue.isNumber() && expectedValue.asDouble() == actualValue.asDouble()); + || (expectedValue.isIntegralNumber() && actualValue.isIntegralNumber() && expectedValue.asLong() == actualValue.asLong()) + || (expectedValue.isNumber() && actualValue.isNumber() && expectedValue.asDouble() == actualValue.asDouble()); } /** From 6ef65b18d7cd6c7860432098f47580711d767823 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Mon, 10 Jul 2023 16:36:22 -0700 Subject: [PATCH 42/46] pull out diffSingleRecord --- .../typing_deduping/RecordDiffer.java | 104 +++++++++--------- 1 file changed, 55 insertions(+), 49 deletions(-) diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java index 0f494d26b481..3a14b1901b23 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java @@ -171,55 +171,9 @@ private static String diffRecords(List originalExpectedRecords, JsonNode actualRecord = actualRecords.get(actualRecordIndex); int compare = identityComparator.compare(expectedRecord, actualRecord); if (compare == 0) { - // These records should be the same. Find the specific fields that are different. - boolean foundMismatch = false; - String mismatchedRecordMessage = "Row had incorrect data: " + recordIdExtractor.apply(expectedRecord) + "\n"; - // Iterate through each column in the expected record and compare it to the actual record's value. 
- for (String column : Streams.stream(expectedRecord.fieldNames()).sorted().toList()) { - if (extractRawData && "_airbyte_data".equals(column)) { - // For the raw data in particular, we should also diff the fields inside _airbyte_data. - JsonNode expectedRawData = expectedRecord.get("_airbyte_data"); - JsonNode actualRawData = actualRecord.get("_airbyte_data"); - // Iterate through all the subfields of the expected raw data and check that they match the actual - // record... - for (String field : Streams.stream(expectedRawData.fieldNames()).sorted().toList()) { - JsonNode expectedValue = expectedRawData.get(field); - JsonNode actualValue = actualRawData.get(field); - if (!areJsonNodesEquivalent(expectedValue, actualValue)) { - mismatchedRecordMessage += generateFieldError("_airbyte_data." + field, expectedValue, actualValue); - foundMismatch = true; - } - } - // ... and then check the actual raw data for any subfields that we weren't expecting. - LinkedHashMap extraColumns = checkForExtraOrNonNullFields(expectedRawData, actualRawData); - if (extraColumns.size() > 0) { - for (Map.Entry extraColumn : extraColumns.entrySet()) { - mismatchedRecordMessage += generateFieldError("_airbyte_data." + extraColumn.getKey(), null, extraColumn.getValue()); - foundMismatch = true; - } - } - } else { - // For all other columns, we can just compare their values directly. - JsonNode expectedValue = expectedRecord.get(column); - JsonNode actualValue = actualRecord.get(column); - if (!areJsonNodesEquivalent(expectedValue, actualValue)) { - mismatchedRecordMessage += generateFieldError("column " + column, expectedValue, actualValue); - foundMismatch = true; - } - } - } - // Then check the entire actual record for any columns that we weren't expecting. 
- LinkedHashMap extraColumns = checkForExtraOrNonNullFields(expectedRecord, actualRecord); - if (extraColumns.size() > 0) { - for (Map.Entry extraColumn : extraColumns.entrySet()) { - mismatchedRecordMessage += generateFieldError("column " + extraColumn.getKey(), null, extraColumn.getValue()); - foundMismatch = true; - } - } - if (foundMismatch) { - message += mismatchedRecordMessage; - } - + // These records should be the same. Find the specific fields that are different and move on + // to the next records in both lists. + message += diffSingleRecord(recordIdExtractor, extractRawData, expectedRecord, actualRecord); expectedRecordIndex++; actualRecordIndex++; } else if (compare < 0) { @@ -247,6 +201,58 @@ private static String diffRecords(List originalExpectedRecords, return message; } + private static String diffSingleRecord(Function recordIdExtractor, boolean extractRawData, JsonNode expectedRecord, JsonNode actualRecord) { + boolean foundMismatch = false; + String mismatchedRecordMessage = "Row had incorrect data: " + recordIdExtractor.apply(expectedRecord) + "\n"; + // Iterate through each column in the expected record and compare it to the actual record's value. + for (String column : Streams.stream(expectedRecord.fieldNames()).sorted().toList()) { + if (extractRawData && "_airbyte_data".equals(column)) { + // For the raw data in particular, we should also diff the fields inside _airbyte_data. + JsonNode expectedRawData = expectedRecord.get("_airbyte_data"); + JsonNode actualRawData = actualRecord.get("_airbyte_data"); + // Iterate through all the subfields of the expected raw data and check that they match the actual + // record... + for (String field : Streams.stream(expectedRawData.fieldNames()).sorted().toList()) { + JsonNode expectedValue = expectedRawData.get(field); + JsonNode actualValue = actualRawData.get(field); + if (!areJsonNodesEquivalent(expectedValue, actualValue)) { + mismatchedRecordMessage += generateFieldError("_airbyte_data." 
+ field, expectedValue, actualValue); + foundMismatch = true; + } + } + // ... and then check the actual raw data for any subfields that we weren't expecting. + LinkedHashMap extraColumns = checkForExtraOrNonNullFields(expectedRawData, actualRawData); + if (extraColumns.size() > 0) { + for (Map.Entry extraColumn : extraColumns.entrySet()) { + mismatchedRecordMessage += generateFieldError("_airbyte_data." + extraColumn.getKey(), null, extraColumn.getValue()); + foundMismatch = true; + } + } + } else { + // For all other columns, we can just compare their values directly. + JsonNode expectedValue = expectedRecord.get(column); + JsonNode actualValue = actualRecord.get(column); + if (!areJsonNodesEquivalent(expectedValue, actualValue)) { + mismatchedRecordMessage += generateFieldError("column " + column, expectedValue, actualValue); + foundMismatch = true; + } + } + } + // Then check the entire actual record for any columns that we weren't expecting. + LinkedHashMap extraColumns = checkForExtraOrNonNullFields(expectedRecord, actualRecord); + if (extraColumns.size() > 0) { + for (Map.Entry extraColumn : extraColumns.entrySet()) { + mismatchedRecordMessage += generateFieldError("column " + extraColumn.getKey(), null, extraColumn.getValue()); + foundMismatch = true; + } + } + if (foundMismatch) { + return mismatchedRecordMessage; + } else { + return ""; + } + } + private static boolean areJsonNodesEquivalent(JsonNode expectedValue, JsonNode actualValue) { // This is kind of sketchy, but seems to work fine for the data we have in our test cases. 
return Objects.equals(expectedValue, actualValue) From 797b60f128f2ac568208297d8d048b221c2cdea1 Mon Sep 17 00:00:00 2001 From: edgao Date: Mon, 10 Jul 2023 23:53:01 +0000 Subject: [PATCH 43/46] Automated Commit - Formatting Changes --- .../base/destination/typing_deduping/RecordDiffer.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java index 3a14b1901b23..21f4c53b8887 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java @@ -201,7 +201,10 @@ private static String diffRecords(List originalExpectedRecords, return message; } - private static String diffSingleRecord(Function recordIdExtractor, boolean extractRawData, JsonNode expectedRecord, JsonNode actualRecord) { + private static String diffSingleRecord(Function recordIdExtractor, + boolean extractRawData, + JsonNode expectedRecord, + JsonNode actualRecord) { boolean foundMismatch = false; String mismatchedRecordMessage = "Row had incorrect data: " + recordIdExtractor.apply(expectedRecord) + "\n"; // Iterate through each column in the expected record and compare it to the actual record's value. 
From 060d30f9c0a175d77a086cd78f32e2b3cd910fcb Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Mon, 10 Jul 2023 16:58:01 -0700 Subject: [PATCH 44/46] handle nulls correctly --- .../typing_deduping/RecordDiffer.java | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java index 21f4c53b8887..ac6f0b4977f6 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java @@ -257,12 +257,18 @@ private static String diffSingleRecord(Function recordIdExtrac } private static boolean areJsonNodesEquivalent(JsonNode expectedValue, JsonNode actualValue) { - // This is kind of sketchy, but seems to work fine for the data we have in our test cases. - return Objects.equals(expectedValue, actualValue) - // Objects.equals expects the two values to be the same class. - // We need to handle comparisons between e.g. LongNode and IntNode. - || (expectedValue.isIntegralNumber() && actualValue.isIntegralNumber() && expectedValue.asLong() == actualValue.asLong()) - || (expectedValue.isNumber() && actualValue.isNumber() && expectedValue.asDouble() == actualValue.asDouble()); + if (expectedValue == null || actualValue == null) { + // If one of the values is null, then we expect both of them to be null. + return expectedValue == null && actualValue == null; + } else { + // Otherwise, we need to compare the actual values. + // This is kind of sketchy, but seems to work fine for the data we have in our test cases. 
+ return expectedValue.equals(actualValue) + // equals() expects the two values to be the same class. + // We need to handle comparisons between e.g. LongNode and IntNode. + || (expectedValue.isIntegralNumber() && actualValue.isIntegralNumber() && expectedValue.asLong() == actualValue.asLong()) + || (expectedValue.isNumber() && actualValue.isNumber() && expectedValue.asDouble() == actualValue.asDouble()); + } } /** From 7539094d533a0432bcc82ed5030bd1d27abacd7c Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Tue, 11 Jul 2023 09:02:45 -0700 Subject: [PATCH 45/46] remove raw-specific handling; break up methods --- .../typing_deduping/RecordDiffer.java | 134 ++++++++---------- 1 file changed, 56 insertions(+), 78 deletions(-) diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java index ac6f0b4977f6..34879a9cbef4 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java @@ -4,9 +4,12 @@ package io.airbyte.integrations.base.destination.typing_deduping; -import static org.junit.jupiter.api.Assertions.*; +import static java.util.stream.Collectors.toList; +import static org.junit.jupiter.api.Assertions.assertAll; +import static org.junit.jupiter.api.Assertions.fail; import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.common.collect.Streams; import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.base.destination.typing_deduping.AirbyteType.AirbyteProtocolType; @@ -21,7 +24,6 @@ import java.util.LinkedHashMap; 
import java.util.List; import java.util.Map; -import java.util.Objects; import java.util.function.Function; import java.util.stream.Collectors; import org.apache.commons.lang3.tuple.Pair; @@ -32,12 +34,9 @@ */ public class RecordDiffer { - private final Comparator rawRecordIdentityComparator; - private final Comparator rawRecordSortComparator; - private final Function rawRecordIdentityExtractor; - private final Comparator finalRecordIdentityComparator; - private final Comparator finalRecordSortComparator; - private final Function finalRecordIdentityExtractor; + private final Comparator recordIdentityComparator; + private final Comparator recordSortComparator; + private final Function recordIdentityExtractor; /** * @param identifyingColumns Which fields constitute a unique record (typically PK+cursor). Do _not_ @@ -45,14 +44,9 @@ public class RecordDiffer { */ @SafeVarargs public RecordDiffer(final Pair... identifyingColumns) { - this.rawRecordIdentityComparator = buildIdentityComparator(record -> record.get("_airbyte_data"), identifyingColumns); - this.finalRecordIdentityComparator = buildIdentityComparator(record -> record, identifyingColumns); - - this.rawRecordSortComparator = rawRecordIdentityComparator.thenComparing(record -> asString(record.get("_airbyte_raw_id"))); - this.finalRecordSortComparator = finalRecordIdentityComparator.thenComparing(record -> asString(record.get("_airbyte_raw_id"))); - - this.rawRecordIdentityExtractor = buildIdentityExtractor(record -> record.get("_airbyte_data"), identifyingColumns); - this.finalRecordIdentityExtractor = buildIdentityExtractor(record -> record, identifyingColumns); + this.recordIdentityComparator = buildIdentityComparator(identifyingColumns); + this.recordSortComparator = recordIdentityComparator.thenComparing(record -> asString(record.get("_airbyte_raw_id"))); + this.recordIdentityExtractor = buildIdentityExtractor(identifyingColumns); } /** @@ -71,12 +65,12 @@ public void verifySyncResult(List 
expectedRawRecords, public void diffRawTableRecords(List expectedRecords, List actualRecords) { String diff = diffRecords( - expectedRecords, - actualRecords, - rawRecordIdentityComparator, - rawRecordSortComparator, - rawRecordIdentityExtractor, - true); + expectedRecords.stream().map(RecordDiffer::copyWithLiftedData).collect(toList()), + actualRecords.stream().map(RecordDiffer::copyWithLiftedData).collect(toList()), + recordIdentityComparator, + recordSortComparator, + recordIdentityExtractor + ); if (!diff.isEmpty()) { fail("Raw table was incorrect.\n" + diff); @@ -87,41 +81,54 @@ public void diffFinalTableRecords(List expectedRecords, List String diff = diffRecords( expectedRecords, actualRecords, - finalRecordIdentityComparator, - finalRecordSortComparator, - finalRecordIdentityExtractor, - false); + recordIdentityComparator, + recordSortComparator, + recordIdentityExtractor + ); if (!diff.isEmpty()) { fail("Final table was incorrect.\n" + diff); } } + /** + * @return A copy of the record, but with all fields in _airbyte_data lifted to the top level. + */ + private static JsonNode copyWithLiftedData(JsonNode record) { + ObjectNode copy = record.deepCopy(); + copy.remove("_airbyte_data"); + Streams.stream(record.get("_airbyte_data").fields()).forEach(field -> { + if (!copy.has(field.getKey())) { + copy.set(field.getKey(), field.getValue()); + } else { + // This would only happen if the record has one of the metadata columns (e.g. _airbyte_raw_id) + // We don't support that in production, so we don't support it here either. + throw new RuntimeException("Cannot lift field " + field.getKey() + " because it already exists in the record."); + } + }); + return copy; + } + /** * Build a Comparator to detect equality between two records. It first compares all the identifying * columns in order, and breaks ties using extracted_at. - * - * @param dataExtractor A function that extracts the data from a record. 
For raw records, this - * should return the _airbyte_data field; for final records, this should return the record - * itself. */ - private Comparator buildIdentityComparator(Function dataExtractor, Pair[] identifyingColumns) { + private Comparator buildIdentityComparator(Pair[] identifyingColumns) { // Start with a noop comparator for convenience Comparator comp = Comparator.comparing(record -> 0); for (Pair column : identifyingColumns) { - comp = comp.thenComparing(record -> extract(dataExtractor.apply(record), column.getKey(), column.getValue())); + comp = comp.thenComparing(record -> extract(record, column.getKey(), column.getValue())); } comp = comp.thenComparing(record -> asTimestampWithTimezone(record.get("_airbyte_extracted_at"))); return comp; } /** - * See {@link #buildIdentityComparator(Function, Pair[])} for an explanation of dataExtractor. + * See {@link #buildIdentityComparator(Pair[])} for an explanation of dataExtractor. */ - private Function buildIdentityExtractor(Function dataExtractor, - Pair[] identifyingColumns) { + private Function buildIdentityExtractor(Pair[] identifyingColumns) { return record -> Arrays.stream(identifyingColumns) - .map(column -> getPrintableFieldIfPresent(dataExtractor.apply(record), column.getKey())) + .map(column -> getPrintableFieldIfPresent(record, column.getKey())) .collect(Collectors.joining(", ")) + getPrintableFieldIfPresent(record, "_airbyte_extracted_at"); } @@ -145,19 +152,17 @@ private static String getPrintableFieldIfPresent(JsonNode record, String field) * to be present. * * @param identityComparator Returns 0 iff two records are the "same" record (i.e. 
have the same - * PK+cursor+extracted_at) - * @param sortComparator Behaves identically to identityComparator, but if two records are the same, - * breaks that tie using _airbyte_raw_id - * @param recordIdExtractor Dump the record's PK+cursor+extracted_at into a human-readable string - * @param extractRawData Whether to look inside the _airbyte_data column and diff its subfields + * PK+cursor+extracted_at) + * @param sortComparator Behaves identically to identityComparator, but if two records are the same, + * breaks that tie using _airbyte_raw_id + * @param recordIdExtractor Dump the record's PK+cursor+extracted_at into a human-readable string * @return The diff, or empty string if there were no differences */ private static String diffRecords(List originalExpectedRecords, List originalActualRecords, Comparator identityComparator, Comparator sortComparator, - Function recordIdExtractor, - boolean extractRawData) { + Function recordIdExtractor) { List expectedRecords = originalExpectedRecords.stream().sorted(sortComparator).toList(); List actualRecords = originalActualRecords.stream().sorted(sortComparator).toList(); @@ -173,7 +178,7 @@ private static String diffRecords(List originalExpectedRecords, if (compare == 0) { // These records should be the same. Find the specific fields that are different and move on // to the next records in both lists. 
- message += diffSingleRecord(recordIdExtractor, extractRawData, expectedRecord, actualRecord); + message += diffSingleRecord(recordIdExtractor, expectedRecord, actualRecord); expectedRecordIndex++; actualRecordIndex++; } else if (compare < 0) { @@ -201,44 +206,17 @@ private static String diffRecords(List originalExpectedRecords, return message; } - private static String diffSingleRecord(Function recordIdExtractor, - boolean extractRawData, - JsonNode expectedRecord, - JsonNode actualRecord) { + private static String diffSingleRecord(Function recordIdExtractor, JsonNode expectedRecord, JsonNode actualRecord) { boolean foundMismatch = false; String mismatchedRecordMessage = "Row had incorrect data: " + recordIdExtractor.apply(expectedRecord) + "\n"; // Iterate through each column in the expected record and compare it to the actual record's value. for (String column : Streams.stream(expectedRecord.fieldNames()).sorted().toList()) { - if (extractRawData && "_airbyte_data".equals(column)) { - // For the raw data in particular, we should also diff the fields inside _airbyte_data. - JsonNode expectedRawData = expectedRecord.get("_airbyte_data"); - JsonNode actualRawData = actualRecord.get("_airbyte_data"); - // Iterate through all the subfields of the expected raw data and check that they match the actual - // record... - for (String field : Streams.stream(expectedRawData.fieldNames()).sorted().toList()) { - JsonNode expectedValue = expectedRawData.get(field); - JsonNode actualValue = actualRawData.get(field); - if (!areJsonNodesEquivalent(expectedValue, actualValue)) { - mismatchedRecordMessage += generateFieldError("_airbyte_data." + field, expectedValue, actualValue); - foundMismatch = true; - } - } - // ... and then check the actual raw data for any subfields that we weren't expecting. 
- LinkedHashMap extraColumns = checkForExtraOrNonNullFields(expectedRawData, actualRawData); - if (extraColumns.size() > 0) { - for (Map.Entry extraColumn : extraColumns.entrySet()) { - mismatchedRecordMessage += generateFieldError("_airbyte_data." + extraColumn.getKey(), null, extraColumn.getValue()); - foundMismatch = true; - } - } - } else { - // For all other columns, we can just compare their values directly. - JsonNode expectedValue = expectedRecord.get(column); - JsonNode actualValue = actualRecord.get(column); - if (!areJsonNodesEquivalent(expectedValue, actualValue)) { - mismatchedRecordMessage += generateFieldError("column " + column, expectedValue, actualValue); - foundMismatch = true; - } + // For all other columns, we can just compare their values directly. + JsonNode expectedValue = expectedRecord.get(column); + JsonNode actualValue = actualRecord.get(column); + if (!areJsonNodesEquivalent(expectedValue, actualValue)) { + mismatchedRecordMessage += generateFieldError("column " + column, expectedValue, actualValue); + foundMismatch = true; } } // Then check the entire actual record for any columns that we weren't expecting. 
From 144970b77163cece6931e9607752468c6f15d3df Mon Sep 17 00:00:00 2001 From: edgao Date: Tue, 11 Jul 2023 16:09:16 +0000 Subject: [PATCH 46/46] Automated Commit - Formatting Changes --- .../destination/typing_deduping/RecordDiffer.java | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java index 34879a9cbef4..846fb4a88bff 100644 --- a/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java +++ b/airbyte-integrations/bases/base-typing-deduping-test/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/RecordDiffer.java @@ -69,8 +69,7 @@ public void diffRawTableRecords(List expectedRecords, List a actualRecords.stream().map(RecordDiffer::copyWithLiftedData).collect(toList()), recordIdentityComparator, recordSortComparator, - recordIdentityExtractor - ); + recordIdentityExtractor); if (!diff.isEmpty()) { fail("Raw table was incorrect.\n" + diff); @@ -83,8 +82,7 @@ public void diffFinalTableRecords(List expectedRecords, List actualRecords, recordIdentityComparator, recordSortComparator, - recordIdentityExtractor - ); + recordIdentityExtractor); if (!diff.isEmpty()) { fail("Final table was incorrect.\n" + diff); @@ -152,10 +150,10 @@ private static String getPrintableFieldIfPresent(JsonNode record, String field) * to be present. * * @param identityComparator Returns 0 iff two records are the "same" record (i.e. 
have the same - * PK+cursor+extracted_at) - * @param sortComparator Behaves identically to identityComparator, but if two records are the same, - * breaks that tie using _airbyte_raw_id - * @param recordIdExtractor Dump the record's PK+cursor+extracted_at into a human-readable string + * PK+cursor+extracted_at) + * @param sortComparator Behaves identically to identityComparator, but if two records are the same, + * breaks that tie using _airbyte_raw_id + * @param recordIdExtractor Dump the record's PK+cursor+extracted_at into a human-readable string * @return The diff, or empty string if there were no differences */ private static String diffRecords(List originalExpectedRecords,