diff --git a/airbyte-integrations/bases/debezium-v1-4-2/src/testFixtures/java/io/airbyte/integrations/debezium/CdcSourceTest.java b/airbyte-integrations/bases/debezium-v1-4-2/src/testFixtures/java/io/airbyte/integrations/debezium/CdcSourceTest.java index a1049f0b7450..04cd2bfc20b8 100644 --- a/airbyte-integrations/bases/debezium-v1-4-2/src/testFixtures/java/io/airbyte/integrations/debezium/CdcSourceTest.java +++ b/airbyte-integrations/bases/debezium-v1-4-2/src/testFixtures/java/io/airbyte/integrations/debezium/CdcSourceTest.java @@ -316,7 +316,7 @@ void testDelete() throws Exception { .format("DELETE FROM %s.%s WHERE %s = %s", MODELS_SCHEMA, MODELS_STREAM_NAME, COL_ID, 11)); - final JsonNode state = stateMessages1.get(0).getData(); + final JsonNode state = Jsons.jsonNode(stateMessages1); final AutoCloseableIterator read2 = getSource() .read(getConfig(), CONFIGURED_CATALOG, state); final List actualRecords2 = AutoCloseableIterators.toListAndClose(read2); @@ -347,7 +347,7 @@ void testUpdate() throws Exception { .format("UPDATE %s.%s SET %s = '%s' WHERE %s = %s", MODELS_SCHEMA, MODELS_STREAM_NAME, COL_MODEL, updatedModel, COL_ID, 11)); - final JsonNode state = stateMessages1.get(0).getData(); + final JsonNode state = Jsons.jsonNode(stateMessages1); final AutoCloseableIterator read2 = getSource() .read(getConfig(), CONFIGURED_CATALOG, state); final List actualRecords2 = AutoCloseableIterators.toListAndClose(read2); @@ -403,7 +403,7 @@ void testRecordsProducedDuringAndAfterSync() throws Exception { recordsCreated[0]++; } - final JsonNode state = stateAfterFirstBatch.get(0).getData(); + final JsonNode state = Jsons.jsonNode(stateAfterFirstBatch); final AutoCloseableIterator secondBatchIterator = getSource() .read(getConfig(), CONFIGURED_CATALOG, state); final List dataFromSecondBatch = AutoCloseableIterators @@ -492,7 +492,7 @@ void testCdcAndFullRefreshInSameSync() throws Exception { .jsonNode(ImmutableMap.of(COL_ID, 100, COL_MAKE_ID, 3, COL_MODEL, "Punto")); 
writeModelRecord(puntoRecord); - final JsonNode state = extractStateMessages(actualRecords1).get(0).getData(); + final JsonNode state = Jsons.jsonNode(extractStateMessages(actualRecords1)); final AutoCloseableIterator read2 = getSource() .read(getConfig(), configuredCatalog, state); final List actualRecords2 = AutoCloseableIterators.toListAndClose(read2); @@ -535,7 +535,7 @@ void testNoDataOnSecondSync() throws Exception { final AutoCloseableIterator read1 = getSource() .read(getConfig(), CONFIGURED_CATALOG, null); final List actualRecords1 = AutoCloseableIterators.toListAndClose(read1); - final JsonNode state = extractStateMessages(actualRecords1).get(0).getData(); + final JsonNode state = Jsons.jsonNode(extractStateMessages(actualRecords1)); final AutoCloseableIterator read2 = getSource() .read(getConfig(), CONFIGURED_CATALOG, state); diff --git a/airbyte-integrations/bases/debezium-v1-9-2/src/testFixtures/java/io/airbyte/integrations/debezium/CdcSourceTest.java b/airbyte-integrations/bases/debezium-v1-9-2/src/testFixtures/java/io/airbyte/integrations/debezium/CdcSourceTest.java index 79d6dbbd5b31..441de6ff481e 100644 --- a/airbyte-integrations/bases/debezium-v1-9-2/src/testFixtures/java/io/airbyte/integrations/debezium/CdcSourceTest.java +++ b/airbyte-integrations/bases/debezium-v1-9-2/src/testFixtures/java/io/airbyte/integrations/debezium/CdcSourceTest.java @@ -316,7 +316,7 @@ void testDelete() throws Exception { .format("DELETE FROM %s.%s WHERE %s = %s", MODELS_SCHEMA, MODELS_STREAM_NAME, COL_ID, 11)); - final JsonNode state = stateMessages1.get(0).getData(); + final JsonNode state = Jsons.jsonNode(stateMessages1); final AutoCloseableIterator read2 = getSource() .read(getConfig(), CONFIGURED_CATALOG, state); final List actualRecords2 = AutoCloseableIterators.toListAndClose(read2); @@ -347,7 +347,7 @@ void testUpdate() throws Exception { .format("UPDATE %s.%s SET %s = '%s' WHERE %s = %s", MODELS_SCHEMA, MODELS_STREAM_NAME, COL_MODEL, updatedModel, COL_ID, 11)); 
- final JsonNode state = stateMessages1.get(0).getData(); + final JsonNode state = Jsons.jsonNode(stateMessages1); final AutoCloseableIterator read2 = getSource() .read(getConfig(), CONFIGURED_CATALOG, state); final List actualRecords2 = AutoCloseableIterators.toListAndClose(read2); @@ -399,7 +399,7 @@ protected void testRecordsProducedDuringAndAfterSync() throws Exception { writeModelRecord(record); } - final JsonNode state = stateAfterFirstBatch.get(0).getData(); + final JsonNode state = Jsons.jsonNode(stateAfterFirstBatch); final AutoCloseableIterator secondBatchIterator = getSource() .read(getConfig(), CONFIGURED_CATALOG, state); final List dataFromSecondBatch = AutoCloseableIterators @@ -488,7 +488,7 @@ void testCdcAndFullRefreshInSameSync() throws Exception { .jsonNode(ImmutableMap.of(COL_ID, 100, COL_MAKE_ID, 3, COL_MODEL, "Punto")); writeModelRecord(puntoRecord); - final JsonNode state = extractStateMessages(actualRecords1).get(0).getData(); + final JsonNode state = Jsons.jsonNode(extractStateMessages(actualRecords1)); final AutoCloseableIterator read2 = getSource() .read(getConfig(), configuredCatalog, state); final List actualRecords2 = AutoCloseableIterators.toListAndClose(read2); @@ -531,7 +531,7 @@ void testNoDataOnSecondSync() throws Exception { final AutoCloseableIterator read1 = getSource() .read(getConfig(), CONFIGURED_CATALOG, null); final List actualRecords1 = AutoCloseableIterators.toListAndClose(read1); - final JsonNode state = extractStateMessages(actualRecords1).get(0).getData(); + final JsonNode state = Jsons.jsonNode(extractStateMessages(actualRecords1)); final AutoCloseableIterator read2 = getSource() .read(getConfig(), CONFIGURED_CATALOG, state); diff --git a/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/SourceAcceptanceTest.java b/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/SourceAcceptanceTest.java index 
186d0b3c14ad..a6e2d50c85aa 100644 --- a/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/SourceAcceptanceTest.java +++ b/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/SourceAcceptanceTest.java @@ -13,6 +13,7 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; +import com.google.common.collect.Iterables; import com.google.common.collect.Sets; import io.airbyte.commons.json.Jsons; import io.airbyte.config.StandardCheckConnectionOutput.Status; @@ -106,6 +107,18 @@ public abstract class SourceAcceptanceTest extends AbstractSourceConnectorTest { */ protected abstract JsonNode getState() throws Exception; + /** + * Tests whether the connector under test supports the per-stream state format or should use the + * legacy format for data generated by this test. + * + * @return {@code true} if the connector supports the per-stream state format or {@code false} if it + * does not support the per-stream state format (e.g. legacy format supported). Default + * value is {@code false}. + */ + protected boolean supportsPerStream() { + return false; + } + /** * Verify that a spec operation issued to the connector returns a valid spec. */ @@ -236,7 +249,7 @@ public void testIncrementalSyncWithState() throws Exception { // when we run incremental sync again there should be no new records. Run a sync with the latest // state message and assert no records were emitted. - final JsonNode latestState = stateMessages.get(stateMessages.size() - 1).getData(); + final JsonNode latestState = Jsons.jsonNode(supportsPerStream() ? 
stateMessages : List.of(Iterables.getLast(stateMessages))); final List secondSyncRecords = filterRecords(runRead(configuredCatalog, latestState)); assertTrue( secondSyncRecords.isEmpty(), diff --git a/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSourceAcceptanceTest.java index 909194580404..01e1837b7992 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSourceAcceptanceTest.java @@ -15,8 +15,14 @@ import io.airbyte.integrations.base.IntegrationRunner; import io.airbyte.integrations.base.Source; import io.airbyte.integrations.source.jdbc.test.JdbcSourceAcceptanceTest; +import io.airbyte.integrations.source.relationaldb.models.CdcState; +import io.airbyte.protocol.models.AirbyteGlobalState; +import io.airbyte.protocol.models.AirbyteStateMessage; +import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; +import io.airbyte.protocol.models.AirbyteStreamState; import io.airbyte.test.utils.PostgreSQLContainerHelper; import java.sql.JDBCType; +import java.util.List; import java.util.Set; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; @@ -82,6 +88,11 @@ public String getDriverClass() { return PostgresTestSource.DRIVER_CLASS; } + @Override + protected boolean supportsPerStream() { + return true; + } + @AfterAll static void cleanUp() { PSQL_DB.close(); @@ -118,6 +129,27 @@ public Set getExcludedInternalNameSpaces() { return Set.of("information_schema", "pg_catalog", "pg_internal", "catalog_history"); } + // TODO This is a temporary override so that the Postgres source can take advantage of per-stream + // state + @Override + protected 
List generateEmptyInitialState(final JsonNode config) { + if (getSupportedStateType(config) == AirbyteStateType.GLOBAL) { + final AirbyteGlobalState globalState = new AirbyteGlobalState() + .withSharedState(Jsons.jsonNode(new CdcState())) + .withStreamStates(List.of()); + return List.of(new AirbyteStateMessage().withStateType(AirbyteStateType.GLOBAL).withGlobal(globalState)); + } else { + return List.of(new AirbyteStateMessage() + .withStateType(AirbyteStateType.STREAM) + .withStream(new AirbyteStreamState())); + } + } + + @Override + protected AirbyteStateType getSupportedStateType(final JsonNode config) { + return AirbyteStateType.STREAM; + } + public static void main(final String[] args) throws Exception { final Source source = new PostgresTestSource(); LOGGER.info("starting source: {}", PostgresTestSource.class); diff --git a/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java index 802d8ac79bc7..74d8d7add0af 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java @@ -13,10 +13,6 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Lists; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.resources.MoreResources; @@ -39,7 +35,9 @@ import io.airbyte.protocol.models.AirbyteMessage.Type; import 
io.airbyte.protocol.models.AirbyteRecordMessage; import io.airbyte.protocol.models.AirbyteStateMessage; +import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; import io.airbyte.protocol.models.AirbyteStream; +import io.airbyte.protocol.models.AirbyteStreamState; import io.airbyte.protocol.models.CatalogHelpers; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import io.airbyte.protocol.models.ConfiguredAirbyteStream; @@ -47,6 +45,7 @@ import io.airbyte.protocol.models.DestinationSyncMode; import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaType; +import io.airbyte.protocol.models.StreamDescriptor; import io.airbyte.protocol.models.SyncMode; import java.math.BigDecimal; import java.sql.SQLException; @@ -54,6 +53,7 @@ import java.util.Collections; import java.util.Comparator; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.function.Function; @@ -82,7 +82,7 @@ public abstract class JdbcSourceAcceptanceTest { // otherwise parallel runs can interfere with each other public static String SCHEMA_NAME = Strings.addRandomSuffix("jdbc_integration_test1", "_", 5).toLowerCase(); public static String SCHEMA_NAME2 = Strings.addRandomSuffix("jdbc_integration_test2", "_", 5).toLowerCase(); - public static Set TEST_SCHEMAS = ImmutableSet.of(SCHEMA_NAME, SCHEMA_NAME2); + public static Set TEST_SCHEMAS = Set.of(SCHEMA_NAME, SCHEMA_NAME2); public static String TABLE_NAME = "id_and_name"; public static String TABLE_NAME_WITH_SPACES = "id and name"; @@ -255,7 +255,7 @@ public void setup() throws Exception { connection.createStatement().execute( createTableQuery(getFullyQualifiedTableName(TABLE_NAME_COMPOSITE_PK), COLUMN_CLAUSE_WITH_COMPOSITE_PK, - primaryKeyClause(ImmutableList.of("first_name", "last_name")))); + primaryKeyClause(List.of("first_name", "last_name")))); connection.createStatement().execute( String.format( "INSERT INTO %s(first_name, last_name, 
updated_at) VALUES ('first' ,'picard', '2004-10-19')", @@ -354,12 +354,15 @@ void testDiscoverWithMultipleSchemas() throws Exception { final AirbyteCatalog actual = source.discover(config); final AirbyteCatalog expected = getCatalog(getDefaultNamespace()); - expected.getStreams().add(CatalogHelpers + final List catalogStreams = new ArrayList<>(); + catalogStreams.addAll(expected.getStreams()); + catalogStreams.add(CatalogHelpers .createAirbyteStream(TABLE_NAME, SCHEMA_NAME2, Field.of(COL_ID, JsonSchemaType.STRING), Field.of(COL_NAME, JsonSchemaType.STRING)) - .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))); + .withSupportedSyncModes(List.of(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))); + expected.setStreams(catalogStreams); // sort streams by name so that we are comparing lists with the same order. final Comparator schemaTableCompare = Comparator.comparing(stream -> stream.getNamespace() + "." + stream.getName()); expected.getStreams().sort(schemaTableCompare); @@ -389,9 +392,8 @@ void testReadOneColumn() throws Exception { setEmittedAtToNull(actualMessages); final List expectedMessages = getAirbyteMessagesReadOneColumn(); - assertTrue(expectedMessages.size() == actualMessages.size()); - assertTrue(expectedMessages.containsAll(actualMessages)); - assertTrue(actualMessages.containsAll(expectedMessages)); + assertEquals(expectedMessages.size(), actualMessages.size()); + assertEquals(expectedMessages, actualMessages); } protected List getAirbyteMessagesReadOneColumn() { @@ -437,8 +439,7 @@ void testReadMultipleTables() throws Exception { Field.of(COL_ID, JsonSchemaType.NUMBER), Field.of(COL_NAME, JsonSchemaType.STRING))); - final List secondStreamExpectedMessages = getAirbyteMessagesSecondSync(streamName2); - expectedMessages.addAll(secondStreamExpectedMessages); + expectedMessages.addAll(getAirbyteMessagesSecondSync(streamName2)); } final List actualMessages = MoreIterators @@ -446,12 +447,11 @@ void 
testReadMultipleTables() throws Exception { setEmittedAtToNull(actualMessages); - assertTrue(expectedMessages.size() == actualMessages.size()); - assertTrue(expectedMessages.containsAll(actualMessages)); - assertTrue(actualMessages.containsAll(expectedMessages)); + assertEquals(expectedMessages.size(), actualMessages.size()); + assertEquals(expectedMessages, actualMessages); } - protected List getAirbyteMessagesSecondSync(String streamName2) { + protected List getAirbyteMessagesSecondSync(final String streamName2) { return getTestMessages() .stream() .map(Jsons::clone) @@ -471,7 +471,7 @@ void testTablesWithQuoting() throws Exception { final ConfiguredAirbyteStream streamForTableWithSpaces = createTableWithSpaces(); final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() - .withStreams(Lists.newArrayList( + .withStreams(List.of( getConfiguredCatalogWithOneStream(getDefaultNamespace()).getStreams().get(0), streamForTableWithSpaces)); final List actualMessages = MoreIterators @@ -479,16 +479,14 @@ void testTablesWithQuoting() throws Exception { setEmittedAtToNull(actualMessages); - final List secondStreamExpectedMessages = getAirbyteMessagesForTablesWithQuoting(streamForTableWithSpaces); final List expectedMessages = new ArrayList<>(getTestMessages()); - expectedMessages.addAll(secondStreamExpectedMessages); + expectedMessages.addAll(getAirbyteMessagesForTablesWithQuoting(streamForTableWithSpaces)); - assertTrue(expectedMessages.size() == actualMessages.size()); - assertTrue(expectedMessages.containsAll(actualMessages)); - assertTrue(actualMessages.containsAll(expectedMessages)); + assertEquals(expectedMessages.size(), actualMessages.size()); + assertEquals(expectedMessages, actualMessages); } - protected List getAirbyteMessagesForTablesWithQuoting(ConfiguredAirbyteStream streamForTableWithSpaces) { + protected List getAirbyteMessagesForTablesWithQuoting(final ConfiguredAirbyteStream streamForTableWithSpaces) { return getTestMessages() .stream() 
.map(Jsons::clone) @@ -509,7 +507,7 @@ void testReadFailure() { final ConfiguredAirbyteStream spiedAbStream = spy( getConfiguredCatalogWithOneStream(getDefaultNamespace()).getStreams().get(0)); final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() - .withStreams(Lists.newArrayList(spiedAbStream)); + .withStreams(List.of(spiedAbStream)); doCallRealMethod().doThrow(new RuntimeException()).when(spiedAbStream).getStream(); assertThrows(RuntimeException.class, () -> source.read(config, catalog, null)); @@ -521,7 +519,7 @@ void testIncrementalNoPreviousState() throws Exception { COL_ID, null, "3", - Lists.newArrayList(getTestMessages())); + getTestMessages()); } @Test @@ -530,7 +528,7 @@ void testIncrementalIntCheckCursor() throws Exception { COL_ID, "2", "3", - Lists.newArrayList(getTestMessages().get(2))); + List.of(getTestMessages().get(2))); } @Test @@ -539,14 +537,14 @@ void testIncrementalStringCheckCursor() throws Exception { COL_NAME, "patent", "vash", - Lists.newArrayList(getTestMessages().get(0), getTestMessages().get(2))); + List.of(getTestMessages().get(0), getTestMessages().get(2))); } @Test void testIncrementalStringCheckCursorSpaceInColumnName() throws Exception { final ConfiguredAirbyteStream streamWithSpaces = createTableWithSpaces(); - final ArrayList expectedRecordMessages = getAirbyteMessagesCheckCursorSpaceInColumnName(streamWithSpaces); + final List expectedRecordMessages = getAirbyteMessagesCheckCursorSpaceInColumnName(streamWithSpaces); incrementalCursorCheck( COL_LAST_NAME_WITH_SPACE, COL_LAST_NAME_WITH_SPACE, @@ -556,7 +554,7 @@ void testIncrementalStringCheckCursorSpaceInColumnName() throws Exception { streamWithSpaces); } - protected ArrayList getAirbyteMessagesCheckCursorSpaceInColumnName(ConfiguredAirbyteStream streamWithSpaces) { + protected List getAirbyteMessagesCheckCursorSpaceInColumnName(final ConfiguredAirbyteStream streamWithSpaces) { final AirbyteMessage firstMessage = getTestMessages().get(0); 
firstMessage.getRecord().setStream(streamWithSpaces.getStream().getName()); ((ObjectNode) firstMessage.getRecord().getData()).remove(COL_UPDATED_AT); @@ -569,9 +567,7 @@ protected ArrayList getAirbyteMessagesCheckCursorSpaceInColumnNa ((ObjectNode) secondMessage.getRecord().getData()).set(COL_LAST_NAME_WITH_SPACE, ((ObjectNode) secondMessage.getRecord().getData()).remove(COL_NAME)); - Lists.newArrayList(getTestMessages().get(0), getTestMessages().get(2)); - - return Lists.newArrayList(firstMessage, secondMessage); + return List.of(firstMessage, secondMessage); } @Test @@ -584,7 +580,7 @@ protected void incrementalDateCheck() throws Exception { COL_UPDATED_AT, "2005-10-18T00:00:00Z", "2006-10-19T00:00:00Z", - Lists.newArrayList(getTestMessages().get(1), getTestMessages().get(2))); + List.of(getTestMessages().get(1), getTestMessages().get(2))); } @Test @@ -597,7 +593,7 @@ void testIncrementalCursorChanges() throws Exception { // records to (incorrectly) be filtered out. "data", "vash", - Lists.newArrayList(getTestMessages())); + getTestMessages()); } @Test @@ -606,14 +602,12 @@ void testReadOneTableIncrementallyTwice() throws Exception { final ConfiguredAirbyteCatalog configuredCatalog = getConfiguredCatalogWithOneStream(namespace); configuredCatalog.getStreams().forEach(airbyteStream -> { airbyteStream.setSyncMode(SyncMode.INCREMENTAL); - airbyteStream.setCursorField(Lists.newArrayList(COL_ID)); + airbyteStream.setCursorField(List.of(COL_ID)); airbyteStream.setDestinationSyncMode(DestinationSyncMode.APPEND); }); - final DbState state = new DbState() - .withStreams(Lists.newArrayList(new DbStreamState().withStreamName(streamName).withStreamNamespace(namespace))); final List actualMessagesFirstSync = MoreIterators - .toList(source.read(config, configuredCatalog, Jsons.jsonNode(state))); + .toList(source.read(config, configuredCatalog, createEmptyState(streamName, namespace))); final Optional stateAfterFirstSyncOptional = actualMessagesFirstSync.stream() .filter(r -> 
r.getType() == Type.STATE).findFirst(); @@ -622,8 +616,7 @@ void testReadOneTableIncrementallyTwice() throws Exception { executeStatementReadIncrementallyTwice(); final List actualMessagesSecondSync = MoreIterators - .toList(source.read(config, configuredCatalog, - stateAfterFirstSyncOptional.get().getState().getData())); + .toList(source.read(config, configuredCatalog, extractState(stateAfterFirstSyncOptional.get()))); assertEquals(2, (int) actualMessagesSecondSync.stream().filter(r -> r.getType() == Type.RECORD).count()); @@ -631,9 +624,8 @@ void testReadOneTableIncrementallyTwice() throws Exception { setEmittedAtToNull(actualMessagesSecondSync); - assertTrue(expectedMessages.size() == actualMessagesSecondSync.size()); - assertTrue(expectedMessages.containsAll(actualMessagesSecondSync)); - assertTrue(actualMessagesSecondSync.containsAll(expectedMessages)); + assertEquals(expectedMessages.size(), actualMessagesSecondSync.size()); + assertEquals(expectedMessages, actualMessagesSecondSync); } protected void executeStatementReadIncrementallyTwice() throws SQLException { @@ -647,30 +639,26 @@ protected void executeStatementReadIncrementallyTwice() throws SQLException { }); } - protected List getExpectedAirbyteMessagesSecondSync(String namespace) { + protected List getExpectedAirbyteMessagesSecondSync(final String namespace) { final List expectedMessages = new ArrayList<>(); expectedMessages.add(new AirbyteMessage().withType(Type.RECORD) .withRecord(new AirbyteRecordMessage().withStream(streamName).withNamespace(namespace) - .withData(Jsons.jsonNode(ImmutableMap + .withData(Jsons.jsonNode(Map .of(COL_ID, ID_VALUE_4, COL_NAME, "riker", COL_UPDATED_AT, "2006-10-19T00:00:00Z"))))); expectedMessages.add(new AirbyteMessage().withType(Type.RECORD) .withRecord(new AirbyteRecordMessage().withStream(streamName).withNamespace(namespace) - .withData(Jsons.jsonNode(ImmutableMap + .withData(Jsons.jsonNode(Map .of(COL_ID, ID_VALUE_5, COL_NAME, "data", COL_UPDATED_AT, 
"2006-10-19T00:00:00Z"))))); - expectedMessages.add(new AirbyteMessage() - .withType(Type.STATE) - .withState(new AirbyteStateMessage() - .withData(Jsons.jsonNode(new DbState() - .withCdc(false) - .withStreams(Lists.newArrayList(new DbStreamState() - .withStreamName(streamName) - .withStreamNamespace(namespace) - .withCursorField(ImmutableList.of(COL_ID)) - .withCursor("5"))))))); + final DbStreamState state = new DbStreamState() + .withStreamName(streamName) + .withStreamNamespace(namespace) + .withCursorField(List.of(COL_ID)) + .withCursor("5"); + expectedMessages.addAll(createExpectedTestMessages(List.of(state))); return expectedMessages; } @@ -702,14 +690,12 @@ void testReadMultipleTablesIncrementally() throws Exception { Field.of(COL_NAME, JsonSchemaType.STRING))); configuredCatalog.getStreams().forEach(airbyteStream -> { airbyteStream.setSyncMode(SyncMode.INCREMENTAL); - airbyteStream.setCursorField(Lists.newArrayList(COL_ID)); + airbyteStream.setCursorField(List.of(COL_ID)); airbyteStream.setDestinationSyncMode(DestinationSyncMode.APPEND); }); - final DbState state = new DbState() - .withStreams(Lists.newArrayList(new DbStreamState().withStreamName(streamName).withStreamNamespace(namespace))); final List actualMessagesFirstSync = MoreIterators - .toList(source.read(config, configuredCatalog, Jsons.jsonNode(state))); + .toList(source.read(config, configuredCatalog, createEmptyState(streamName, namespace))); // get last state message. final Optional stateAfterFirstSyncOptional = actualMessagesFirstSync.stream() @@ -720,49 +706,44 @@ void testReadMultipleTablesIncrementally() throws Exception { // we know the second streams messages are the same as the first minus the updated at column. so we // cheat and generate the expected messages off of the first expected messages. 
final List secondStreamExpectedMessages = getAirbyteMessagesSecondStreamWithNamespace(streamName2); - final List expectedMessagesFirstSync = new ArrayList<>(getTestMessages()); - expectedMessagesFirstSync.add(new AirbyteMessage() - .withType(Type.STATE) - .withState(new AirbyteStateMessage() - .withData(Jsons.jsonNode(new DbState() - .withCdc(false) - .withStreams(Lists.newArrayList( - new DbStreamState() - .withStreamName(streamName) - .withStreamNamespace(namespace) - .withCursorField(ImmutableList.of(COL_ID)) - .withCursor("3"), - new DbStreamState() - .withStreamName(streamName2) - .withStreamNamespace(namespace) - .withCursorField(ImmutableList.of(COL_ID)))))))); + // Represents the state after the first stream has been updated + final List expectedStateStreams1 = List.of( + new DbStreamState() + .withStreamName(streamName) + .withStreamNamespace(namespace) + .withCursorField(List.of(COL_ID)) + .withCursor("3"), + new DbStreamState() + .withStreamName(streamName2) + .withStreamNamespace(namespace) + .withCursorField(List.of(COL_ID))); + + // Represents the state after both streams have been updated + final List expectedStateStreams2 = List.of( + new DbStreamState() + .withStreamName(streamName) + .withStreamNamespace(namespace) + .withCursorField(List.of(COL_ID)) + .withCursor("3"), + new DbStreamState() + .withStreamName(streamName2) + .withStreamNamespace(namespace) + .withCursorField(List.of(COL_ID)) + .withCursor("3")); + + final List expectedMessagesFirstSync = new ArrayList<>(getTestMessages()); + expectedMessagesFirstSync.add(createStateMessage(expectedStateStreams1.get(0), expectedStateStreams1)); expectedMessagesFirstSync.addAll(secondStreamExpectedMessages); - expectedMessagesFirstSync.add(new AirbyteMessage() - .withType(Type.STATE) - .withState(new AirbyteStateMessage() - .withData(Jsons.jsonNode(new DbState() - .withCdc(false) - .withStreams(Lists.newArrayList( - new DbStreamState() - .withStreamName(streamName) - .withStreamNamespace(namespace) - 
.withCursorField(ImmutableList.of(COL_ID)) - .withCursor("3"), - new DbStreamState() - .withStreamName(streamName2) - .withStreamNamespace(namespace) - .withCursorField(ImmutableList.of(COL_ID)) - .withCursor("3"))))))); + expectedMessagesFirstSync.add(createStateMessage(expectedStateStreams2.get(1), expectedStateStreams2)); setEmittedAtToNull(actualMessagesFirstSync); - assertTrue(expectedMessagesFirstSync.size() == actualMessagesFirstSync.size()); - assertTrue(expectedMessagesFirstSync.containsAll(actualMessagesFirstSync)); - assertTrue(actualMessagesFirstSync.containsAll(expectedMessagesFirstSync)); + assertEquals(expectedMessagesFirstSync.size(), actualMessagesFirstSync.size()); + assertEquals(expectedMessagesFirstSync, actualMessagesFirstSync); } - protected List getAirbyteMessagesSecondStreamWithNamespace(String streamName2) { + protected List getAirbyteMessagesSecondStreamWithNamespace(final String streamName2) { return getTestMessages() .stream() .map(Jsons::clone) @@ -807,39 +788,34 @@ private void incrementalCursorCheck( final ConfiguredAirbyteStream airbyteStream) throws Exception { airbyteStream.setSyncMode(SyncMode.INCREMENTAL); - airbyteStream.setCursorField(Lists.newArrayList(cursorField)); + airbyteStream.setCursorField(List.of(cursorField)); airbyteStream.setDestinationSyncMode(DestinationSyncMode.APPEND); - final DbState state = new DbState() - .withStreams(Lists.newArrayList(new DbStreamState() - .withStreamName(airbyteStream.getStream().getName()) - .withStreamNamespace(airbyteStream.getStream().getNamespace()) - .withCursorField(ImmutableList.of(initialCursorField)) - .withCursor(initialCursorValue))); - final ConfiguredAirbyteCatalog configuredCatalog = new ConfiguredAirbyteCatalog() - .withStreams(ImmutableList.of(airbyteStream)); + .withStreams(List.of(airbyteStream)); + + final DbStreamState dbStreamState = new DbStreamState() + .withStreamName(airbyteStream.getStream().getName()) + 
.withStreamNamespace(airbyteStream.getStream().getNamespace()) + .withCursorField(List.of(initialCursorField)) + .withCursor(initialCursorValue); final List actualMessages = MoreIterators - .toList(source.read(config, configuredCatalog, Jsons.jsonNode(state))); + .toList(source.read(config, configuredCatalog, Jsons.jsonNode(createState(List.of(dbStreamState))))); setEmittedAtToNull(actualMessages); + final List expectedStreams = List.of( + new DbStreamState() + .withStreamName(airbyteStream.getStream().getName()) + .withStreamNamespace(airbyteStream.getStream().getNamespace()) + .withCursorField(List.of(cursorField)) + .withCursor(endCursorValue)); final List expectedMessages = new ArrayList<>(expectedRecordMessages); - expectedMessages.add(new AirbyteMessage() - .withType(Type.STATE) - .withState(new AirbyteStateMessage() - .withData(Jsons.jsonNode(new DbState() - .withCdc(false) - .withStreams(Lists.newArrayList(new DbStreamState() - .withStreamName(airbyteStream.getStream().getName()) - .withStreamNamespace(airbyteStream.getStream().getNamespace()) - .withCursorField(ImmutableList.of(cursorField)) - .withCursor(endCursorValue))))))); - - assertTrue(expectedMessages.size() == actualMessages.size()); - assertTrue(expectedMessages.containsAll(actualMessages)); - assertTrue(actualMessages.containsAll(expectedMessages)); + expectedMessages.addAll(createExpectedTestMessages(expectedStreams)); + + assertEquals(expectedMessages.size(), actualMessages.size()); + assertEquals(expectedMessages, actualMessages); } // get catalog and perform a defensive copy. 
@@ -853,14 +829,14 @@ protected ConfiguredAirbyteCatalog getConfiguredCatalogWithOneStream(final Strin } protected AirbyteCatalog getCatalog(final String defaultNamespace) { - return new AirbyteCatalog().withStreams(Lists.newArrayList( + return new AirbyteCatalog().withStreams(List.of( CatalogHelpers.createAirbyteStream( TABLE_NAME, defaultNamespace, Field.of(COL_ID, JsonSchemaType.NUMBER), Field.of(COL_NAME, JsonSchemaType.STRING), Field.of(COL_UPDATED_AT, JsonSchemaType.STRING)) - .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) + .withSupportedSyncModes(List.of(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) .withSourceDefinedPrimaryKey(List.of(List.of(COL_ID))), CatalogHelpers.createAirbyteStream( TABLE_NAME_WITHOUT_PK, @@ -868,7 +844,7 @@ protected AirbyteCatalog getCatalog(final String defaultNamespace) { Field.of(COL_ID, JsonSchemaType.NUMBER), Field.of(COL_NAME, JsonSchemaType.STRING), Field.of(COL_UPDATED_AT, JsonSchemaType.STRING)) - .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) + .withSupportedSyncModes(List.of(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) .withSourceDefinedPrimaryKey(Collections.emptyList()), CatalogHelpers.createAirbyteStream( TABLE_NAME_COMPOSITE_PK, @@ -876,34 +852,62 @@ protected AirbyteCatalog getCatalog(final String defaultNamespace) { Field.of(COL_FIRST_NAME, JsonSchemaType.STRING), Field.of(COL_LAST_NAME, JsonSchemaType.STRING), Field.of(COL_UPDATED_AT, JsonSchemaType.STRING)) - .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) + .withSupportedSyncModes(List.of(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) .withSourceDefinedPrimaryKey( List.of(List.of(COL_FIRST_NAME), List.of(COL_LAST_NAME))))); } protected List getTestMessages() { - return Lists.newArrayList( + return List.of( new AirbyteMessage().withType(Type.RECORD) .withRecord(new 
AirbyteRecordMessage().withStream(streamName).withNamespace(getDefaultNamespace()) - .withData(Jsons.jsonNode(ImmutableMap + .withData(Jsons.jsonNode(Map .of(COL_ID, ID_VALUE_1, COL_NAME, "picard", COL_UPDATED_AT, "2004-10-19T00:00:00Z")))), new AirbyteMessage().withType(Type.RECORD) .withRecord(new AirbyteRecordMessage().withStream(streamName).withNamespace(getDefaultNamespace()) - .withData(Jsons.jsonNode(ImmutableMap + .withData(Jsons.jsonNode(Map .of(COL_ID, ID_VALUE_2, COL_NAME, "crusher", COL_UPDATED_AT, "2005-10-19T00:00:00Z")))), new AirbyteMessage().withType(Type.RECORD) .withRecord(new AirbyteRecordMessage().withStream(streamName).withNamespace(getDefaultNamespace()) - .withData(Jsons.jsonNode(ImmutableMap + .withData(Jsons.jsonNode(Map .of(COL_ID, ID_VALUE_3, COL_NAME, "vash", COL_UPDATED_AT, "2006-10-19T00:00:00Z"))))); } + protected List createExpectedTestMessages(final List states) { + return supportsPerStream() + ? states.stream() + .map(s -> new AirbyteMessage().withType(Type.STATE) + .withState( + new AirbyteStateMessage().withStateType(AirbyteStateType.STREAM) + .withStream(new AirbyteStreamState() + .withStreamDescriptor(new StreamDescriptor().withNamespace(s.getStreamNamespace()).withName(s.getStreamName())) + .withStreamState(Jsons.jsonNode(s))) + .withData(Jsons.jsonNode(new DbState().withCdc(false).withStreams(states))))) + .collect( + Collectors.toList()) + : List.of(new AirbyteMessage().withType(Type.STATE).withState(new AirbyteStateMessage().withStateType(AirbyteStateType.LEGACY) + .withData(Jsons.jsonNode(new DbState().withCdc(false).withStreams(states))))); + } + + protected List createState(final List states) { + return supportsPerStream() + ? 
states.stream() + .map(s -> new AirbyteStateMessage().withStateType(AirbyteStateType.STREAM) + .withStream(new AirbyteStreamState() + .withStreamDescriptor(new StreamDescriptor().withNamespace(s.getStreamNamespace()).withName(s.getStreamName())) + .withStreamState(Jsons.jsonNode(s)))) + .collect( + Collectors.toList()) + : List.of(new AirbyteStateMessage().withStateType(AirbyteStateType.LEGACY).withData(Jsons.jsonNode(new DbState().withStreams(states)))); + } + protected ConfiguredAirbyteStream createTableWithSpaces() throws SQLException { final String tableNameWithSpaces = TABLE_NAME_WITH_SPACES + "2"; final String streamName2 = tableNameWithSpaces; @@ -994,4 +998,67 @@ protected static void setEmittedAtToNull(final Iterable messages } } + /** + * Tests whether the connector under test supports the per-stream state format or should use the + * legacy format for data generated by this test. + * + * @return {@code true} if the connector supports the per-stream state format or {@code false} if it + * does not support the per-stream state format (e.g. legacy format supported). Default + * value is {@code false}. + */ + protected boolean supportsPerStream() { + return false; + } + + /** + * Creates empty state with the provided stream name and namespace. + * + * @param streamName The stream name. + * @param streamNamespace The stream namespace. + * @return {@link JsonNode} representation of the generated empty state. 
+ */ + protected JsonNode createEmptyState(final String streamName, final String streamNamespace) { + if (supportsPerStream()) { + final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage() + .withStateType(AirbyteStateType.STREAM) + .withStream(new AirbyteStreamState().withStreamDescriptor(new StreamDescriptor().withName(streamName).withNamespace(streamNamespace))); + return Jsons.jsonNode(List.of(airbyteStateMessage)); + } else { + final DbState dbState = new DbState() + .withStreams(List.of(new DbStreamState().withStreamName(streamName).withStreamNamespace(streamNamespace))); + return Jsons.jsonNode(dbState); + } + } + + /** + * Extracts the state component from the provided {@link AirbyteMessage} based on the value returned + * by {@link #supportsPerStream()}. + * + * @param airbyteMessage An {@link AirbyteMessage} that contains state. + * @return A {@link JsonNode} representation of the state contained in the {@link AirbyteMessage}. + */ + protected JsonNode extractState(final AirbyteMessage airbyteMessage) { + if (supportsPerStream()) { + return Jsons.jsonNode(List.of(airbyteMessage.getState())); + } else { + return airbyteMessage.getState().getData(); + } + } + + protected AirbyteMessage createStateMessage(final DbStreamState dbStreamState, final List legacyStates) { + if (supportsPerStream()) { + return new AirbyteMessage().withType(Type.STATE) + .withState( + new AirbyteStateMessage().withStateType(AirbyteStateType.STREAM) + .withStream(new AirbyteStreamState() + .withStreamDescriptor(new StreamDescriptor().withNamespace(dbStreamState.getStreamNamespace()) + .withName(dbStreamState.getStreamName())) + .withStreamState(Jsons.jsonNode(dbStreamState))) + .withData(Jsons.jsonNode(new DbState().withCdc(false).withStreams(legacyStates)))); + } else { + return new AirbyteMessage().withType(Type.STATE).withState(new AirbyteStateMessage().withStateType(AirbyteStateType.LEGACY) + .withData(Jsons.jsonNode(new 
DbState().withCdc(false).withStreams(legacyStates)))); + } + } + } diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcStateHandler.java b/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcStateHandler.java index 63f92f7977c4..ad275bda45c2 100644 --- a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcStateHandler.java +++ b/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcStateHandler.java @@ -10,13 +10,14 @@ import com.fasterxml.jackson.databind.JsonNode; import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.debezium.CdcStateHandler; -import io.airbyte.integrations.source.relationaldb.StateManager; import io.airbyte.integrations.source.relationaldb.models.CdcState; +import io.airbyte.integrations.source.relationaldb.state.StateManager; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.AirbyteMessage.Type; import io.airbyte.protocol.models.AirbyteStateMessage; import java.util.HashMap; import java.util.Map; +import java.util.Optional; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -41,7 +42,11 @@ public AirbyteMessage saveState(final Map offset, final String d final CdcState cdcState = new CdcState().withState(asJson); stateManager.getCdcStateManager().setCdcState(cdcState); - final AirbyteStateMessage stateMessage = stateManager.emit(); + /* + * Namespace pair is ignored by global state manager, but is needed to satisfy the API contract. + * Therefore, provide an empty optional.
+ */ + final AirbyteStateMessage stateMessage = stateManager.emit(Optional.empty()); return new AirbyteMessage().withType(Type.STATE).withState(stateMessage); } diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java b/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java index 2a770d8e1ddd..1eea401030f1 100644 --- a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java +++ b/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java @@ -25,8 +25,8 @@ import io.airbyte.integrations.debezium.AirbyteDebeziumHandler; import io.airbyte.integrations.source.jdbc.AbstractJdbcSource; import io.airbyte.integrations.source.mssql.MssqlCdcHelper.SnapshotIsolation; -import io.airbyte.integrations.source.relationaldb.StateManager; import io.airbyte.integrations.source.relationaldb.TableInfo; +import io.airbyte.integrations.source.relationaldb.state.StateManager; import io.airbyte.protocol.models.AirbyteCatalog; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.AirbyteStream; diff --git a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlCdcStateHandler.java b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlCdcStateHandler.java index d6171c06ff82..e896f3082ce7 100644 --- a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlCdcStateHandler.java +++ b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlCdcStateHandler.java @@ -10,13 +10,14 @@ import com.fasterxml.jackson.databind.JsonNode; import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.debezium.CdcStateHandler; -import 
io.airbyte.integrations.source.relationaldb.StateManager; import io.airbyte.integrations.source.relationaldb.models.CdcState; +import io.airbyte.integrations.source.relationaldb.state.StateManager; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.AirbyteMessage.Type; import io.airbyte.protocol.models.AirbyteStateMessage; import java.util.HashMap; import java.util.Map; +import java.util.Optional; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -42,7 +43,11 @@ public AirbyteMessage saveState(final Map offset, final String d final CdcState cdcState = new CdcState().withState(asJson); stateManager.getCdcStateManager().setCdcState(cdcState); - final AirbyteStateMessage stateMessage = stateManager.emit(); + /* + * Namespace pair is ignored by global state manager, but is needed to satisfy the API contract. + * Therefore, provide an empty optional. + */ + final AirbyteStateMessage stateMessage = stateManager.emit(Optional.empty()); return new AirbyteMessage().withType(Type.STATE).withState(stateMessage); } diff --git a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlSource.java b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlSource.java index ea435043efc9..5c2ef9b99a01 100644 --- a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlSource.java +++ b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlSource.java @@ -25,9 +25,9 @@ import io.airbyte.integrations.debezium.AirbyteDebeziumHandler; import io.airbyte.integrations.source.jdbc.AbstractJdbcSource; import io.airbyte.integrations.source.mysql.helpers.CdcConfigurationHelper; -import io.airbyte.integrations.source.relationaldb.StateManager; import io.airbyte.integrations.source.relationaldb.TableInfo; import io.airbyte.integrations.source.relationaldb.models.CdcState;
+import io.airbyte.integrations.source.relationaldb.state.StateManager; import io.airbyte.protocol.models.AirbyteCatalog; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.AirbyteStream; diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/source/mysql/CdcMySqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/source/mysql/CdcMySqlSourceAcceptanceTest.java index f1008f08b40c..b23b8953fc82 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/source/mysql/CdcMySqlSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/source/mysql/CdcMySqlSourceAcceptanceTest.java @@ -10,6 +10,7 @@ import com.fasterxml.jackson.databind.JsonNode; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import io.airbyte.commons.json.Jsons; import io.airbyte.db.Database; @@ -174,7 +175,7 @@ public void testIncrementalSyncFailedIfBinlogIsDeleted() throws Exception { // when we run incremental sync again there should be no new records. Run a sync with the latest // state message and assert no records were emitted. - final JsonNode latestState = stateMessages.get(stateMessages.size() - 1).getData(); + final JsonNode latestState = Jsons.jsonNode(supportsPerStream() ? 
stateMessages : List.of(Iterables.getLast(stateMessages))); // RESET MASTER removes all binary log files that are listed in the index file, // leaving only a single, empty binary log file with a numeric suffix of .000001 executeQuery("RESET MASTER;"); diff --git a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresCdcStateHandler.java b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresCdcStateHandler.java index 50c93d0405ce..6175f81c904f 100644 --- a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresCdcStateHandler.java +++ b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresCdcStateHandler.java @@ -7,12 +7,13 @@ import com.fasterxml.jackson.databind.JsonNode; import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.debezium.CdcStateHandler; -import io.airbyte.integrations.source.relationaldb.StateManager; import io.airbyte.integrations.source.relationaldb.models.CdcState; +import io.airbyte.integrations.source.relationaldb.state.StateManager; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.AirbyteMessage.Type; import io.airbyte.protocol.models.AirbyteStateMessage; import java.util.Map; +import java.util.Optional; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -31,7 +32,11 @@ public AirbyteMessage saveState(final Map offset, final String d LOGGER.info("debezium state: {}", asJson); final CdcState cdcState = new CdcState().withState(asJson); stateManager.getCdcStateManager().setCdcState(cdcState); - final AirbyteStateMessage stateMessage = stateManager.emit(); + /* + * Namespace pair is ignored by global state manager, but is needed to satisfy the API contract. + * Therefore, provide an empty optional.
+ */ + final AirbyteStateMessage stateMessage = stateManager.emit(Optional.empty()); return new AirbyteMessage().withType(Type.STATE).withState(stateMessage); } diff --git a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java index cb83f7324c69..76aaa2c88d11 100644 --- a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java +++ b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java @@ -26,12 +26,17 @@ import io.airbyte.integrations.debezium.AirbyteDebeziumHandler; import io.airbyte.integrations.source.jdbc.AbstractJdbcSource; import io.airbyte.integrations.source.jdbc.dto.JdbcPrivilegeDto; -import io.airbyte.integrations.source.relationaldb.StateManager; import io.airbyte.integrations.source.relationaldb.TableInfo; +import io.airbyte.integrations.source.relationaldb.models.CdcState; +import io.airbyte.integrations.source.relationaldb.state.StateManager; import io.airbyte.protocol.models.AirbyteCatalog; import io.airbyte.protocol.models.AirbyteConnectionStatus; +import io.airbyte.protocol.models.AirbyteGlobalState; import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.AirbyteStateMessage; +import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; import io.airbyte.protocol.models.AirbyteStream; +import io.airbyte.protocol.models.AirbyteStreamState; import io.airbyte.protocol.models.CommonField; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import io.airbyte.protocol.models.SyncMode; @@ -404,6 +409,27 @@ private static AirbyteStream addCdcMetadataColumns(final AirbyteStream stream) { return stream; } + // TODO This is a temporary override so that the Postgres source can take advantage 
of per-stream + // state + @Override + protected List generateEmptyInitialState(final JsonNode config) { + if (getSupportedStateType(config) == AirbyteStateType.GLOBAL) { + final AirbyteGlobalState globalState = new AirbyteGlobalState() + .withSharedState(Jsons.jsonNode(new CdcState())) + .withStreamStates(List.of()); + return List.of(new AirbyteStateMessage().withStateType(AirbyteStateType.GLOBAL).withGlobal(globalState)); + } else { + return List.of(new AirbyteStateMessage() + .withStateType(AirbyteStateType.STREAM) + .withStream(new AirbyteStreamState())); + } + } + + @Override + protected AirbyteStateType getSupportedStateType(final JsonNode config) { + return isCdc(config) ? AirbyteStateType.GLOBAL : AirbyteStateType.STREAM; + } + public static void main(final String[] args) throws Exception { final Source source = PostgresSource.sshWrappedSource(); LOGGER.info("starting source: {}", PostgresSource.class); diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractSshPostgresSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractSshPostgresSourceAcceptanceTest.java index 633e9715f59c..911a24f02f21 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractSshPostgresSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractSshPostgresSourceAcceptanceTest.java @@ -135,4 +135,9 @@ protected JsonNode getState() { return Jsons.jsonNode(new HashMap<>()); } + @Override + protected boolean supportsPerStream() { + return true; + } + } diff --git 
a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceAcceptanceTest.java index acd1da14241f..623d2ef11e80 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceAcceptanceTest.java @@ -134,4 +134,9 @@ protected JsonNode getState() { return Jsons.jsonNode(new HashMap<>()); } + @Override + protected boolean supportsPerStream() { + return true; + } + } diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceStrictEncryptAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceStrictEncryptAcceptanceTest.java index 569d84d6e6cb..6752036e504e 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceStrictEncryptAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceStrictEncryptAcceptanceTest.java @@ -130,4 +130,9 @@ protected JsonNode getState() { return Jsons.jsonNode(new HashMap<>()); } + @Override + protected boolean supportsPerStream() { + return true; + } + } diff --git 
a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/CdcPostgresSourceTest.java b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/CdcPostgresSourceTest.java index 6d2caa067420..2aa5e03ebfda 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/CdcPostgresSourceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/CdcPostgresSourceTest.java @@ -287,7 +287,7 @@ public void testRecordsProducedDuringAndAfterSync() throws Exception { writeModelRecord(record); } - final JsonNode state = stateAfterFirstBatch.get(0).getData(); + final JsonNode state = Jsons.jsonNode(stateAfterFirstBatch); final AutoCloseableIterator secondBatchIterator = getSource() .read(getConfig(), CONFIGURED_CATALOG, state); final List dataFromSecondBatch = AutoCloseableIterators diff --git a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresJdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresJdbcSourceAcceptanceTest.java index 459a44fa86e3..1695d4ed8543 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresJdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresJdbcSourceAcceptanceTest.java @@ -22,12 +22,10 @@ import io.airbyte.db.jdbc.streaming.AdaptiveStreamingQueryConfig; import io.airbyte.integrations.source.jdbc.AbstractJdbcSource; import io.airbyte.integrations.source.jdbc.test.JdbcSourceAcceptanceTest; -import io.airbyte.integrations.source.relationaldb.models.DbState; import io.airbyte.integrations.source.relationaldb.models.DbStreamState; import 
io.airbyte.protocol.models.AirbyteCatalog; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.AirbyteRecordMessage; -import io.airbyte.protocol.models.AirbyteStateMessage; import io.airbyte.protocol.models.CatalogHelpers; import io.airbyte.protocol.models.ConfiguredAirbyteStream; import io.airbyte.protocol.models.ConnectorSpecification; @@ -175,7 +173,7 @@ protected List getAirbyteMessagesReadOneColumn() { } @Override - protected ArrayList getAirbyteMessagesCheckCursorSpaceInColumnName(ConfiguredAirbyteStream streamWithSpaces) { + protected ArrayList getAirbyteMessagesCheckCursorSpaceInColumnName(final ConfiguredAirbyteStream streamWithSpaces) { final AirbyteMessage firstMessage = getTestMessages().get(0); firstMessage.getRecord().setStream(streamWithSpaces.getStream().getName()); ((ObjectNode) firstMessage.getRecord().getData()).remove(COL_UPDATED_AT); @@ -200,7 +198,7 @@ protected ArrayList getAirbyteMessagesCheckCursorSpaceInColumnNa } @Override - protected List getAirbyteMessagesSecondSync(String streamName2) { + protected List getAirbyteMessagesSecondSync(final String streamName2) { return getTestMessages() .stream() .map(Jsons::clone) @@ -217,7 +215,7 @@ protected List getAirbyteMessagesSecondSync(String streamName2) .collect(Collectors.toList()); } - protected List getAirbyteMessagesSecondStreamWithNamespace(String streamName2) { + protected List getAirbyteMessagesSecondStreamWithNamespace(final String streamName2) { return getTestMessages() .stream() .map(Jsons::clone) @@ -233,7 +231,7 @@ protected List getAirbyteMessagesSecondStreamWithNamespace(Strin .collect(Collectors.toList()); } - protected List getAirbyteMessagesForTablesWithQuoting(ConfiguredAirbyteStream streamForTableWithSpaces) { + protected List getAirbyteMessagesForTablesWithQuoting(final ConfiguredAirbyteStream streamForTableWithSpaces) { return getTestMessages() .stream() .map(Jsons::clone) @@ -410,7 +408,7 @@ protected JdbcSourceOperations 
getSourceOperations() { } @Override - protected List getExpectedAirbyteMessagesSecondSync(String namespace) { + protected List getExpectedAirbyteMessagesSecondSync(final String namespace) { final List expectedMessages = new ArrayList<>(); expectedMessages.add(new AirbyteMessage().withType(AirbyteMessage.Type.RECORD) .withRecord(new AirbyteRecordMessage().withStream(streamName).withNamespace(namespace) @@ -430,17 +428,18 @@ protected List getExpectedAirbyteMessagesSecondSync(String names COL_WAKEUP_AT, "12:12:12.123456-05:00", COL_LAST_VISITED_AT, "2006-10-19T17:23:54.123456Z", COL_LAST_COMMENT_AT, "2006-01-01T17:23:54.123456"))))); - expectedMessages.add(new AirbyteMessage() - .withType(AirbyteMessage.Type.STATE) - .withState(new AirbyteStateMessage() - .withData(Jsons.jsonNode(new DbState() - .withCdc(false) - .withStreams(Lists.newArrayList(new DbStreamState() - .withStreamName(streamName) - .withStreamNamespace(namespace) - .withCursorField(ImmutableList.of(COL_ID)) - .withCursor("5"))))))); + final DbStreamState state = new DbStreamState() + .withStreamName(streamName) + .withStreamNamespace(namespace) + .withCursorField(ImmutableList.of(COL_ID)) + .withCursor("5"); + expectedMessages.addAll(createExpectedTestMessages(List.of(state))); return expectedMessages; } + @Override + protected boolean supportsPerStream() { + return true; + } + } diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java index 6ebdc7aa751e..389d7e555432 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java @@ -20,12 +20,17 @@ import 
io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; import io.airbyte.integrations.base.Source; import io.airbyte.integrations.source.relationaldb.models.DbState; +import io.airbyte.integrations.source.relationaldb.state.AirbyteStateMessageListTypeReference; +import io.airbyte.integrations.source.relationaldb.state.StateManager; +import io.airbyte.integrations.source.relationaldb.state.StateManagerFactory; import io.airbyte.protocol.models.AirbyteCatalog; import io.airbyte.protocol.models.AirbyteConnectionStatus; import io.airbyte.protocol.models.AirbyteConnectionStatus.Status; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.AirbyteMessage.Type; import io.airbyte.protocol.models.AirbyteRecordMessage; +import io.airbyte.protocol.models.AirbyteStateMessage; +import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; import io.airbyte.protocol.models.AirbyteStream; import io.airbyte.protocol.models.CatalogHelpers; import io.airbyte.protocol.models.CommonField; @@ -103,9 +108,8 @@ public AutoCloseableIterator read(final JsonNode config, final ConfiguredAirbyteCatalog catalog, final JsonNode state) throws Exception { - final StateManager stateManager = new StateManager( - state == null ? StateManager.emptyState() : Jsons.object(state, DbState.class), - catalog); + final StateManager stateManager = + StateManagerFactory.createStateManager(getSupportedStateType(config), deserializeInitialState(state, config), catalog); final Instant emittedAt = Instant.now(); final Database database = createDatabaseInternal(config); @@ -509,4 +513,45 @@ private Database createDatabaseInternal(final JsonNode sourceConfig) throws Exce return database; } + /** + * Deserializes the state represented as JSON into an object representation. + * + * @param initialStateJson The state as JSON. + * @param config The connector configuration. + * @return The deserialized object representation of the state. 
+ */ + protected List deserializeInitialState(final JsonNode initialStateJson, final JsonNode config) { + if (initialStateJson == null) { + return generateEmptyInitialState(config); + } else { + try { + return Jsons.object(initialStateJson, new AirbyteStateMessageListTypeReference()); + } catch (final IllegalArgumentException e) { + LOGGER.warn("Defaulting to legacy state object..."); + return List.of(new AirbyteStateMessage().withStateType(AirbyteStateType.LEGACY).withData(initialStateJson)); + } + } + } + + /** + * Generates an empty, initial state for use by the connector. + * + * @param config The connector configuration. + * @return The empty, initial state. + */ + protected List generateEmptyInitialState(final JsonNode config) { + // For backwards compatibility with existing connectors + return List.of(new AirbyteStateMessage().withStateType(AirbyteStateType.LEGACY).withData(Jsons.jsonNode(new DbState()))); + } + + /** + * Returns the {@link AirbyteStateType} supported by this connector. + * + * @param config The connector configuration. + * @return A {@link AirbyteStateType} representing the state supported by this connector. 
+ */ + protected AirbyteStateType getSupportedStateType(final JsonNode config) { + return AirbyteStateType.LEGACY; + } + } diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/CdcStateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/CdcStateManager.java index db33dfd6167b..7b855e6c9770 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/CdcStateManager.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/CdcStateManager.java @@ -4,7 +4,6 @@ package io.airbyte.integrations.source.relationaldb; -import com.google.common.annotations.VisibleForTesting; import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.source.relationaldb.models.CdcState; import org.slf4j.Logger; @@ -12,14 +11,13 @@ public class CdcStateManager { - private static final Logger LOGGER = LoggerFactory.getLogger(StateManager.class); + private static final Logger LOGGER = LoggerFactory.getLogger(CdcStateManager.class); private final CdcState initialState; private CdcState currentState; - @VisibleForTesting - CdcStateManager(final CdcState serialized) { + public CdcStateManager(final CdcState serialized) { this.initialState = serialized; this.currentState = serialized; diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/StateDecoratingIterator.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/StateDecoratingIterator.java index 122d62ddbb65..7eabaad9eb31 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/StateDecoratingIterator.java +++ 
b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/StateDecoratingIterator.java @@ -7,6 +7,7 @@ import com.google.common.collect.AbstractIterator; import io.airbyte.db.IncrementalUtils; import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; +import io.airbyte.integrations.source.relationaldb.state.StateManager; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.AirbyteMessage.Type; import io.airbyte.protocol.models.AirbyteStateMessage; @@ -40,7 +41,6 @@ public StateDecoratingIterator(final Iterator messageIterator, this.cursorField = cursorField; this.cursorType = cursorType; this.maxCursor = initialCursor; - stateManager.setIsCdc(false); } private String getCursorCandidate(final AirbyteMessage message) { diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/StateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/StateManager.java deleted file mode 100644 index 3e509e2869d9..000000000000 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/StateManager.java +++ /dev/null @@ -1,197 +0,0 @@ -/* - * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
- */ - -package io.airbyte.integrations.source.relationaldb; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Lists; -import io.airbyte.commons.json.Jsons; -import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; -import io.airbyte.integrations.source.relationaldb.models.DbState; -import io.airbyte.integrations.source.relationaldb.models.DbStreamState; -import io.airbyte.protocol.models.AirbyteStateMessage; -import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; -import io.airbyte.protocol.models.ConfiguredAirbyteStream; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Optional; -import java.util.Set; -import java.util.stream.Collectors; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Handles the state machine for the state of source implementations. 
- */ -public class StateManager { - - private static final Logger LOGGER = LoggerFactory.getLogger(StateManager.class); - - private final Map pairToCursorInfo; - private Boolean isCdc; - private final CdcStateManager cdcStateManager; - - public static DbState emptyState() { - return new DbState(); - } - - public StateManager(final DbState serialized, final ConfiguredAirbyteCatalog catalog) { - this.cdcStateManager = new CdcStateManager(serialized.getCdcState()); - this.isCdc = serialized.getCdc(); - if (serialized.getCdc() == null) { - this.isCdc = false; - } - - pairToCursorInfo = - new ImmutableMap.Builder().putAll(createCursorInfoMap(serialized, catalog)).build(); - } - - private static Map createCursorInfoMap(final DbState serialized, - final ConfiguredAirbyteCatalog catalog) { - final Set allStreamNames = catalog.getStreams() - .stream() - .map(ConfiguredAirbyteStream::getStream) - .map(AirbyteStreamNameNamespacePair::fromAirbyteSteam) - .collect(Collectors.toSet()); - allStreamNames.addAll(serialized.getStreams().stream().map(StateManager::toAirbyteStreamNameNamespacePair).collect(Collectors.toSet())); - - final Map localMap = new HashMap<>(); - final Map pairToState = serialized.getStreams() - .stream() - .collect(Collectors.toMap(StateManager::toAirbyteStreamNameNamespacePair, a -> a)); - final Map pairToConfiguredAirbyteStream = catalog.getStreams().stream() - .collect(Collectors.toMap(AirbyteStreamNameNamespacePair::fromConfiguredAirbyteSteam, s -> s)); - - for (final AirbyteStreamNameNamespacePair pair : allStreamNames) { - final Optional stateOptional = Optional.ofNullable(pairToState.get(pair)); - final Optional streamOptional = Optional.ofNullable(pairToConfiguredAirbyteStream.get(pair)); - localMap.put(pair, createCursorInfoForStream(pair, stateOptional, streamOptional)); - } - - return localMap; - } - - private static AirbyteStreamNameNamespacePair toAirbyteStreamNameNamespacePair(final DbStreamState state) { - return new 
AirbyteStreamNameNamespacePair(state.getStreamName(), state.getStreamNamespace()); - } - - @VisibleForTesting - @SuppressWarnings("OptionalUsedAsFieldOrParameterType") - static CursorInfo createCursorInfoForStream(final AirbyteStreamNameNamespacePair pair, - final Optional stateOptional, - final Optional streamOptional) { - final String originalCursorField = stateOptional - .map(DbStreamState::getCursorField) - .flatMap(f -> f.size() > 0 ? Optional.of(f.get(0)) : Optional.empty()) - .orElse(null); - final String originalCursor = stateOptional.map(DbStreamState::getCursor).orElse(null); - - final String cursor; - final String cursorField; - - // if cursor field is set in catalog. - if (streamOptional.map(ConfiguredAirbyteStream::getCursorField).isPresent()) { - cursorField = streamOptional - .map(ConfiguredAirbyteStream::getCursorField) - .flatMap(f -> f.size() > 0 ? Optional.of(f.get(0)) : Optional.empty()) - .orElse(null); - // if cursor field is set in state. - if (stateOptional.map(DbStreamState::getCursorField).isPresent()) { - // if cursor field in catalog and state are the same. - if (stateOptional.map(DbStreamState::getCursorField).equals(streamOptional.map(ConfiguredAirbyteStream::getCursorField))) { - cursor = stateOptional.map(DbStreamState::getCursor).orElse(null); - LOGGER.info("Found matching cursor in state. Stream: {}. Cursor Field: {} Value: {}", pair, cursorField, cursor); - // if cursor field in catalog and state are different. - } else { - cursor = null; - LOGGER.info( - "Found cursor field. Does not match previous cursor field. Stream: {}. Original Cursor Field: {}. New Cursor Field: {}. Resetting cursor value.", - pair, originalCursorField, cursorField); - } - // if cursor field is not set in state but is set in catalog. - } else { - LOGGER.info("No cursor field set in catalog but not present in state. Stream: {}, New Cursor Field: {}. 
Resetting cursor value", pair, - cursorField); - cursor = null; - } - // if cursor field is not set in catalog. - } else { - LOGGER.info( - "Cursor field set in state but not present in catalog. Stream: {}. Original Cursor Field: {}. Original value: {}. Resetting cursor.", - pair, originalCursorField, originalCursor); - cursorField = null; - cursor = null; - } - - return new CursorInfo(originalCursorField, originalCursor, cursorField, cursor); - } - - private Optional getCursorInfo(final AirbyteStreamNameNamespacePair pair) { - return Optional.ofNullable(pairToCursorInfo.get(pair)); - } - - public Optional getOriginalCursorField(final AirbyteStreamNameNamespacePair pair) { - return getCursorInfo(pair).map(CursorInfo::getOriginalCursorField); - } - - public Optional getOriginalCursor(final AirbyteStreamNameNamespacePair pair) { - return getCursorInfo(pair).map(CursorInfo::getOriginalCursor); - } - - public Optional getCursorField(final AirbyteStreamNameNamespacePair pair) { - return getCursorInfo(pair).map(CursorInfo::getCursorField); - } - - public Optional getCursor(final AirbyteStreamNameNamespacePair pair) { - return getCursorInfo(pair).map(CursorInfo::getCursor); - } - - synchronized public AirbyteStateMessage updateAndEmit(final AirbyteStreamNameNamespacePair pair, final String cursor) { - // cdc file gets updated by debezium so the "update" part is a no op. 
- if (!isCdc) { - final Optional cursorInfo = getCursorInfo(pair); - Preconditions.checkState(cursorInfo.isPresent(), "Could not find cursor information for stream: " + pair); - cursorInfo.get().setCursor(cursor); - } - - return toState(); - } - - public void setIsCdc(final boolean isCdc) { - if (this.isCdc == null) { - this.isCdc = isCdc; - } else { - Preconditions.checkState(this.isCdc == isCdc, "attempt to set cdc to {}, but is already set to {}.", isCdc, this.isCdc); - } - } - - public CdcStateManager getCdcStateManager() { - return cdcStateManager; - } - - public AirbyteStateMessage emit() { - return toState(); - } - - private AirbyteStateMessage toState() { - final DbState DbState = new DbState() - .withCdc(isCdc) - .withStreams(pairToCursorInfo.entrySet().stream() - .sorted(Entry.comparingByKey()) // sort by stream name then namespace for sanity. - .map(e -> new DbStreamState() - .withStreamName(e.getKey().getName()) - .withStreamNamespace(e.getKey().getNamespace()) - .withCursorField(e.getValue().getCursorField() == null ? Collections.emptyList() : Lists.newArrayList(e.getValue().getCursorField())) - .withCursor(e.getValue().getCursor())) - .collect(Collectors.toList())) - .withCdcState(cdcStateManager.getCdcState()); - - return new AirbyteStateMessage().withData(Jsons.jsonNode(DbState)); - } - -} diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/AbstractStateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/AbstractStateManager.java new file mode 100644 index 000000000000..dec78ec39fac --- /dev/null +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/AbstractStateManager.java @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+ */
+
+package io.airbyte.integrations.source.relationaldb.state;
+
+import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair;
+import io.airbyte.integrations.source.relationaldb.CursorInfo;
+import io.airbyte.protocol.models.AirbyteStateMessage;
+import io.airbyte.protocol.models.ConfiguredAirbyteCatalog;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.function.Function;
+import java.util.function.Supplier;
+
+/**
+ * Abstract implementation of the {@link StateManager} interface that provides common functionality
+ * for state manager implementations.
+ *
+ * @param <T> The type associated with the state object managed by this manager.
+ * @param <S> The type associated with the state object stored in the state managed by this manager.
+ */
+public abstract class AbstractStateManager<T, S> implements StateManager<T, S> {
+
+  /**
+   * The {@link CursorManager} responsible for keeping track of the current cursor value for each
+   * stream managed by this state manager.
+   */
+  private final CursorManager<S> cursorManager;
+
+  /**
+   * Constructs a new state manager for the given configured connector.
+   *
+   * @param catalog The connector's configured catalog.
+   * @param streamSupplier A {@link Supplier} that provides the cursor manager with the collection of
+   *        streams tracked by the connector's state.
+   * @param cursorFunction A {@link Function} that extracts the current cursor from a stream stored in
+   *        the connector's state.
+   * @param cursorFieldFunction A {@link Function} that extracts the cursor field name from a stream
+   *        stored in the connector's state.
+   * @param namespacePairFunction A {@link Function} that generates a
+   *        {@link AirbyteStreamNameNamespacePair} that identifies each stream in the connector's
+   *        state.
+   */
+  public AbstractStateManager(final ConfiguredAirbyteCatalog catalog,
+                              final Supplier<Collection<S>> streamSupplier,
+                              final Function<S, String> cursorFunction,
+                              final Function<S, List<String>> cursorFieldFunction,
+                              final Function<S, AirbyteStreamNameNamespacePair> namespacePairFunction) {
+    cursorManager = new CursorManager<>(catalog, streamSupplier, cursorFunction, cursorFieldFunction, namespacePairFunction);
+  }
+
+  @Override
+  public Map<AirbyteStreamNameNamespacePair, CursorInfo> getPairToCursorInfoMap() {
+    return cursorManager.getPairToCursorInfo(); // delegates to the cursor manager built in the constructor
+  }
+
+  @Override
+  public abstract AirbyteStateMessage toState(final Optional<AirbyteStreamNameNamespacePair> pair);
+
+}
diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/AirbyteStateMessageListTypeReference.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/AirbyteStateMessageListTypeReference.java
new file mode 100644
index 000000000000..c7e153e6d79a
--- /dev/null
+++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/AirbyteStateMessageListTypeReference.java
@@ -0,0 +1,13 @@
+/*
+ * Copyright (c) 2022 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.source.relationaldb.state;
+
+import com.fasterxml.jackson.core.type.TypeReference;
+import io.airbyte.protocol.models.AirbyteStateMessage;
+import java.util.List;
+
+public class AirbyteStateMessageListTypeReference extends TypeReference<List<AirbyteStateMessage>> {
+
+}
diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/CursorManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/CursorManager.java
new file mode 100644
index 000000000000..207b51ad5bad
--- /dev/null
+++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/CursorManager.java
@@ -0,0 +1,222 @@
+/*
+ * Copyright (c) 2022 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.source.relationaldb.state;
+
+import com.google.common.annotations.VisibleForTesting;
+import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair;
+import io.airbyte.integrations.source.relationaldb.CursorInfo;
+import io.airbyte.protocol.models.ConfiguredAirbyteCatalog;
+import io.airbyte.protocol.models.ConfiguredAirbyteStream;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import java.util.function.Function;
+import java.util.function.Supplier;
+import java.util.stream.Collectors;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Manages the map of streams to current cursor values for state management.
+ *
+ * @param <S> The type that represents the stream object which holds the current cursor information
+ *        in the state.
+ */
+public class CursorManager<S> {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(CursorManager.class);
+
+  /**
+   * Map of streams (name/namespace tuple) to the current cursor information stored in the state.
+   */
+  private final Map<AirbyteStreamNameNamespacePair, CursorInfo> pairToCursorInfo;
+
+  /**
+   * Constructs a new {@link CursorManager} based on the configured connector and current state
+   * information.
+   *
+   * @param catalog The connector's configured catalog.
+   * @param streamSupplier A {@link Supplier} that provides the cursor manager with the collection of
+   *        streams tracked by the connector's state.
+   * @param cursorFunction A {@link Function} that extracts the current cursor from a stream stored in
+   *        the connector's state.
+   * @param cursorFieldFunction A {@link Function} that extracts the cursor field name from a stream
+   *        stored in the connector's state.
+   * @param namespacePairFunction A {@link Function} that generates a
+   *        {@link AirbyteStreamNameNamespacePair} that identifies each stream in the connector's
+   *        state.
+   */
+  public CursorManager(final ConfiguredAirbyteCatalog catalog,
+                       final Supplier<Collection<S>> streamSupplier,
+                       final Function<S, String> cursorFunction,
+                       final Function<S, List<String>> cursorFieldFunction,
+                       final Function<S, AirbyteStreamNameNamespacePair> namespacePairFunction) {
+    pairToCursorInfo = createCursorInfoMap(catalog, streamSupplier, cursorFunction, cursorFieldFunction, namespacePairFunction);
+  }
+
+  /**
+   * Creates the cursor information map that associates stream name/namespace tuples with the current
+   * cursor information for that stream as stored in the connector's state.
+   *
+   * @param catalog The connector's configured catalog.
+   * @param streamSupplier A {@link Supplier} that provides the cursor manager with the collection of
+   *        streams tracked by the connector's state.
+   * @param cursorFunction A {@link Function} that extracts the current cursor from a stream stored in
+   *        the connector's state.
+   * @param cursorFieldFunction A {@link Function} that extracts the cursor field name from a stream
+   *        stored in the connector's state.
+   * @param namespacePairFunction A {@link Function} that generates a
+   *        {@link AirbyteStreamNameNamespacePair} that identifies each stream in the connector's
+   *        state. State entries for which it returns {@code null} are ignored.
+   * @return A map of streams to current cursor information for the stream.
+   */
+  @VisibleForTesting
+  protected Map<AirbyteStreamNameNamespacePair, CursorInfo> createCursorInfoMap(
+      final ConfiguredAirbyteCatalog catalog,
+      final Supplier<Collection<S>> streamSupplier,
+      final Function<S, String> cursorFunction,
+      final Function<S, List<String>> cursorFieldFunction,
+      final Function<S, AirbyteStreamNameNamespacePair> namespacePairFunction) {
+    final Set<AirbyteStreamNameNamespacePair> allStreamNames = catalog.getStreams()
+        .stream()
+        .map(ConfiguredAirbyteStream::getStream)
+        .map(AirbyteStreamNameNamespacePair::fromAirbyteSteam)
+        .collect(Collectors.toSet());
+    allStreamNames.addAll(streamSupplier.get().stream().map(namespacePairFunction).filter(n -> n != null).collect(Collectors.toSet()));
+
+    final Map<AirbyteStreamNameNamespacePair, CursorInfo> localMap = new HashMap<>();
+    final Map<AirbyteStreamNameNamespacePair, S> pairToState = streamSupplier.get().stream()
+        .filter(s -> namespacePairFunction.apply(s) != null) // same null filter as allStreamNames; two null keys would make toMap throw
+        .collect(Collectors.toMap(namespacePairFunction, Function.identity()));
+    final Map<AirbyteStreamNameNamespacePair, ConfiguredAirbyteStream> pairToConfiguredAirbyteStream = catalog.getStreams().stream()
+        .collect(Collectors.toMap(AirbyteStreamNameNamespacePair::fromConfiguredAirbyteSteam, Function.identity()));
+
+    for (final AirbyteStreamNameNamespacePair pair : allStreamNames) {
+      final Optional<S> stateOptional = Optional.ofNullable(pairToState.get(pair));
+      final Optional<ConfiguredAirbyteStream> streamOptional = Optional.ofNullable(pairToConfiguredAirbyteStream.get(pair));
+      localMap.put(pair, createCursorInfoForStream(pair, stateOptional, streamOptional, cursorFunction, cursorFieldFunction));
+    }
+
+    return localMap;
+  }
+
+  /**
+   * Generates a {@link CursorInfo} object based on the data currently stored in the connector's state
+   * for the given stream.
+   *
+   * @param pair A {@link AirbyteStreamNameNamespacePair} that identifies a specific stream managed by
+   *        the connector.
+   * @param stateOptional {@link Optional} containing the current state associated with the stream.
+   * @param streamOptional {@link Optional} containing the {@link ConfiguredAirbyteStream} associated
+   *        with the stream.
+   * @param cursorFunction A {@link Function} that provides the current cursor from the state
+   *        associated with the stream.
+   * @param cursorFieldFunction A {@link Function} that provides the cursor field name for the cursor
+   *        stored in the state associated with the stream.
+   * @return A {@link CursorInfo} object based on the data currently stored in the connector's state
+   *         for the given stream.
+   */
+  @SuppressWarnings("OptionalUsedAsFieldOrParameterType")
+  @VisibleForTesting
+  protected CursorInfo createCursorInfoForStream(final AirbyteStreamNameNamespacePair pair,
+                                                 final Optional<S> stateOptional,
+                                                 final Optional<ConfiguredAirbyteStream> streamOptional,
+                                                 final Function<S, String> cursorFunction,
+                                                 final Function<S, List<String>> cursorFieldFunction) {
+    final String originalCursorField = stateOptional
+        .map(cursorFieldFunction)
+        .flatMap(f -> f.size() > 0 ? Optional.of(f.get(0)) : Optional.empty())
+        .orElse(null);
+    final String originalCursor = stateOptional.map(cursorFunction).orElse(null);
+
+    final String cursor;
+    final String cursorField;
+
+    // if cursor field is set in catalog.
+    if (streamOptional.map(ConfiguredAirbyteStream::getCursorField).isPresent()) {
+      cursorField = streamOptional
+          .map(ConfiguredAirbyteStream::getCursorField)
+          .flatMap(f -> f.size() > 0 ? Optional.of(f.get(0)) : Optional.empty())
+          .orElse(null);
+      // if cursor field is set in state.
+      if (stateOptional.map(cursorFieldFunction).isPresent()) {
+        // if cursor field in catalog and state are the same.
+        if (stateOptional.map(cursorFieldFunction).equals(streamOptional.map(ConfiguredAirbyteStream::getCursorField))) {
+          cursor = stateOptional.map(cursorFunction).orElse(null);
+          LOGGER.info("Found matching cursor in state. Stream: {}. Cursor Field: {} Value: {}", pair, cursorField, cursor);
+          // if cursor field in catalog and state are different.
+        } else {
+          cursor = null;
+          LOGGER.info(
+              "Found cursor field. Does not match previous cursor field. Stream: {}. Original Cursor Field: {}. New Cursor Field: {}. Resetting cursor value.",
+              pair, originalCursorField, cursorField);
+        }
+        // if cursor field is not set in state but is set in catalog.
+      } else {
+        LOGGER.info("Cursor field set in catalog but not present in state. Stream: {}, New Cursor Field: {}. Resetting cursor value", pair,
+            cursorField);
+        cursor = null;
+      }
+      // if cursor field is not set in catalog.
+    } else {
+      LOGGER.info(
+          "Cursor field set in state but not present in catalog. Stream: {}. Original Cursor Field: {}. Original value: {}. Resetting cursor.",
+          pair, originalCursorField, originalCursor);
+      cursorField = null;
+      cursor = null;
+    }
+
+    return new CursorInfo(originalCursorField, originalCursor, cursorField, cursor);
+  }
+
+  /**
+   * Retrieves a copy of the stream name/namespace tuple to current cursor information map.
+   *
+   * @return A copy of the stream name/namespace tuple to current cursor information map.
+   */
+  public Map<AirbyteStreamNameNamespacePair, CursorInfo> getPairToCursorInfo() {
+    return Map.copyOf(pairToCursorInfo);
+  }
+
+  /**
+   * Retrieves an {@link Optional} possibly containing the current {@link CursorInfo} associated with
+   * the provided stream name/namespace tuple.
+   *
+   * @param pair The {@link AirbyteStreamNameNamespacePair} which identifies a stream.
+   * @return An {@link Optional} possibly containing the current {@link CursorInfo} associated with
+   *         the provided stream name/namespace tuple.
+   */
+  public Optional<CursorInfo> getCursorInfo(final AirbyteStreamNameNamespacePair pair) {
+    return Optional.ofNullable(pairToCursorInfo.get(pair));
+  }
+
+  /**
+   * Retrieves an {@link Optional} possibly containing the cursor field name associated with the
+   * cursor tracked in the state associated with the provided stream name/namespace tuple.
+   *
+   * @param pair The {@link AirbyteStreamNameNamespacePair} which identifies a stream.
+   * @return An {@link Optional} possibly containing the cursor field name associated with the cursor
+   *         tracked in the state associated with the provided stream name/namespace tuple.
+   */
+  public Optional<String> getCursorField(final AirbyteStreamNameNamespacePair pair) {
+    return getCursorInfo(pair).map(CursorInfo::getCursorField);
+  }
+
+  /**
+   * Retrieves an {@link Optional} possibly containing the cursor value tracked in the state
+   * associated with the provided stream name/namespace tuple.
+   *
+   * @param pair The {@link AirbyteStreamNameNamespacePair} which identifies a stream.
+   * @return An {@link Optional} possibly containing the cursor value tracked in the state associated
+   *         with the provided stream name/namespace tuple.
+   */
+  public Optional<String> getCursor(final AirbyteStreamNameNamespacePair pair) {
+    return getCursorInfo(pair).map(CursorInfo::getCursor);
+  }
+
+}
diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/GlobalStateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/GlobalStateManager.java
new file mode 100644
index 000000000000..ca8b516c7cb3
--- /dev/null
+++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/GlobalStateManager.java
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2022 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.source.relationaldb.state;
+
+import static io.airbyte.integrations.source.relationaldb.state.StateGeneratorUtils.CURSOR_FIELD_FUNCTION;
+import static io.airbyte.integrations.source.relationaldb.state.StateGeneratorUtils.CURSOR_FUNCTION;
+import static io.airbyte.integrations.source.relationaldb.state.StateGeneratorUtils.NAME_NAMESPACE_PAIR_FUNCTION;
+
+import io.airbyte.commons.json.Jsons;
+import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair;
+import io.airbyte.integrations.source.relationaldb.CdcStateManager;
+import io.airbyte.integrations.source.relationaldb.models.CdcState;
+import io.airbyte.integrations.source.relationaldb.models.DbState;
+import io.airbyte.protocol.models.AirbyteGlobalState;
+import io.airbyte.protocol.models.AirbyteStateMessage;
+import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType;
+import io.airbyte.protocol.models.AirbyteStreamState;
+import io.airbyte.protocol.models.ConfiguredAirbyteCatalog;
+import io.airbyte.protocol.models.StreamDescriptor;
+import java.util.Collection;
+import java.util.List;
+import java.util.Optional;
+import java.util.function.Supplier;
+import java.util.stream.Collectors;
+
+/**
+ * Global implementation of the {@link StateManager} interface.
+ *
+ * This implementation generates a single, global state object for the state tracked by this
+ * manager.
+ */
+public class GlobalStateManager extends AbstractStateManager<AirbyteStateMessage, AirbyteStreamState> {
+
+  /**
+   * Legacy {@link CdcStateManager} used to manage state for connectors that support Change Data
+   * Capture (CDC).
+   */
+  private final CdcStateManager cdcStateManager;
+
+  /**
+   * Constructs a new {@link GlobalStateManager} that is seeded with the provided
+   * {@link AirbyteStateMessage}.
+   *
+   * @param airbyteStateMessage The initial state represented as an {@link AirbyteStateMessage}.
+   * @param catalog The {@link ConfiguredAirbyteCatalog} for the connector associated with this state
+   *        manager.
+   */
+  public GlobalStateManager(final AirbyteStateMessage airbyteStateMessage, final ConfiguredAirbyteCatalog catalog) {
+    super(catalog,
+        getStreamsSupplier(airbyteStateMessage),
+        CURSOR_FUNCTION,
+        CURSOR_FIELD_FUNCTION,
+        NAME_NAMESPACE_PAIR_FUNCTION);
+
+    this.cdcStateManager = new CdcStateManager(extractCdcState(airbyteStateMessage));
+  }
+
+  @Override
+  public CdcStateManager getCdcStateManager() {
+    return cdcStateManager;
+  }
+
+  @Override
+  public AirbyteStateMessage toState(final Optional<AirbyteStreamNameNamespacePair> pair) {
+    // Populate global state
+    final AirbyteGlobalState globalState = new AirbyteGlobalState();
+    globalState.setSharedState(Jsons.jsonNode(getCdcStateManager().getCdcState()));
+    globalState.setStreamStates(StateGeneratorUtils.generateStreamStateList(getPairToCursorInfoMap()));
+
+    // Generate the legacy state for backwards compatibility
+    final DbState dbState = StateGeneratorUtils.generateDbState(getPairToCursorInfoMap())
+        .withCdc(true)
+        .withCdcState(getCdcStateManager().getCdcState());
+
+    return new AirbyteStateMessage()
+        .withStateType(AirbyteStateType.GLOBAL)
+        // Temporarily include legacy state for backwards compatibility with the platform
+        .withData(Jsons.jsonNode(dbState))
+        .withGlobal(globalState);
+  }
+
+  /**
+   * Extracts the Change Data Capture (CDC) state stored in the initial state provided to this state
+   * manager.
+   *
+   * @param airbyteStateMessage The {@link AirbyteStateMessage} that contains the initial state
+   *        provided to the state manager.
+   * @return The {@link CdcState} stored in the state, or {@code null} when a non-global message has
+   *         no legacy {@code data} payload to read it from.
+   */
+  private CdcState extractCdcState(final AirbyteStateMessage airbyteStateMessage) {
+    if (airbyteStateMessage.getStateType() == AirbyteStateType.GLOBAL) {
+      return Jsons.object(airbyteStateMessage.getGlobal().getSharedState(), CdcState.class);
+    } else {
+      return airbyteStateMessage.getData() != null ? Jsons.object(airbyteStateMessage.getData(), DbState.class).getCdcState() : null;
+    }
+  }
+
+  /**
+   * Generates the {@link Supplier} that will be used to extract the streams from the incoming
+   * {@link AirbyteStateMessage}.
+   *
+   * @param airbyteStateMessage The {@link AirbyteStateMessage} supplied to this state manager with
+   *        the initial state.
+   * @return A {@link Supplier} that will be used to fetch the streams present in the initial state.
+   */
+  private static Supplier<Collection<AirbyteStreamState>> getStreamsSupplier(final AirbyteStateMessage airbyteStateMessage) {
+    /*
+     * If the incoming message has the state type set to GLOBAL, it is using the new format. Therefore,
+     * we can look for streams in the "global" field of the message. Otherwise, the message is still
+     * storing state in the legacy "data" field.
+     */
+    return () -> {
+      if (airbyteStateMessage.getStateType() == AirbyteStateType.GLOBAL) {
+        return airbyteStateMessage.getGlobal().getStreamStates();
+      } else if (airbyteStateMessage.getData() != null) {
+        return Jsons.object(airbyteStateMessage.getData(), DbState.class).getStreams().stream()
+            .map(s -> new AirbyteStreamState().withStreamState(Jsons.jsonNode(s))
+                .withStreamDescriptor(new StreamDescriptor().withNamespace(s.getStreamNamespace()).withName(s.getStreamName())))
+            .collect(
+                Collectors.toList());
+      } else {
+        return List.of();
+      }
+    };
+  }
+
+}
diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManager.java
new file mode 100644
index 000000000000..64dabe9e07e2
--- /dev/null
+++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManager.java
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2022 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.source.relationaldb.state;
+
+import com.google.common.base.Preconditions;
+import io.airbyte.commons.json.Jsons;
+import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair;
+import io.airbyte.integrations.source.relationaldb.CdcStateManager;
+import io.airbyte.integrations.source.relationaldb.CursorInfo;
+import io.airbyte.integrations.source.relationaldb.models.DbState;
+import io.airbyte.integrations.source.relationaldb.models.DbStreamState;
+import io.airbyte.protocol.models.AirbyteStateMessage;
+import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType;
+import io.airbyte.protocol.models.ConfiguredAirbyteCatalog;
+import java.util.List;
+import java.util.Optional;
+import java.util.function.Function;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Legacy implementation (pre-per-stream state support) of the {@link StateManager} interface.
+ *
+ * This implementation assumes that the state matches the {@link DbState} object and effectively
+ * tracks state as global across the streams managed by a connector.
+ *
+ * @deprecated This manager may be removed in the future if/once all connectors support per-stream
+ *             state management.
+ */
+@Deprecated(forRemoval = true)
+public class LegacyStateManager extends AbstractStateManager<DbState, DbStreamState> {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(LegacyStateManager.class);
+
+  /**
+   * {@link Function} that extracts the cursor from the stream state.
+   */
+  private static final Function<DbStreamState, String> CURSOR_FUNCTION = DbStreamState::getCursor;
+
+  /**
+   * {@link Function} that extracts the cursor field(s) from the stream state.
+   */
+  private static final Function<DbStreamState, List<String>> CURSOR_FIELD_FUNCTION = DbStreamState::getCursorField;
+
+  /**
+   * {@link Function} that creates an {@link AirbyteStreamNameNamespacePair} from the stream state.
+   */
+  private static final Function<DbStreamState, AirbyteStreamNameNamespacePair> NAME_NAMESPACE_PAIR_FUNCTION =
+      s -> new AirbyteStreamNameNamespacePair(s.getStreamName(), s.getStreamNamespace());
+
+  /**
+   * Tracks whether the connector associated with this state manager supports CDC.
+   */
+  private Boolean isCdc;
+
+  /**
+   * {@link CdcStateManager} used to manage state for connectors that support CDC.
+   */
+  private final CdcStateManager cdcStateManager;
+
+  /**
+   * Constructs a new {@link LegacyStateManager} that is seeded with the provided {@link DbState}
+   * instance.
+   *
+   * @param dbState The initial state represented as a {@link DbState} instance.
+   * @param catalog The {@link ConfiguredAirbyteCatalog} for the connector associated with this state
+   *        manager.
+   */
+  public LegacyStateManager(final DbState dbState, final ConfiguredAirbyteCatalog catalog) {
+    super(catalog,
+        () -> dbState.getStreams(),
+        CURSOR_FUNCTION,
+        CURSOR_FIELD_FUNCTION,
+        NAME_NAMESPACE_PAIR_FUNCTION);
+
+    this.cdcStateManager = new CdcStateManager(dbState.getCdcState());
+    this.isCdc = dbState.getCdc();
+    if (dbState.getCdc() == null) {
+      this.isCdc = false; // a state with no CDC flag is treated as non-CDC
+    }
+  }
+
+  @Override
+  public CdcStateManager getCdcStateManager() {
+    return cdcStateManager;
+  }
+
+  @Override
+  public AirbyteStateMessage toState(final Optional<AirbyteStreamNameNamespacePair> pair) {
+    final DbState dbState = StateGeneratorUtils.generateDbState(getPairToCursorInfoMap())
+        .withCdc(isCdc)
+        .withCdcState(getCdcStateManager().getCdcState());
+
+    LOGGER.info("Generated legacy state for {} streams", dbState.getStreams().size());
+    return new AirbyteStateMessage().withStateType(AirbyteStateType.LEGACY).withData(Jsons.jsonNode(dbState));
+  }
+
+  @Override
+  public AirbyteStateMessage updateAndEmit(final AirbyteStreamNameNamespacePair pair, final String cursor) {
+    // cdc file gets updated by debezium so the "update" part is a no op.
+    if (!isCdc) {
+      final Optional<CursorInfo> cursorInfo = getCursorInfo(pair);
+      Preconditions.checkState(cursorInfo.isPresent(), "Could not find cursor information for stream: " + pair);
+      cursorInfo.get().setCursor(cursor);
+    }
+
+    return toState(Optional.ofNullable(pair));
+  }
+
+}
diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtils.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtils.java
new file mode 100644
index 000000000000..493defb95e9f
--- /dev/null
+++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtils.java
@@ -0,0 +1,216 @@
+/*
+ * Copyright (c) 2022 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.source.relationaldb.state;
+
+import com.google.common.collect.Lists;
+import io.airbyte.commons.json.Jsons;
+import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair;
+import io.airbyte.integrations.source.relationaldb.CursorInfo;
+import io.airbyte.integrations.source.relationaldb.models.DbState;
+import io.airbyte.integrations.source.relationaldb.models.DbStreamState;
+import io.airbyte.protocol.models.AirbyteGlobalState;
+import io.airbyte.protocol.models.AirbyteStateMessage;
+import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType;
+import io.airbyte.protocol.models.AirbyteStreamState;
+import io.airbyte.protocol.models.StreamDescriptor;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Optional;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Collection of utilities that facilitate the generation of state objects.
+ */ +public class StateGeneratorUtils { + + private static final Logger LOGGER = LoggerFactory.getLogger(StateGeneratorUtils.class); + + /** + * {@link Function} that extracts the cursor from the stream state. + */ + public static final Function CURSOR_FUNCTION = stream -> { + final Optional dbStreamState = StateGeneratorUtils.extractState(stream); + return dbStreamState.map(DbStreamState::getCursor).orElse(null); + }; + + /** + * {@link Function} that extracts the cursor field(s) from the stream state. + */ + public static final Function> CURSOR_FIELD_FUNCTION = stream -> { + final Optional dbStreamState = StateGeneratorUtils.extractState(stream); + if (dbStreamState.isPresent()) { + return dbStreamState.get().getCursorField(); + } else { + return List.of(); + } + }; + + /** + * {@link Function} that creates an {@link AirbyteStreamNameNamespacePair} from the stream state. + */ + public static final Function NAME_NAMESPACE_PAIR_FUNCTION = + s -> isValidStreamDescriptor(s.getStreamDescriptor()) + ? new AirbyteStreamNameNamespacePair(s.getStreamDescriptor().getName(), s.getStreamDescriptor().getNamespace()) + : null; + + private StateGeneratorUtils() {} + + /** + * Generates the stream state for the given stream and cursor information. + * + * @param airbyteStreamNameNamespacePair The stream. + * @param cursorInfo The current cursor. + * @return The {@link AirbyteStreamState} representing the current state of the stream. 
+ */ + public static AirbyteStreamState generateStreamState(final AirbyteStreamNameNamespacePair airbyteStreamNameNamespacePair, + final CursorInfo cursorInfo) { + return new AirbyteStreamState() + .withStreamDescriptor( + new StreamDescriptor().withName(airbyteStreamNameNamespacePair.getName()).withNamespace(airbyteStreamNameNamespacePair.getNamespace())) + .withStreamState(Jsons.jsonNode(generateDbStreamState(airbyteStreamNameNamespacePair, cursorInfo))); + } + + /** + * Generates a list of valid stream states from the provided stream and cursor information. A stream + * state is considered to be valid if the stream has a valid descriptor (see + * {@link #isValidStreamDescriptor(StreamDescriptor)} for more details). + * + * @param pairToCursorInfoMap The map of stream name/namespace tuple to the current cursor + * information for that stream + * @return The list of stream states derived from the state information extracted from the provided + * map. + */ + public static List generateStreamStateList(final Map pairToCursorInfoMap) { + return pairToCursorInfoMap.entrySet().stream() + .sorted(Entry.comparingByKey()) + .map(e -> generateStreamState(e.getKey(), e.getValue())) + .filter(s -> isValidStreamDescriptor(s.getStreamDescriptor())) + .collect(Collectors.toList()); + } + + /** + * Generates the legacy global state for backwards compatibility. + * + * @param pairToCursorInfoMap The map of stream name/namespace tuple to the current cursor + * information for that stream + * @return The legacy {@link DbState}. + */ + public static DbState generateDbState(final Map pairToCursorInfoMap) { + return new DbState() + .withCdc(false) + .withStreams(pairToCursorInfoMap.entrySet().stream() + .sorted(Entry.comparingByKey()) // sort by stream name then namespace for sanity. + .map(e -> generateDbStreamState(e.getKey(), e.getValue())) + .collect(Collectors.toList())); + } + + /** + * Generates the {@link DbStreamState} for the given stream and cursor. 
+ * + * @param airbyteStreamNameNamespacePair The stream. + * @param cursorInfo The current cursor. + * @return The {@link DbStreamState}. + */ + public static DbStreamState generateDbStreamState(final AirbyteStreamNameNamespacePair airbyteStreamNameNamespacePair, + final CursorInfo cursorInfo) { + return new DbStreamState() + .withStreamName(airbyteStreamNameNamespacePair.getName()) + .withStreamNamespace(airbyteStreamNameNamespacePair.getNamespace()) + .withCursorField(cursorInfo.getCursorField() == null ? Collections.emptyList() : Lists.newArrayList(cursorInfo.getCursorField())) + .withCursor(cursorInfo.getCursor()); + } + + /** + * Extracts the actual state from the {@link AirbyteStreamState} object. + * + * @param state The {@link AirbyteStreamState} that contains the actual stream state as JSON. + * @return An {@link Optional} possibly containing the deserialized representation of the stream + * state or an empty {@link Optional} if the state is not present or could not be + * deserialized. + */ + public static Optional extractState(final AirbyteStreamState state) { + try { + return Optional.ofNullable(Jsons.object(state.getStreamState(), DbStreamState.class)); + } catch (final IllegalArgumentException e) { + LOGGER.error("Unable to extract state.", e); + return Optional.empty(); + } + } + + /** + * Tests whether the provided {@link StreamDescriptor} is valid. A valid descriptor is defined as + * one that has a non-{@code null} name. + * + * See https://github.com/airbytehq/airbyte/blob/e63458fabb067978beb5eaa74d2bc130919b419f/docs/understanding-airbyte/airbyte-protocol.md + * for more details + * + * @param streamDescriptor A {@link StreamDescriptor} to be validated. + * @return {@code true} if the provided {@link StreamDescriptor} is valid or {@code false} if it is + * invalid. 
+ */ + public static boolean isValidStreamDescriptor(final StreamDescriptor streamDescriptor) { + if (streamDescriptor != null) { + return streamDescriptor.getName() != null; + } else { + return false; + } + } + + /** + * Converts a {@link AirbyteStateType#LEGACY} state message into a {@link AirbyteStateType#GLOBAL} + * message. + * + * @param airbyteStateMessage A {@link AirbyteStateType#LEGACY} state message. + * @return A {@link AirbyteStateType#GLOBAL} state message. + */ + public static AirbyteStateMessage convertLegacyStateToGlobalState(final AirbyteStateMessage airbyteStateMessage) { + final DbState dbState = Jsons.object(airbyteStateMessage.getData(), DbState.class); + final AirbyteGlobalState globalState = new AirbyteGlobalState() + .withSharedState(Jsons.jsonNode(dbState.getCdcState())) + .withStreamStates(dbState.getStreams().stream() + .map(s -> new AirbyteStreamState() + .withStreamDescriptor(new StreamDescriptor().withName(s.getStreamName()).withNamespace(s.getStreamNamespace())) + .withStreamState(Jsons.jsonNode(s))) + .collect( + Collectors.toList())); + return new AirbyteStateMessage().withStateType(AirbyteStateType.GLOBAL).withGlobal(globalState); + } + + /** + * Converts a {@link AirbyteStateType#GLOBAL} state message into a list of + * {@link AirbyteStateType#STREAM} messages. + * + * @param airbyteStateMessage A {@link AirbyteStateType#GLOBAL} state message. + * @return A list {@link AirbyteStateType#STREAM} state messages. 
+ */ + public static List convertGlobalStateToStreamState(final AirbyteStateMessage airbyteStateMessage) { + return airbyteStateMessage.getGlobal().getStreamStates().stream() + .map(s -> new AirbyteStateMessage().withStateType(AirbyteStateType.STREAM) + .withStream(new AirbyteStreamState().withStreamDescriptor(s.getStreamDescriptor()).withStreamState(s.getStreamState()))) + .collect(Collectors.toList()); + } + + /** + * Converts a {@link AirbyteStateType#LEGACY} state message into a list of + * {@link AirbyteStateType#STREAM} messages. + * + * @param airbyteStateMessage A {@link AirbyteStateType#LEGACY} state message. + * @return A list {@link AirbyteStateType#STREAM} state messages. + */ + public static List convertLegacyStateToStreamState(final AirbyteStateMessage airbyteStateMessage) { + return Jsons.object(airbyteStateMessage.getData(), DbState.class).getStreams().stream() + .map(s -> new AirbyteStateMessage().withStateType(AirbyteStateType.STREAM) + .withStream(new AirbyteStreamState() + .withStreamDescriptor(new StreamDescriptor().withNamespace(s.getStreamNamespace()).withName(s.getStreamName())) + .withStreamState(Jsons.jsonNode(s)))) + .collect(Collectors.toList()); + } + +} diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManager.java new file mode 100644 index 000000000000..a4234454b06f --- /dev/null +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManager.java @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.relationaldb.state; + +import com.google.common.base.Preconditions; +import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; +import io.airbyte.integrations.source.relationaldb.CdcStateManager; +import io.airbyte.integrations.source.relationaldb.CursorInfo; +import io.airbyte.protocol.models.AirbyteStateMessage; +import java.util.Map; +import java.util.Optional; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Defines a manager that manages connector state. Connector state is used to keep track of the data + * synced by the connector. + * + * @param The type of the state maintained by the manager. + * @param The type of the stream(s) stored within the state maintained by the manager. + */ +public interface StateManager { + + Logger LOGGER = LoggerFactory.getLogger(StateManager.class); + + /** + * Retrieves the {@link CdcStateManager} associated with the state manager. + * + * @return The {@link CdcStateManager} + * @throws UnsupportedOperationException if the state manager does not support tracking change data + * capture (CDC) state. + */ + CdcStateManager getCdcStateManager(); + + /** + * Retrieves the map of stream name/namespace tuple to the current cursor information for that + * stream. + * + * @return The map of stream name/namespace tuple to the current cursor information for that stream + * as maintained by this state manager. + */ + Map getPairToCursorInfoMap(); + + /** + * Generates an {@link AirbyteStateMessage} that represents the current state contained in the state + * manager. + * + * @param pair The {@link AirbyteStreamNameNamespacePair} that represents a stream managed by the + * state manager. + * @return The {@link AirbyteStateMessage} that represents the current state contained in the state + * manager. 
+ */ + AirbyteStateMessage toState(final Optional pair); + + /** + * Retrieves an {@link Optional} possibly containing the cursor value tracked in the state + * associated with the provided stream name/namespace tuple. + * + * @param pair The {@link AirbyteStreamNameNamespacePair} which identifies a stream. + * @return An {@link Optional} possibly containing the cursor value tracked in the state associated + * with the provided stream name/namespace tuple. + */ + default Optional getCursor(final AirbyteStreamNameNamespacePair pair) { + return getCursorInfo(pair).map(CursorInfo::getCursor); + } + + /** + * Retrieves an {@link Optional} possibly containing the cursor field name associated with the + * cursor tracked in the state associated with the provided stream name/namespace tuple. + * + * @param pair The {@link AirbyteStreamNameNamespacePair} which identifies a stream. + * @return An {@link Optional} possibly containing the cursor field name associated with the cursor + * tracked in the state associated with the provided stream name/namespace tuple. + */ + default Optional getCursorField(final AirbyteStreamNameNamespacePair pair) { + return getCursorInfo(pair).map(CursorInfo::getCursorField); + } + + /** + * Retrieves an {@link Optional} possibly containing the original cursor value tracked in the state + * associated with the provided stream name/namespace tuple. + * + * @param pair The {@link AirbyteStreamNameNamespacePair} which identifies a stream. + * @return An {@link Optional} possibly containing the original cursor value tracked in the state + * associated with the provided stream name/namespace tuple. + */ + default Optional getOriginalCursor(final AirbyteStreamNameNamespacePair pair) { + return getCursorInfo(pair).map(CursorInfo::getOriginalCursor); + } + + /** + * Retrieves an {@link Optional} possibly containing the original cursor field name associated with + * the cursor tracked in the state associated with the provided stream name/namespace tuple. 
+ * + * @param pair The {@link AirbyteStreamNameNamespacePair} which identifies a stream. + * @return An {@link Optional} possibly containing the original cursor field name associated with + * the cursor tracked in the state associated with the provided stream name/namespace tuple. + */ + default Optional getOriginalCursorField(final AirbyteStreamNameNamespacePair pair) { + return getCursorInfo(pair).map(CursorInfo::getOriginalCursorField); + } + + /** + * Retrieves the current cursor information stored in the state manager for the stream name/namespace + * tuple. + * + * @param pair The {@link AirbyteStreamNameNamespacePair} that represents a stream managed by the + * state manager. + * @return {@link Optional} that potentially contains the current cursor information for the given + * stream name/namespace tuple. + */ + default Optional getCursorInfo(final AirbyteStreamNameNamespacePair pair) { + return Optional.ofNullable(getPairToCursorInfoMap().get(pair)); + } + + /** + * Emits the current state maintained by the manager as an {@link AirbyteStateMessage}. + * + * @param pair The {@link AirbyteStreamNameNamespacePair} that represents a stream managed by the + * state manager. + * @return An {@link AirbyteStateMessage} that represents the current state maintained by the state + * manager. + */ + default AirbyteStateMessage emit(final Optional pair) { + return toState(pair); + } + + /** + * Updates the cursor associated with the provided stream name/namespace pair and emits the current + * state maintained by the state manager. + * + * @param pair The {@link AirbyteStreamNameNamespacePair} that represents a stream managed by the + * state manager. + * @param cursor The new value for the cursor associated with the + * {@link AirbyteStreamNameNamespacePair} that represents a stream managed by the state + * manager. + * @return An {@link AirbyteStateMessage} that represents the current state maintained by the state + * manager.
+ */ + default AirbyteStateMessage updateAndEmit(final AirbyteStreamNameNamespacePair pair, final String cursor) { + final Optional cursorInfo = getCursorInfo(pair); + Preconditions.checkState(cursorInfo.isPresent(), "Could not find cursor information for stream: " + pair); + LOGGER.debug("Updating cursor value for {} to {}...", pair, cursor); + cursorInfo.get().setCursor(cursor); + return emit(Optional.ofNullable(pair)); + } + +} diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java new file mode 100644 index 000000000000..a5dddedc9ebe --- /dev/null +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.relationaldb.state; + +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.relationaldb.models.DbState; +import io.airbyte.protocol.models.AirbyteStateMessage; +import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import java.util.ArrayList; +import java.util.List; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Factory class that creates {@link StateManager} instances based on the provided state. + */ +public class StateManagerFactory { + + private static final Logger LOGGER = LoggerFactory.getLogger(StateManagerFactory.class); + + /** + * Private constructor to prevent direct instantiation. + */ + private StateManagerFactory() {} + + /** + * Creates a {@link StateManager} based on the provided state object and catalog. 
This method will + * handle the conversion of the provided state to match the requested state manager based on the + * provided {@link AirbyteStateType}. + * + * @param supportedStateType The type of state supported by the connector. + * @param initialState The deserialized initial state that will be provided to the selected + * {@link StateManager}. + * @param catalog The {@link ConfiguredAirbyteCatalog} for the connector that will utilize the state + * manager. + * @return A newly created {@link StateManager} implementation based on the provided state. + */ + public static StateManager createStateManager(final AirbyteStateType supportedStateType, + final List initialState, + final ConfiguredAirbyteCatalog catalog) { + if (initialState != null && !initialState.isEmpty()) { + final AirbyteStateMessage airbyteStateMessage = initialState.get(0); + switch (supportedStateType) { + case LEGACY: + LOGGER.info("Legacy state manager selected to manage state object with type {}.", airbyteStateMessage.getStateType()); + return new LegacyStateManager(Jsons.object(airbyteStateMessage.getData(), DbState.class), catalog); + case GLOBAL: + LOGGER.info("Global state manager selected to manage state object with type {}.", airbyteStateMessage.getStateType()); + return new GlobalStateManager(generateGlobalState(airbyteStateMessage), catalog); + case STREAM: + default: + LOGGER.info("Stream state manager selected to manage state object with type {}.", airbyteStateMessage.getStateType()); + return new StreamStateManager(generateStreamState(initialState), catalog); + } + } else { + throw new IllegalArgumentException("Failed to create state manager due to empty state list."); + } + } + + /** + * Handles the conversion between a different state type and the global state. This method handles + * the following transitions: + *
    + *
  • Stream -> Global (not supported, results in {@link IllegalArgumentException})
  • + *
  • Legacy -> Global (supported)
  • + *
  • Global -> Global (supported/no conversion required)
  • + *
+ * + * @param airbyteStateMessage The current state that is to be converted to global state. + * @return The converted state message. + * @throws IllegalArgumentException if unable to convert between the given state type and global. + */ + private static AirbyteStateMessage generateGlobalState(final AirbyteStateMessage airbyteStateMessage) { + AirbyteStateMessage globalStateMessage = airbyteStateMessage; + + switch (airbyteStateMessage.getStateType()) { + case STREAM: + throw new IllegalArgumentException("Unable to convert connector state from stream to global. Please reset the connection to continue."); + case LEGACY: + globalStateMessage = StateGeneratorUtils.convertLegacyStateToGlobalState(airbyteStateMessage); + LOGGER.info("Legacy state converted to global state."); + break; + case GLOBAL: + default: + break; + } + + return globalStateMessage; + } + + /** + * Handles the conversion between a different state type and the stream state. This method handles + * the following transitions: + *
    + *
  • Global -> Stream (not supported, results in {@link IllegalArgumentException})
  • + *
  • Legacy -> Stream (supported)
  • + *
  • Stream -> Stream (supported/no conversion required)
  • + *
+ * + * @param states The list of current states. + * @return The converted state messages. + * @throws IllegalArgumentException if unable to convert between the given state type and stream. + */ + private static List generateStreamState(final List states) { + final AirbyteStateMessage airbyteStateMessage = states.get(0); + final List streamStates = new ArrayList<>(); + switch (airbyteStateMessage.getStateType()) { + case GLOBAL: + throw new IllegalArgumentException("Unable to convert connector state from global to stream. Please reset the connection to continue."); + case LEGACY: + streamStates.addAll(StateGeneratorUtils.convertLegacyStateToStreamState(airbyteStateMessage)); + break; + case STREAM: + default: + streamStates.addAll(states); + break; + } + + return streamStates; + } + +} diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StreamStateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StreamStateManager.java new file mode 100644 index 000000000000..9fee0a39ab6c --- /dev/null +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StreamStateManager.java @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.relationaldb.state; + +import static io.airbyte.integrations.source.relationaldb.state.StateGeneratorUtils.CURSOR_FIELD_FUNCTION; +import static io.airbyte.integrations.source.relationaldb.state.StateGeneratorUtils.CURSOR_FUNCTION; +import static io.airbyte.integrations.source.relationaldb.state.StateGeneratorUtils.NAME_NAMESPACE_PAIR_FUNCTION; + +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; +import io.airbyte.integrations.source.relationaldb.CdcStateManager; +import io.airbyte.integrations.source.relationaldb.CursorInfo; +import io.airbyte.protocol.models.AirbyteStateMessage; +import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; +import io.airbyte.protocol.models.AirbyteStreamState; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Collectors; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Per-stream implementation of the {@link StateManager} interface. + * + * This implementation generates a state object for each stream detected in catalog/map of known + * streams to cursor information stored in this manager. + */ +public class StreamStateManager extends AbstractStateManager { + + private static final Logger LOGGER = LoggerFactory.getLogger(StreamStateManager.class); + + /** + * Constructs a new {@link StreamStateManager} that is seeded with the provided + * {@link AirbyteStateMessage}. + * + * @param airbyteStateMessages The initial state represented as a list of + * {@link AirbyteStateMessage}s. + * @param catalog The {@link ConfiguredAirbyteCatalog} for the connector associated with this state + * manager. 
+ */ + public StreamStateManager(final List airbyteStateMessages, final ConfiguredAirbyteCatalog catalog) { + super(catalog, + () -> airbyteStateMessages.stream().map(a -> a.getStream()).collect(Collectors.toList()), + CURSOR_FUNCTION, + CURSOR_FIELD_FUNCTION, + NAME_NAMESPACE_PAIR_FUNCTION); + } + + @Override + public CdcStateManager getCdcStateManager() { + throw new UnsupportedOperationException("CDC state management not supported by stream state manager."); + } + + @Override + public AirbyteStateMessage toState(final Optional pair) { + if (pair.isPresent()) { + final Map pairToCursorInfoMap = getPairToCursorInfoMap(); + final Optional cursorInfo = Optional.ofNullable(pairToCursorInfoMap.get(pair.get())); + + if (cursorInfo.isPresent()) { + LOGGER.debug("Generating state message for {}...", pair); + return new AirbyteStateMessage() + .withStateType(AirbyteStateType.STREAM) + // Temporarily include legacy state for backwards compatibility with the platform + .withData(Jsons.jsonNode(StateGeneratorUtils.generateDbState(pairToCursorInfoMap))) + .withStream(StateGeneratorUtils.generateStreamState(pair.get(), cursorInfo.get())); + } else { + LOGGER.warn("Cursor information could not be located in state for stream {}. Returning a new, empty state message...", pair); + return new AirbyteStateMessage().withStateType(AirbyteStateType.STREAM).withStream(new AirbyteStreamState()); + } + } else { + LOGGER.warn("Stream not provided. 
Returning a new, empty state message..."); + return new AirbyteStateMessage().withStateType(AirbyteStateType.STREAM).withStream(new AirbyteStreamState()); + } + } + +} diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/StateDecoratingIteratorTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/StateDecoratingIteratorTest.java index 7fb6964d2654..e464a95e40fa 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/StateDecoratingIteratorTest.java +++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/StateDecoratingIteratorTest.java @@ -14,6 +14,7 @@ import io.airbyte.commons.json.Jsons; import io.airbyte.commons.util.MoreIterators; import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; +import io.airbyte.integrations.source.relationaldb.state.StateManager; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.AirbyteMessage.Type; import io.airbyte.protocol.models.AirbyteRecordMessage; diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/StateManagerTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/StateManagerTest.java deleted file mode 100644 index 9e64edb55b7e..000000000000 --- a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/StateManagerTest.java +++ /dev/null @@ -1,192 +0,0 @@ -/* - * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
- */ - -package io.airbyte.integrations.source.relationaldb; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import io.airbyte.commons.json.Jsons; -import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; -import io.airbyte.integrations.source.relationaldb.models.DbState; -import io.airbyte.integrations.source.relationaldb.models.DbStreamState; -import io.airbyte.protocol.models.AirbyteStateMessage; -import io.airbyte.protocol.models.AirbyteStream; -import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; -import io.airbyte.protocol.models.ConfiguredAirbyteStream; -import java.util.Collections; -import java.util.Comparator; -import java.util.Optional; -import java.util.stream.Collectors; -import org.junit.jupiter.api.Test; -import org.testcontainers.shaded.com.google.common.collect.Lists; - -class StateManagerTest { - - private static final String NAMESPACE = "public"; - private static final String STREAM_NAME1 = "cars"; - private static final AirbyteStreamNameNamespacePair NAME_NAMESPACE_PAIR1 = new AirbyteStreamNameNamespacePair(STREAM_NAME1, NAMESPACE); - private static final String STREAM_NAME2 = "bicycles"; - private static final AirbyteStreamNameNamespacePair NAME_NAMESPACE_PAIR2 = new AirbyteStreamNameNamespacePair(STREAM_NAME2, NAMESPACE); - private static final String STREAM_NAME3 = "stationary_bicycles"; - private static final String CURSOR_FIELD1 = "year"; - private static final String CURSOR_FIELD2 = "generation"; - private static final String CURSOR = "2000"; - - @Test - void testCreateCursorInfoCatalogAndStateSameCursorField() { - final CursorInfo actual = - StateManager.createCursorInfoForStream(NAME_NAMESPACE_PAIR1, getState(CURSOR_FIELD1, CURSOR), getCatalog(CURSOR_FIELD1)); - assertEquals(new CursorInfo(CURSOR_FIELD1, CURSOR, CURSOR_FIELD1, CURSOR), actual); - } - - @Test - void testCreateCursorInfoCatalogAndStateSameCursorFieldButNoCursor() { - final CursorInfo actual = - 
StateManager.createCursorInfoForStream(NAME_NAMESPACE_PAIR1, getState(CURSOR_FIELD1, null), getCatalog(CURSOR_FIELD1)); - assertEquals(new CursorInfo(CURSOR_FIELD1, null, CURSOR_FIELD1, null), actual); - } - - @Test - void testCreateCursorInfoCatalogAndStateChangeInCursorFieldName() { - final CursorInfo actual = - StateManager.createCursorInfoForStream(NAME_NAMESPACE_PAIR1, getState(CURSOR_FIELD1, CURSOR), getCatalog(CURSOR_FIELD2)); - assertEquals(new CursorInfo(CURSOR_FIELD1, CURSOR, CURSOR_FIELD2, null), actual); - } - - @Test - void testCreateCursorInfoCatalogAndNoState() { - final CursorInfo actual = StateManager - .createCursorInfoForStream(NAME_NAMESPACE_PAIR1, Optional.empty(), getCatalog(CURSOR_FIELD1)); - assertEquals(new CursorInfo(null, null, CURSOR_FIELD1, null), actual); - } - - @Test - void testCreateCursorInfoStateAndNoCatalog() { - final CursorInfo actual = StateManager - .createCursorInfoForStream(NAME_NAMESPACE_PAIR1, getState(CURSOR_FIELD1, CURSOR), Optional.empty()); - assertEquals(new CursorInfo(CURSOR_FIELD1, CURSOR, null, null), actual); - } - - // this is what full refresh looks like. 
- @Test - void testCreateCursorInfoNoCatalogAndNoState() { - final CursorInfo actual = StateManager - .createCursorInfoForStream(NAME_NAMESPACE_PAIR1, Optional.empty(), Optional.empty()); - assertEquals(new CursorInfo(null, null, null, null), actual); - } - - @Test - void testCreateCursorInfoStateAndCatalogButNoCursorField() { - final CursorInfo actual = StateManager - .createCursorInfoForStream(NAME_NAMESPACE_PAIR1, getState(CURSOR_FIELD1, CURSOR), getCatalog(null)); - assertEquals(new CursorInfo(CURSOR_FIELD1, CURSOR, null, null), actual); - } - - @SuppressWarnings("SameParameterValue") - private static Optional getState(final String cursorField, final String cursor) { - return Optional.of(new DbStreamState() - .withStreamName(STREAM_NAME1) - .withCursorField(Lists.newArrayList(cursorField)) - .withCursor(cursor)); - } - - private static Optional getCatalog(final String cursorField) { - return Optional.of(new ConfiguredAirbyteStream() - .withStream(new AirbyteStream().withName(STREAM_NAME1)) - .withCursorField(cursorField == null ? 
Collections.emptyList() : Lists.newArrayList(cursorField))); - } - - @Test - void testGetters() { - final DbState state = new DbState().withStreams(Lists.newArrayList( - new DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(Lists.newArrayList(CURSOR_FIELD1)) - .withCursor(CURSOR), - new DbStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE))); - - final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() - .withStreams(Lists.newArrayList( - new ConfiguredAirbyteStream() - .withStream(new AirbyteStream().withName(STREAM_NAME1).withNamespace(NAMESPACE)) - .withCursorField(Lists.newArrayList(CURSOR_FIELD1)), - new ConfiguredAirbyteStream() - .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)))); - - final StateManager stateManager = new StateManager(state, catalog); - - assertEquals(Optional.of(CURSOR_FIELD1), stateManager.getOriginalCursorField(NAME_NAMESPACE_PAIR1)); - assertEquals(Optional.of(CURSOR), stateManager.getOriginalCursor(NAME_NAMESPACE_PAIR1)); - assertEquals(Optional.of(CURSOR_FIELD1), stateManager.getCursorField(NAME_NAMESPACE_PAIR1)); - assertEquals(Optional.of(CURSOR), stateManager.getCursor(NAME_NAMESPACE_PAIR1)); - - assertEquals(Optional.empty(), stateManager.getOriginalCursorField(NAME_NAMESPACE_PAIR2)); - assertEquals(Optional.empty(), stateManager.getOriginalCursor(NAME_NAMESPACE_PAIR2)); - assertEquals(Optional.empty(), stateManager.getCursorField(NAME_NAMESPACE_PAIR2)); - assertEquals(Optional.empty(), stateManager.getCursor(NAME_NAMESPACE_PAIR2)); - } - - @Test - void testToState() { - final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() - .withStreams(Lists.newArrayList( - new ConfiguredAirbyteStream() - .withStream(new AirbyteStream().withName(STREAM_NAME1).withNamespace(NAMESPACE)) - .withCursorField(Lists.newArrayList(CURSOR_FIELD1)), - new ConfiguredAirbyteStream() - .withStream(new 
AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)) - .withCursorField(Lists.newArrayList(CURSOR_FIELD2)), - new ConfiguredAirbyteStream() - .withStream(new AirbyteStream().withName(STREAM_NAME3).withNamespace(NAMESPACE)))); - - final StateManager stateManager = new StateManager(new DbState(), catalog); - - final AirbyteStateMessage expectedFirstEmission = new AirbyteStateMessage() - .withData(Jsons.jsonNode(new DbState().withStreams(Lists - .newArrayList( - new DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(Lists.newArrayList(CURSOR_FIELD1)) - .withCursor("a"), - new DbStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE).withCursorField(Lists.newArrayList(CURSOR_FIELD2)), - new DbStreamState().withStreamName(STREAM_NAME3).withStreamNamespace(NAMESPACE)) - .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())) - .withCdc(false))); - final AirbyteStateMessage actualFirstEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR1, "a"); - assertEquals(expectedFirstEmission, actualFirstEmission); - final AirbyteStateMessage expectedSecondEmission = new AirbyteStateMessage() - .withData(Jsons.jsonNode(new DbState().withStreams(Lists - .newArrayList( - new DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(Lists.newArrayList(CURSOR_FIELD1)) - .withCursor("a"), - new DbStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE).withCursorField(Lists.newArrayList(CURSOR_FIELD2)) - .withCursor("b"), - new DbStreamState().withStreamName(STREAM_NAME3).withStreamNamespace(NAMESPACE)) - .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())) - .withCdc(false))); - final AirbyteStateMessage actualSecondEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR2, "b"); - assertEquals(expectedSecondEmission, actualSecondEmission); - } - - @Test - void 
testToStateNullCursorField() { - final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() - .withStreams(Lists.newArrayList( - new ConfiguredAirbyteStream() - .withStream(new AirbyteStream().withName(STREAM_NAME1).withNamespace(NAMESPACE)) - .withCursorField(Lists.newArrayList(CURSOR_FIELD1)), - new ConfiguredAirbyteStream() - .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)))); - final StateManager stateManager = new StateManager(new DbState(), catalog); - - final AirbyteStateMessage expectedFirstEmission = new AirbyteStateMessage() - .withData(Jsons.jsonNode(new DbState().withStreams(Lists - .newArrayList( - new DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(Lists.newArrayList(CURSOR_FIELD1)) - .withCursor("a"), - new DbStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE)) - .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())) - .withCdc(false))); - - final AirbyteStateMessage actualFirstEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR1, "a"); - assertEquals(expectedFirstEmission, actualFirstEmission); - } - -} diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/CursorManagerTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/CursorManagerTest.java new file mode 100644 index 000000000000..67b7fddc23f5 --- /dev/null +++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/CursorManagerTest.java @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.relationaldb.state; + +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.CURSOR; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.CURSOR_FIELD1; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.CURSOR_FIELD2; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.NAME_NAMESPACE_PAIR1; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.NAME_NAMESPACE_PAIR2; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.getCatalog; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.getState; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.getStream; +import static org.junit.jupiter.api.Assertions.assertEquals; + +import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; +import io.airbyte.integrations.source.relationaldb.CursorInfo; +import io.airbyte.integrations.source.relationaldb.models.DbStreamState; +import java.util.Collections; +import java.util.Optional; +import org.junit.jupiter.api.Test; + +/** + * Test suite for the {@link CursorManager} class. 
+ */ +public class CursorManagerTest { + + @Test + void testCreateCursorInfoCatalogAndStateSameCursorField() { + final CursorManager cursorManager = createCursorManager(CURSOR_FIELD1, CURSOR, NAME_NAMESPACE_PAIR1); + final CursorInfo actual = cursorManager.createCursorInfoForStream( + NAME_NAMESPACE_PAIR1, + getState(CURSOR_FIELD1, CURSOR), + getStream(CURSOR_FIELD1), + DbStreamState::getCursor, + DbStreamState::getCursorField); + assertEquals(new CursorInfo(CURSOR_FIELD1, CURSOR, CURSOR_FIELD1, CURSOR), actual); + } + + @Test + void testCreateCursorInfoCatalogAndStateSameCursorFieldButNoCursor() { + final CursorManager cursorManager = createCursorManager(CURSOR_FIELD1, null, NAME_NAMESPACE_PAIR1); + final CursorInfo actual = cursorManager.createCursorInfoForStream( + NAME_NAMESPACE_PAIR1, + getState(CURSOR_FIELD1, null), + getStream(CURSOR_FIELD1), + DbStreamState::getCursor, + DbStreamState::getCursorField); + assertEquals(new CursorInfo(CURSOR_FIELD1, null, CURSOR_FIELD1, null), actual); + } + + @Test + void testCreateCursorInfoCatalogAndStateChangeInCursorFieldName() { + final CursorManager cursorManager = createCursorManager(CURSOR_FIELD1, CURSOR, NAME_NAMESPACE_PAIR1); + final CursorInfo actual = cursorManager.createCursorInfoForStream( + NAME_NAMESPACE_PAIR1, + getState(CURSOR_FIELD1, CURSOR), + getStream(CURSOR_FIELD2), + DbStreamState::getCursor, + DbStreamState::getCursorField); + assertEquals(new CursorInfo(CURSOR_FIELD1, CURSOR, CURSOR_FIELD2, null), actual); + } + + @Test + void testCreateCursorInfoCatalogAndNoState() { + final CursorManager cursorManager = createCursorManager(CURSOR_FIELD1, CURSOR, NAME_NAMESPACE_PAIR1); + final CursorInfo actual = cursorManager.createCursorInfoForStream( + NAME_NAMESPACE_PAIR1, + Optional.empty(), + getStream(CURSOR_FIELD1), + DbStreamState::getCursor, + DbStreamState::getCursorField); + assertEquals(new CursorInfo(null, null, CURSOR_FIELD1, null), actual); + } + + @Test + void testCreateCursorInfoStateAndNoCatalog() 
{ + final CursorManager cursorManager = createCursorManager(CURSOR_FIELD1, CURSOR, NAME_NAMESPACE_PAIR1); + final CursorInfo actual = cursorManager.createCursorInfoForStream( + NAME_NAMESPACE_PAIR1, + getState(CURSOR_FIELD1, CURSOR), + Optional.empty(), + DbStreamState::getCursor, + DbStreamState::getCursorField); + assertEquals(new CursorInfo(CURSOR_FIELD1, CURSOR, null, null), actual); + } + + // this is what full refresh looks like. + @Test + void testCreateCursorInfoNoCatalogAndNoState() { + final CursorManager cursorManager = createCursorManager(CURSOR_FIELD1, CURSOR, NAME_NAMESPACE_PAIR1); + final CursorInfo actual = cursorManager.createCursorInfoForStream( + NAME_NAMESPACE_PAIR1, + Optional.empty(), + Optional.empty(), + DbStreamState::getCursor, + DbStreamState::getCursorField); + assertEquals(new CursorInfo(null, null, null, null), actual); + } + + @Test + void testCreateCursorInfoStateAndCatalogButNoCursorField() { + final CursorManager cursorManager = createCursorManager(CURSOR_FIELD1, CURSOR, NAME_NAMESPACE_PAIR1); + final CursorInfo actual = cursorManager.createCursorInfoForStream( + NAME_NAMESPACE_PAIR1, + getState(CURSOR_FIELD1, CURSOR), + getStream(null), + DbStreamState::getCursor, + DbStreamState::getCursorField); + assertEquals(new CursorInfo(CURSOR_FIELD1, CURSOR, null, null), actual); + } + + @Test + void testGetters() { + final CursorManager cursorManager = createCursorManager(CURSOR_FIELD1, CURSOR, NAME_NAMESPACE_PAIR1); + final CursorInfo actualCursorInfo = new CursorInfo(CURSOR_FIELD1, CURSOR, null, null); + + assertEquals(Optional.of(actualCursorInfo), cursorManager.getCursorInfo(NAME_NAMESPACE_PAIR1)); + assertEquals(Optional.empty(), cursorManager.getCursorField(NAME_NAMESPACE_PAIR1)); + assertEquals(Optional.empty(), cursorManager.getCursor(NAME_NAMESPACE_PAIR1)); + + assertEquals(Optional.empty(), cursorManager.getCursorInfo(NAME_NAMESPACE_PAIR2)); + assertEquals(Optional.empty(), cursorManager.getCursorField(NAME_NAMESPACE_PAIR2)); + 
assertEquals(Optional.empty(), cursorManager.getCursor(NAME_NAMESPACE_PAIR2)); + } + + private CursorManager createCursorManager(final String cursorField, + final String cursor, + final AirbyteStreamNameNamespacePair nameNamespacePair) { + final DbStreamState dbStreamState = getState(cursorField, cursor).get(); + return new CursorManager<>( + getCatalog(cursorField).orElse(null), + () -> Collections.singleton(dbStreamState), + DbStreamState::getCursor, + DbStreamState::getCursorField, + s -> nameNamespacePair); + } + +} diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/GlobalStateManagerTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/GlobalStateManagerTest.java new file mode 100644 index 000000000000..c39ca83c16d2 --- /dev/null +++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/GlobalStateManagerTest.java @@ -0,0 +1,205 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.relationaldb.state; + +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.CURSOR_FIELD1; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.CURSOR_FIELD2; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.NAMESPACE; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.NAME_NAMESPACE_PAIR1; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME1; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME2; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME3; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.mockito.Mockito.mock; + +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.relationaldb.models.CdcState; +import io.airbyte.integrations.source.relationaldb.models.DbState; +import io.airbyte.integrations.source.relationaldb.models.DbStreamState; +import io.airbyte.protocol.models.AirbyteGlobalState; +import io.airbyte.protocol.models.AirbyteStateMessage; +import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; +import io.airbyte.protocol.models.AirbyteStream; +import io.airbyte.protocol.models.AirbyteStreamState; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.ConfiguredAirbyteStream; +import io.airbyte.protocol.models.StreamDescriptor; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import org.junit.jupiter.api.Test; + +/** + * Test suite for the {@link GlobalStateManager} class. 
+ */ +public class GlobalStateManagerTest { + + @Test + void testCdcStateManager() { + final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + final CdcState cdcState = new CdcState().withState(Jsons.jsonNode(Map.of("foo", "bar", "baz", 5))); + final AirbyteGlobalState globalState = new AirbyteGlobalState().withSharedState(Jsons.jsonNode(cdcState)) + .withStreamStates(List.of(new AirbyteStreamState().withStreamDescriptor(new StreamDescriptor().withNamespace("namespace").withName("name")) + .withStreamState(Jsons.jsonNode(new DbStreamState())))); + final StateManager stateManager = + new GlobalStateManager(new AirbyteStateMessage().withStateType(AirbyteStateType.GLOBAL).withGlobal(globalState), catalog); + assertNotNull(stateManager.getCdcStateManager()); + assertEquals(cdcState, stateManager.getCdcStateManager().getCdcState()); + } + + @Test + void testToStateFromLegacyState() { + final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() + .withStreams(List.of( + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME1).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD1)), + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD2)), + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME3).withNamespace(NAMESPACE)))); + + final CdcState cdcState = new CdcState().withState(Jsons.jsonNode(Map.of("foo", "bar", "baz", 5))); + final DbState dbState = new DbState() + .withCdc(true) + .withCdcState(cdcState) + .withStreams(List.of( + new DbStreamState() + .withStreamName(STREAM_NAME1) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD1)) + .withCursor("a"), + new DbStreamState() + .withStreamName(STREAM_NAME2) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD2)), + new DbStreamState() + .withStreamName(STREAM_NAME3) 
+ .withStreamNamespace(NAMESPACE)) + .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())); + final StateManager stateManager = new GlobalStateManager(new AirbyteStateMessage().withData(Jsons.jsonNode(dbState)), catalog); + + final DbState expectedDbState = new DbState() + .withCdc(true) + .withCdcState(cdcState) + .withStreams(List.of( + new DbStreamState() + .withStreamName(STREAM_NAME1) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD1)) + .withCursor("a"), + new DbStreamState() + .withStreamName(STREAM_NAME2) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD2)), + new DbStreamState() + .withStreamName(STREAM_NAME3) + .withStreamNamespace(NAMESPACE)) + .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())); + + final AirbyteGlobalState expectedGlobalState = new AirbyteGlobalState() + .withSharedState(Jsons.jsonNode(cdcState)) + .withStreamStates(List.of( + new AirbyteStreamState() + .withStreamDescriptor(new StreamDescriptor().withName(STREAM_NAME1).withNamespace(NAMESPACE)) + .withStreamState(Jsons.jsonNode(new DbStreamState() + .withStreamName(STREAM_NAME1) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD1)) + .withCursor("a"))), + new AirbyteStreamState() + .withStreamDescriptor(new StreamDescriptor().withName(STREAM_NAME2).withNamespace(NAMESPACE)) + .withStreamState(Jsons.jsonNode(new DbStreamState() + .withStreamName(STREAM_NAME2) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD2)))), + new AirbyteStreamState() + .withStreamDescriptor(new StreamDescriptor().withName(STREAM_NAME3).withNamespace(NAMESPACE)) + .withStreamState(Jsons.jsonNode(new DbStreamState() + .withStreamName(STREAM_NAME3) + .withStreamNamespace(NAMESPACE)))) + .stream().sorted(Comparator.comparing(o -> o.getStreamDescriptor().getName())).collect(Collectors.toList())); + final AirbyteStateMessage expected 
= new AirbyteStateMessage() + .withData(Jsons.jsonNode(expectedDbState)) + .withGlobal(expectedGlobalState) + .withStateType(AirbyteStateType.GLOBAL); + + final AirbyteStateMessage actualFirstEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR1, "a"); + assertEquals(expected, actualFirstEmission); + } + + @Test + void testToState() { + final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() + .withStreams(List.of( + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME1).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD1)), + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD2)), + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME3).withNamespace(NAMESPACE)))); + + final CdcState cdcState = new CdcState().withState(Jsons.jsonNode(Map.of("foo", "bar", "baz", 5))); + final AirbyteGlobalState globalState = new AirbyteGlobalState().withSharedState(Jsons.jsonNode(new DbState())).withStreamStates( + List.of(new AirbyteStreamState().withStreamDescriptor(new StreamDescriptor()).withStreamState(Jsons.jsonNode(new DbStreamState())))); + final StateManager stateManager = + new GlobalStateManager(new AirbyteStateMessage().withStateType(AirbyteStateType.GLOBAL).withGlobal(globalState), catalog); + stateManager.getCdcStateManager().setCdcState(cdcState); + + final DbState expectedDbState = new DbState() + .withCdc(true) + .withCdcState(cdcState) + .withStreams(List.of( + new DbStreamState() + .withStreamName(STREAM_NAME1) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD1)) + .withCursor("a"), + new DbStreamState() + .withStreamName(STREAM_NAME2) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD2)), + new DbStreamState() + .withStreamName(STREAM_NAME3) + .withStreamNamespace(NAMESPACE)) + 
.stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())); + + final AirbyteGlobalState expectedGlobalState = new AirbyteGlobalState() + .withSharedState(Jsons.jsonNode(cdcState)) + .withStreamStates(List.of( + new AirbyteStreamState() + .withStreamDescriptor(new StreamDescriptor().withName(STREAM_NAME1).withNamespace(NAMESPACE)) + .withStreamState(Jsons.jsonNode(new DbStreamState() + .withStreamName(STREAM_NAME1) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD1)) + .withCursor("a"))), + new AirbyteStreamState() + .withStreamDescriptor(new StreamDescriptor().withName(STREAM_NAME2).withNamespace(NAMESPACE)) + .withStreamState(Jsons.jsonNode(new DbStreamState() + .withStreamName(STREAM_NAME2) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD2)))), + new AirbyteStreamState() + .withStreamDescriptor(new StreamDescriptor().withName(STREAM_NAME3).withNamespace(NAMESPACE)) + .withStreamState(Jsons.jsonNode(new DbStreamState() + .withStreamName(STREAM_NAME3) + .withStreamNamespace(NAMESPACE)))) + .stream().sorted(Comparator.comparing(o -> o.getStreamDescriptor().getName())).collect(Collectors.toList())); + final AirbyteStateMessage expected = new AirbyteStateMessage() + .withData(Jsons.jsonNode(expectedDbState)) + .withGlobal(expectedGlobalState) + .withStateType(AirbyteStateType.GLOBAL); + + final AirbyteStateMessage actualFirstEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR1, "a"); + assertEquals(expected, actualFirstEmission); + } + +} diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManagerTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManagerTest.java new file mode 100644 index 000000000000..cbf41a7415e4 --- /dev/null +++ 
b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManagerTest.java @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.relationaldb.state; + +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.CURSOR; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.CURSOR_FIELD1; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.CURSOR_FIELD2; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.NAMESPACE; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.NAME_NAMESPACE_PAIR1; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.NAME_NAMESPACE_PAIR2; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME1; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME2; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME3; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.mockito.Mockito.mock; + +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.relationaldb.models.CdcState; +import io.airbyte.integrations.source.relationaldb.models.DbState; +import io.airbyte.integrations.source.relationaldb.models.DbStreamState; +import io.airbyte.protocol.models.AirbyteStateMessage; +import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; +import io.airbyte.protocol.models.AirbyteStream; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.ConfiguredAirbyteStream; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import 
java.util.Optional; +import java.util.stream.Collectors; +import org.junit.jupiter.api.Test; + +/** + * Test suite for the {@link LegacyStateManager} class. + */ +public class LegacyStateManagerTest { + + @Test + void testGetters() { + final DbState state = new DbState().withStreams(List.of( + new DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD1)) + .withCursor(CURSOR), + new DbStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE))); + + final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() + .withStreams(List.of( + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME1).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD1)), + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)))); + + final StateManager stateManager = new LegacyStateManager(state, catalog); + + assertEquals(Optional.of(CURSOR_FIELD1), stateManager.getOriginalCursorField(NAME_NAMESPACE_PAIR1)); + assertEquals(Optional.of(CURSOR), stateManager.getOriginalCursor(NAME_NAMESPACE_PAIR1)); + assertEquals(Optional.of(CURSOR_FIELD1), stateManager.getCursorField(NAME_NAMESPACE_PAIR1)); + assertEquals(Optional.of(CURSOR), stateManager.getCursor(NAME_NAMESPACE_PAIR1)); + + assertEquals(Optional.empty(), stateManager.getOriginalCursorField(NAME_NAMESPACE_PAIR2)); + assertEquals(Optional.empty(), stateManager.getOriginalCursor(NAME_NAMESPACE_PAIR2)); + assertEquals(Optional.empty(), stateManager.getCursorField(NAME_NAMESPACE_PAIR2)); + assertEquals(Optional.empty(), stateManager.getCursor(NAME_NAMESPACE_PAIR2)); + } + + @Test + void testToState() { + final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() + .withStreams(List.of( + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME1).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD1)), + new 
ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD2)), + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME3).withNamespace(NAMESPACE)))); + + final StateManager stateManager = new LegacyStateManager(new DbState(), catalog); + + final AirbyteStateMessage expectedFirstEmission = new AirbyteStateMessage() + .withStateType(AirbyteStateType.LEGACY) + .withData(Jsons.jsonNode(new DbState().withStreams(List.of( + new DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD1)) + .withCursor("a"), + new DbStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD2)), + new DbStreamState().withStreamName(STREAM_NAME3).withStreamNamespace(NAMESPACE)) + .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())) + .withCdc(false))); + final AirbyteStateMessage actualFirstEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR1, "a"); + assertEquals(expectedFirstEmission, actualFirstEmission); + final AirbyteStateMessage expectedSecondEmission = new AirbyteStateMessage() + .withStateType(AirbyteStateType.LEGACY) + .withData(Jsons.jsonNode(new DbState().withStreams(List.of( + new DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD1)) + .withCursor("a"), + new DbStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD2)) + .withCursor("b"), + new DbStreamState().withStreamName(STREAM_NAME3).withStreamNamespace(NAMESPACE)) + .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())) + .withCdc(false))); + final AirbyteStateMessage actualSecondEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR2, "b"); + assertEquals(expectedSecondEmission, 
actualSecondEmission); + } + + @Test + void testToStateNullCursorField() { + final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() + .withStreams(List.of( + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME1).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD1)), + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)))); + final StateManager stateManager = new LegacyStateManager(new DbState(), catalog); + + final AirbyteStateMessage expectedFirstEmission = new AirbyteStateMessage() + .withStateType(AirbyteStateType.LEGACY) + .withData(Jsons.jsonNode(new DbState().withStreams(List.of( + new DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD1)) + .withCursor("a"), + new DbStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE)) + .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())) + .withCdc(false))); + + final AirbyteStateMessage actualFirstEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR1, "a"); + assertEquals(expectedFirstEmission, actualFirstEmission); + } + + @Test + void testCursorNotUpdatedForCdc() { + final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() + .withStreams(List.of( + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME1).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD1)), + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)))); + + final DbState state = new DbState(); + state.setCdc(true); + final StateManager stateManager = new LegacyStateManager(state, catalog); + + final AirbyteStateMessage expectedFirstEmission = new AirbyteStateMessage() + .withStateType(AirbyteStateType.LEGACY) + .withData(Jsons.jsonNode(new DbState().withStreams(List.of( + new 
DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD1)) + .withCursor(null), + new DbStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE).withCursorField(List.of())) + .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())) + .withCdc(true))); + final AirbyteStateMessage actualFirstEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR1, "a"); + assertEquals(expectedFirstEmission, actualFirstEmission); + final AirbyteStateMessage expectedSecondEmission = new AirbyteStateMessage() + .withStateType(AirbyteStateType.LEGACY) + .withData(Jsons.jsonNode(new DbState().withStreams(List.of( + new DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD1)) + .withCursor(null), + new DbStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE).withCursorField(List.of()) + .withCursor(null)) + .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())) + .withCdc(true))); + final AirbyteStateMessage actualSecondEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR2, "b"); + assertEquals(expectedSecondEmission, actualSecondEmission); + } + + @Test + void testCdcStateManager() { + final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + final CdcState cdcState = new CdcState().withState(Jsons.jsonNode(Map.of("foo", "bar", "baz", 5))); + final DbState dbState = new DbState().withCdcState(cdcState).withStreams(List.of( + new DbStreamState().withStreamNamespace(NAMESPACE).withStreamName(STREAM_NAME1))); + final StateManager stateManager = new LegacyStateManager(dbState, catalog); + assertNotNull(stateManager.getCdcStateManager()); + assertEquals(cdcState, stateManager.getCdcStateManager().getCdcState()); + } + +} diff --git 
a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtilsTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtilsTest.java new file mode 100644 index 000000000000..9ac94775c928 --- /dev/null +++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtilsTest.java @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.relationaldb.state; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import io.airbyte.protocol.models.StreamDescriptor; +import org.junit.jupiter.api.Test; + +/** + * Test suite for the {@link StateGeneratorUtils} class. + */ +public class StateGeneratorUtilsTest { + + @Test + void testValidStreamDescriptor() { + final StreamDescriptor streamDescriptor1 = null; + final StreamDescriptor streamDescriptor2 = new StreamDescriptor(); + final StreamDescriptor streamDescriptor3 = new StreamDescriptor().withName("name"); + final StreamDescriptor streamDescriptor4 = new StreamDescriptor().withNamespace("namespace"); + final StreamDescriptor streamDescriptor5 = new StreamDescriptor().withName("name").withNamespace("namespace"); + final StreamDescriptor streamDescriptor6 = new StreamDescriptor().withName("name").withNamespace(""); + final StreamDescriptor streamDescriptor7 = new StreamDescriptor().withName("").withNamespace("namespace"); + final StreamDescriptor streamDescriptor8 = new StreamDescriptor().withName("").withNamespace(""); + + assertFalse(StateGeneratorUtils.isValidStreamDescriptor(streamDescriptor1)); + assertFalse(StateGeneratorUtils.isValidStreamDescriptor(streamDescriptor2)); + assertTrue(StateGeneratorUtils.isValidStreamDescriptor(streamDescriptor3)); 
+ assertFalse(StateGeneratorUtils.isValidStreamDescriptor(streamDescriptor4)); + assertTrue(StateGeneratorUtils.isValidStreamDescriptor(streamDescriptor5)); + assertTrue(StateGeneratorUtils.isValidStreamDescriptor(streamDescriptor6)); + assertTrue(StateGeneratorUtils.isValidStreamDescriptor(streamDescriptor7)); + assertTrue(StateGeneratorUtils.isValidStreamDescriptor(streamDescriptor8)); + } + +} diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java new file mode 100644 index 000000000000..0127b068915a --- /dev/null +++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java @@ -0,0 +1,187 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.relationaldb.state; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.relationaldb.models.CdcState; +import io.airbyte.integrations.source.relationaldb.models.DbState; +import io.airbyte.integrations.source.relationaldb.models.DbStreamState; +import io.airbyte.protocol.models.AirbyteGlobalState; +import io.airbyte.protocol.models.AirbyteStateMessage; +import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; +import io.airbyte.protocol.models.AirbyteStreamState; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.StreamDescriptor; +import java.util.List; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +/** + * Test suite for the {@link StateManagerFactory} class. 
+ */ +public class StateManagerFactoryTest { + + private static final String NAMESPACE = "namespace"; + private static final String NAME = "name"; + + @Test + void testNullOrEmptyState() { + final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + + Assertions.assertThrows(IllegalArgumentException.class, () -> { + StateManagerFactory.createStateManager(AirbyteStateType.GLOBAL, null, catalog); + }); + + Assertions.assertThrows(IllegalArgumentException.class, () -> { + StateManagerFactory.createStateManager(AirbyteStateType.GLOBAL, List.of(), catalog); + }); + + Assertions.assertThrows(IllegalArgumentException.class, () -> { + StateManagerFactory.createStateManager(AirbyteStateType.LEGACY, null, catalog); + }); + + Assertions.assertThrows(IllegalArgumentException.class, () -> { + StateManagerFactory.createStateManager(AirbyteStateType.LEGACY, List.of(), catalog); + }); + + Assertions.assertThrows(IllegalArgumentException.class, () -> { + StateManagerFactory.createStateManager(AirbyteStateType.STREAM, null, catalog); + }); + + Assertions.assertThrows(IllegalArgumentException.class, () -> { + StateManagerFactory.createStateManager(AirbyteStateType.STREAM, List.of(), catalog); + }); + } + + @Test + void testLegacyStateManagerCreationFromAirbyteStateMessage() { + final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + final AirbyteStateMessage airbyteStateMessage = mock(AirbyteStateMessage.class); + when(airbyteStateMessage.getData()).thenReturn(Jsons.jsonNode(new DbState())); + + final StateManager stateManager = StateManagerFactory.createStateManager(AirbyteStateType.LEGACY, List.of(airbyteStateMessage), catalog); + + Assertions.assertNotNull(stateManager); + Assertions.assertEquals(LegacyStateManager.class, stateManager.getClass()); + } + + @Test + void testGlobalStateManagerCreation() { + final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + final AirbyteGlobalState globalState = + new 
AirbyteGlobalState().withSharedState(Jsons.jsonNode(new DbState().withCdcState(new CdcState().withState(Jsons.jsonNode(new DbState()))))) + .withStreamStates(List.of(new AirbyteStreamState().withStreamDescriptor(new StreamDescriptor().withNamespace(NAMESPACE).withName(NAME)) + .withStreamState(Jsons.jsonNode(new DbStreamState())))); + final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage().withStateType(AirbyteStateType.GLOBAL).withGlobal(globalState); + + final StateManager stateManager = StateManagerFactory.createStateManager(AirbyteStateType.GLOBAL, List.of(airbyteStateMessage), catalog); + + Assertions.assertNotNull(stateManager); + Assertions.assertEquals(GlobalStateManager.class, stateManager.getClass()); + } + + @Test + void testGlobalStateManagerCreationFromLegacyState() { + final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + final CdcState cdcState = new CdcState(); + final DbState dbState = new DbState() + .withCdcState(cdcState) + .withStreams(List.of(new DbStreamState().withStreamName(NAME).withStreamNamespace(NAMESPACE))); + final AirbyteStateMessage airbyteStateMessage = + new AirbyteStateMessage().withStateType(AirbyteStateType.LEGACY).withData(Jsons.jsonNode(dbState)); + + final StateManager stateManager = StateManagerFactory.createStateManager(AirbyteStateType.GLOBAL, List.of(airbyteStateMessage), catalog); + + Assertions.assertNotNull(stateManager); + Assertions.assertEquals(GlobalStateManager.class, stateManager.getClass()); + } + + @Test + void testGlobalStateManagerCreationFromStreamState() { + final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage().withStateType(AirbyteStateType.STREAM) + .withStream(new AirbyteStreamState().withStreamDescriptor(new StreamDescriptor().withName(NAME).withNamespace( + NAMESPACE)).withStreamState(Jsons.jsonNode(new DbStreamState()))); + + 
Assertions.assertThrows(IllegalArgumentException.class, + () -> StateManagerFactory.createStateManager(AirbyteStateType.GLOBAL, List.of(airbyteStateMessage), catalog)); + } + + @Test + void testGlobalStateManagerCreationWithLegacyDataPresent() { + final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + final AirbyteGlobalState globalState = + new AirbyteGlobalState().withSharedState(Jsons.jsonNode(new DbState().withCdcState(new CdcState().withState(Jsons.jsonNode(new DbState()))))) + .withStreamStates(List.of(new AirbyteStreamState().withStreamDescriptor(new StreamDescriptor().withNamespace(NAMESPACE).withName(NAME)) + .withStreamState(Jsons.jsonNode(new DbStreamState())))); + final AirbyteStateMessage airbyteStateMessage = + new AirbyteStateMessage().withStateType(AirbyteStateType.GLOBAL).withGlobal(globalState).withData(Jsons.jsonNode(new DbState())); + + final StateManager stateManager = StateManagerFactory.createStateManager(AirbyteStateType.GLOBAL, List.of(airbyteStateMessage), catalog); + + Assertions.assertNotNull(stateManager); + Assertions.assertEquals(GlobalStateManager.class, stateManager.getClass()); + } + + @Test + void testStreamStateManagerCreation() { + final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage().withStateType(AirbyteStateType.STREAM) + .withStream(new AirbyteStreamState().withStreamDescriptor(new StreamDescriptor().withName(NAME).withNamespace( + NAMESPACE)).withStreamState(Jsons.jsonNode(new DbStreamState()))); + + final StateManager stateManager = StateManagerFactory.createStateManager(AirbyteStateType.STREAM, List.of(airbyteStateMessage), catalog); + + Assertions.assertNotNull(stateManager); + Assertions.assertEquals(StreamStateManager.class, stateManager.getClass()); + } + + @Test + void testStreamStateManagerCreationFromLegacy() { + final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); 
+ final CdcState cdcState = new CdcState(); + final DbState dbState = new DbState() + .withCdcState(cdcState) + .withStreams(List.of(new DbStreamState().withStreamName(NAME).withStreamNamespace(NAMESPACE))); + final AirbyteStateMessage airbyteStateMessage = + new AirbyteStateMessage().withStateType(AirbyteStateType.LEGACY).withData(Jsons.jsonNode(dbState)); + + final StateManager stateManager = StateManagerFactory.createStateManager(AirbyteStateType.STREAM, List.of(airbyteStateMessage), catalog); + + Assertions.assertNotNull(stateManager); + Assertions.assertEquals(StreamStateManager.class, stateManager.getClass()); + } + + @Test + void testStreamStateManagerCreationFromGlobal() { + final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + final AirbyteGlobalState globalState = + new AirbyteGlobalState().withSharedState(Jsons.jsonNode(new DbState().withCdcState(new CdcState().withState(Jsons.jsonNode(new DbState()))))) + .withStreamStates(List.of(new AirbyteStreamState().withStreamDescriptor(new StreamDescriptor().withNamespace(NAMESPACE).withName(NAME)) + .withStreamState(Jsons.jsonNode(new DbStreamState())))); + final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage().withStateType(AirbyteStateType.GLOBAL).withGlobal(globalState); + + Assertions.assertThrows(IllegalArgumentException.class, + () -> StateManagerFactory.createStateManager(AirbyteStateType.STREAM, List.of(airbyteStateMessage), catalog)); + } + + @Test + void testStreamStateManagerCreationWithLegacyDataPresent() { + final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage().withStateType(AirbyteStateType.STREAM) + .withStream(new AirbyteStreamState().withStreamDescriptor(new StreamDescriptor().withName(NAME).withNamespace( + NAMESPACE)).withStreamState(Jsons.jsonNode(new DbStreamState()))) + .withData(Jsons.jsonNode(new DbState())); + + final StateManager stateManager 
= StateManagerFactory.createStateManager(AirbyteStateType.STREAM, List.of(airbyteStateMessage), catalog); + + Assertions.assertNotNull(stateManager); + Assertions.assertEquals(StreamStateManager.class, stateManager.getClass()); + } + +} diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateTestConstants.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateTestConstants.java new file mode 100644 index 000000000000..e939c9aea87d --- /dev/null +++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateTestConstants.java @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.relationaldb.state; + +import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; +import io.airbyte.integrations.source.relationaldb.models.DbStreamState; +import io.airbyte.protocol.models.AirbyteStream; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.ConfiguredAirbyteStream; +import java.util.Collections; +import java.util.List; +import java.util.Optional; +import org.testcontainers.shaded.com.google.common.collect.Lists; + +/** + * Collection of constants for use in state management-related tests. 
+ */ +public final class StateTestConstants { + + public static final String NAMESPACE = "public"; + public static final String STREAM_NAME1 = "cars"; + public static final AirbyteStreamNameNamespacePair NAME_NAMESPACE_PAIR1 = new AirbyteStreamNameNamespacePair(STREAM_NAME1, NAMESPACE); + public static final String STREAM_NAME2 = "bicycles"; + public static final AirbyteStreamNameNamespacePair NAME_NAMESPACE_PAIR2 = new AirbyteStreamNameNamespacePair(STREAM_NAME2, NAMESPACE); + public static final String STREAM_NAME3 = "stationary_bicycles"; + public static final String CURSOR_FIELD1 = "year"; + public static final String CURSOR_FIELD2 = "generation"; + public static final String CURSOR = "2000"; + + private StateTestConstants() {} + + @SuppressWarnings("SameParameterValue") + public static Optional getState(final String cursorField, final String cursor) { + return Optional.of(new DbStreamState() + .withStreamName(STREAM_NAME1) + .withCursorField(Lists.newArrayList(cursorField)) + .withCursor(cursor)); + } + + public static Optional getCatalog(final String cursorField) { + return Optional.of(new ConfiguredAirbyteCatalog() + .withStreams(List.of(getStream(cursorField).orElse(null)))); + } + + public static Optional getStream(final String cursorField) { + return Optional.of(new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME1)) + .withCursorField(cursorField == null ? 
Collections.emptyList() : Lists.newArrayList(cursorField))); + } + +} diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StreamStateManagerTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StreamStateManagerTest.java new file mode 100644 index 000000000000..704dc665cf0d --- /dev/null +++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StreamStateManagerTest.java @@ -0,0 +1,255 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.relationaldb.state; + +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.CURSOR; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.CURSOR_FIELD1; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.CURSOR_FIELD2; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.NAMESPACE; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.NAME_NAMESPACE_PAIR1; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.NAME_NAMESPACE_PAIR2; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME1; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME2; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME3; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.mockito.Mockito.mock; + +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; +import 
io.airbyte.integrations.source.relationaldb.models.DbState; +import io.airbyte.integrations.source.relationaldb.models.DbStreamState; +import io.airbyte.protocol.models.AirbyteStateMessage; +import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; +import io.airbyte.protocol.models.AirbyteStream; +import io.airbyte.protocol.models.AirbyteStreamState; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.ConfiguredAirbyteStream; +import io.airbyte.protocol.models.StreamDescriptor; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +/** + * Test suite for the {@link StreamStateManager} class. + */ +public class StreamStateManagerTest { + + @Test + void testCreationFromInvalidState() { + final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage() + .withStateType(AirbyteStateType.STREAM) + .withStream(new AirbyteStreamState() + .withStreamDescriptor(new StreamDescriptor().withName(STREAM_NAME1).withNamespace(NAMESPACE)) + .withStreamState(Jsons.jsonNode("Not a state object"))); + final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + + Assertions.assertDoesNotThrow(() -> { + final StateManager stateManager = new StreamStateManager(List.of(airbyteStateMessage), catalog); + assertNotNull(stateManager); + }); + } + + @Test + void testGetters() { + final List state = new ArrayList<>(); + state.add(createStreamState(STREAM_NAME1, NAMESPACE, List.of(CURSOR_FIELD1), CURSOR)); + state.add(createStreamState(STREAM_NAME2, NAMESPACE, List.of(), null)); + + final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() + .withStreams(List.of( + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME1).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD1)), + 
new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)))); + + final StateManager stateManager = new StreamStateManager(state, catalog); + + assertEquals(Optional.of(CURSOR_FIELD1), stateManager.getOriginalCursorField(NAME_NAMESPACE_PAIR1)); + assertEquals(Optional.of(CURSOR), stateManager.getOriginalCursor(NAME_NAMESPACE_PAIR1)); + assertEquals(Optional.of(CURSOR_FIELD1), stateManager.getCursorField(NAME_NAMESPACE_PAIR1)); + assertEquals(Optional.of(CURSOR), stateManager.getCursor(NAME_NAMESPACE_PAIR1)); + + assertEquals(Optional.empty(), stateManager.getOriginalCursorField(NAME_NAMESPACE_PAIR2)); + assertEquals(Optional.empty(), stateManager.getOriginalCursor(NAME_NAMESPACE_PAIR2)); + assertEquals(Optional.empty(), stateManager.getCursorField(NAME_NAMESPACE_PAIR2)); + assertEquals(Optional.empty(), stateManager.getCursor(NAME_NAMESPACE_PAIR2)); + } + + @Test + void testToState() { + final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() + .withStreams(List.of( + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME1).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD1)), + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD2)), + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME3).withNamespace(NAMESPACE)))); + + final StateManager stateManager = new StreamStateManager(createDefaultState(), catalog); + + final DbState expectedFirstDbState = new DbState() + .withCdc(false) + .withStreams(List.of( + new DbStreamState() + .withStreamName(STREAM_NAME1) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD1)) + .withCursor("a"), + new DbStreamState() + .withStreamName(STREAM_NAME2) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD2)), + new DbStreamState() + 
.withStreamName(STREAM_NAME3) + .withStreamNamespace(NAMESPACE)) + .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())); + final AirbyteStateMessage expectedFirstEmission = + createStreamState(STREAM_NAME1, NAMESPACE, List.of(CURSOR_FIELD1), "a").withData(Jsons.jsonNode(expectedFirstDbState)); + + final AirbyteStateMessage actualFirstEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR1, "a"); + assertEquals(expectedFirstEmission, actualFirstEmission); + + final DbState expectedSecondDbState = new DbState() + .withCdc(false) + .withStreams(List.of( + new DbStreamState() + .withStreamName(STREAM_NAME1) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD1)) + .withCursor("a"), + new DbStreamState() + .withStreamName(STREAM_NAME2) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD2)) + .withCursor("b"), + new DbStreamState() + .withStreamName(STREAM_NAME3) + .withStreamNamespace(NAMESPACE)) + .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())); + final AirbyteStateMessage expectedSecondEmission = + createStreamState(STREAM_NAME2, NAMESPACE, List.of(CURSOR_FIELD2), "b").withData(Jsons.jsonNode(expectedSecondDbState)); + + final AirbyteStateMessage actualSecondEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR2, "b"); + assertEquals(expectedSecondEmission, actualSecondEmission); + } + + @Test + void testToStateWithoutCursorInfo() { + final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() + .withStreams(List.of( + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME1).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD1)), + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD2)), + new ConfiguredAirbyteStream() + .withStream(new 
AirbyteStream().withName(STREAM_NAME3).withNamespace(NAMESPACE)))); + final AirbyteStreamNameNamespacePair airbyteStreamNameNamespacePair = new AirbyteStreamNameNamespacePair("other", "other"); + + final StateManager stateManager = new StreamStateManager(createDefaultState(), catalog); + final AirbyteStateMessage airbyteStateMessage = stateManager.toState(Optional.of(airbyteStreamNameNamespacePair)); + assertNotNull(airbyteStateMessage); + assertEquals(AirbyteStateType.STREAM, airbyteStateMessage.getStateType()); + assertNotNull(airbyteStateMessage.getStream()); + } + + @Test + void testToStateWithoutStreamPair() { + final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() + .withStreams(List.of( + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME1).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD1)), + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD2)), + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME3).withNamespace(NAMESPACE)))); + + final StateManager stateManager = new StreamStateManager(createDefaultState(), catalog); + final AirbyteStateMessage airbyteStateMessage = stateManager.toState(Optional.empty()); + assertNotNull(airbyteStateMessage); + assertEquals(AirbyteStateType.STREAM, airbyteStateMessage.getStateType()); + assertNotNull(airbyteStateMessage.getStream()); + assertNull(airbyteStateMessage.getStream().getStreamState()); + } + + @Test + void testToStateNullCursorField() { + final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() + .withStreams(List.of( + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME1).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD1)), + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)))); + 
final StateManager stateManager = new StreamStateManager(createDefaultState(), catalog); + + final DbState expectedFirstDbState = new DbState() + .withCdc(false) + .withStreams(List.of( + new DbStreamState() + .withStreamName(STREAM_NAME1) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD1)) + .withCursor("a"), + new DbStreamState() + .withStreamName(STREAM_NAME2) + .withStreamNamespace(NAMESPACE)) + .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())); + + final AirbyteStateMessage expectedFirstEmission = + createStreamState(STREAM_NAME1, NAMESPACE, List.of(CURSOR_FIELD1), "a").withData(Jsons.jsonNode(expectedFirstDbState)); + final AirbyteStateMessage actualFirstEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR1, "a"); + assertEquals(expectedFirstEmission, actualFirstEmission); + } + + @Test + void testCdcStateManager() { + final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + final StateManager stateManager = new StreamStateManager( + List.of(new AirbyteStateMessage().withStateType(AirbyteStateType.STREAM).withStream(new AirbyteStreamState())), catalog); + Assertions.assertThrows(UnsupportedOperationException.class, () -> stateManager.getCdcStateManager()); + } + + private List createDefaultState() { + return List.of(new AirbyteStateMessage().withStateType(AirbyteStateType.STREAM).withStream(new AirbyteStreamState())); + } + + private AirbyteStateMessage createStreamState(final String name, + final String namespace, + final List cursorFields, + final String cursorValue) { + final DbStreamState dbStreamState = new DbStreamState() + .withStreamName(name) + .withStreamNamespace(namespace); + + if (cursorFields != null && !cursorFields.isEmpty()) { + dbStreamState.withCursorField(cursorFields); + } + + if (cursorValue != null) { + dbStreamState.withCursor(cursorValue); + } + + return new AirbyteStateMessage() + .withStateType(AirbyteStateType.STREAM) + 
.withStream(new AirbyteStreamState() + .withStreamDescriptor(new StreamDescriptor().withName(name).withNamespace(namespace)) + .withStreamState(Jsons.jsonNode(dbStreamState))); + } + +}