From a2db8c08ef0607277ea10e6c6036d5109a1f93b5 Mon Sep 17 00:00:00 2001 From: jdpgrailsdev Date: Wed, 8 Jun 2022 10:32:06 -0400 Subject: [PATCH 01/34] WIP Per-stream state support for Postgres source --- .../jdbc/test/JdbcSourceAcceptanceTest.java | 295 +++++++++++------- .../source/mssql/MssqlCdcStateHandler.java | 2 +- .../source/mssql/MssqlSource.java | 2 +- .../source/mysql/MySqlCdcStateHandler.java | 2 +- .../source/mysql/MySqlSource.java | 2 +- .../postgres/PostgresCdcStateHandler.java | 2 +- .../source/postgres/PostgresSource.java | 23 +- .../PostgresJdbcSourceAcceptanceTest.java | 33 +- .../source/relationaldb/AbstractDbSource.java | 27 +- .../source/relationaldb/CdcStateManager.java | 6 +- .../relationaldb/StateDecoratingIterator.java | 2 +- .../source/relationaldb/StateManager.java | 197 ------------ .../state/AbstractStateManager.java | 62 ++++ .../relationaldb/state/CursorManager.java | 222 +++++++++++++ .../state/LegacyStateManager.java | 122 ++++++++ .../state/PerStreamStateManager.java | 142 +++++++++ .../relationaldb/state/StateManager.java | 142 +++++++++ .../state/StateManagerFactory.java | 75 +++++ .../StateDecoratingIteratorTest.java | 1 + .../source/relationaldb/StateManagerTest.java | 192 ------------ .../relationaldb/state/CursorManagerTest.java | 168 ++++++++++ .../state/LegacyStateManagerTest.java | 160 ++++++++++ .../state/PerStreamStateManagerTest.java | 248 +++++++++++++++ .../state/StateManagerFactoryTest.java | 102 ++++++ .../state/StateTestConstants.java | 53 ++++ 25 files changed, 1743 insertions(+), 539 deletions(-) delete mode 100644 airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/StateManager.java create mode 100644 airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/AbstractStateManager.java create mode 100644 
airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/CursorManager.java create mode 100644 airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManager.java create mode 100644 airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManager.java create mode 100644 airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManager.java create mode 100644 airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java delete mode 100644 airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/StateManagerTest.java create mode 100644 airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/CursorManagerTest.java create mode 100644 airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManagerTest.java create mode 100644 airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManagerTest.java create mode 100644 airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java create mode 100644 airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateTestConstants.java diff --git a/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java 
b/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java index 802d8ac79bc7..3670d13c224a 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java @@ -13,10 +13,6 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Lists; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.resources.MoreResources; @@ -39,7 +35,9 @@ import io.airbyte.protocol.models.AirbyteMessage.Type; import io.airbyte.protocol.models.AirbyteRecordMessage; import io.airbyte.protocol.models.AirbyteStateMessage; +import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; import io.airbyte.protocol.models.AirbyteStream; +import io.airbyte.protocol.models.AirbyteStreamState; import io.airbyte.protocol.models.CatalogHelpers; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import io.airbyte.protocol.models.ConfiguredAirbyteStream; @@ -54,6 +52,7 @@ import java.util.Collections; import java.util.Comparator; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.function.Function; @@ -82,7 +81,7 @@ public abstract class JdbcSourceAcceptanceTest { // otherwise parallel runs can interfere with each other public static String SCHEMA_NAME = Strings.addRandomSuffix("jdbc_integration_test1", "_", 5).toLowerCase(); public static String SCHEMA_NAME2 = Strings.addRandomSuffix("jdbc_integration_test2", "_", 5).toLowerCase(); 
- public static Set TEST_SCHEMAS = ImmutableSet.of(SCHEMA_NAME, SCHEMA_NAME2); + public static Set TEST_SCHEMAS = Set.of(SCHEMA_NAME, SCHEMA_NAME2); public static String TABLE_NAME = "id_and_name"; public static String TABLE_NAME_WITH_SPACES = "id and name"; @@ -255,7 +254,7 @@ public void setup() throws Exception { connection.createStatement().execute( createTableQuery(getFullyQualifiedTableName(TABLE_NAME_COMPOSITE_PK), COLUMN_CLAUSE_WITH_COMPOSITE_PK, - primaryKeyClause(ImmutableList.of("first_name", "last_name")))); + primaryKeyClause(List.of("first_name", "last_name")))); connection.createStatement().execute( String.format( "INSERT INTO %s(first_name, last_name, updated_at) VALUES ('first' ,'picard', '2004-10-19')", @@ -359,7 +358,7 @@ void testDiscoverWithMultipleSchemas() throws Exception { SCHEMA_NAME2, Field.of(COL_ID, JsonSchemaType.STRING), Field.of(COL_NAME, JsonSchemaType.STRING)) - .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))); + .withSupportedSyncModes(List.of(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))); // sort streams by name so that we are comparing lists with the same order. final Comparator schemaTableCompare = Comparator.comparing(stream -> stream.getNamespace() + "." 
+ stream.getName()); expected.getStreams().sort(schemaTableCompare); @@ -389,9 +388,9 @@ void testReadOneColumn() throws Exception { setEmittedAtToNull(actualMessages); final List expectedMessages = getAirbyteMessagesReadOneColumn(); - assertTrue(expectedMessages.size() == actualMessages.size()); - assertTrue(expectedMessages.containsAll(actualMessages)); - assertTrue(actualMessages.containsAll(expectedMessages)); + assertEquals(actualMessages.size(), expectedMessages.size()); + assertEquals(actualMessages, expectedMessages); + assertEquals(expectedMessages, actualMessages); } protected List getAirbyteMessagesReadOneColumn() { @@ -446,12 +445,12 @@ void testReadMultipleTables() throws Exception { setEmittedAtToNull(actualMessages); - assertTrue(expectedMessages.size() == actualMessages.size()); - assertTrue(expectedMessages.containsAll(actualMessages)); - assertTrue(actualMessages.containsAll(expectedMessages)); + assertEquals(actualMessages.size(), expectedMessages.size()); + assertEquals(actualMessages, expectedMessages); + assertEquals(expectedMessages, actualMessages); } - protected List getAirbyteMessagesSecondSync(String streamName2) { + protected List getAirbyteMessagesSecondSync(final String streamName2) { return getTestMessages() .stream() .map(Jsons::clone) @@ -471,7 +470,7 @@ void testTablesWithQuoting() throws Exception { final ConfiguredAirbyteStream streamForTableWithSpaces = createTableWithSpaces(); final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() - .withStreams(Lists.newArrayList( + .withStreams(List.of( getConfiguredCatalogWithOneStream(getDefaultNamespace()).getStreams().get(0), streamForTableWithSpaces)); final List actualMessages = MoreIterators @@ -483,12 +482,12 @@ void testTablesWithQuoting() throws Exception { final List expectedMessages = new ArrayList<>(getTestMessages()); expectedMessages.addAll(secondStreamExpectedMessages); - assertTrue(expectedMessages.size() == actualMessages.size()); - 
assertTrue(expectedMessages.containsAll(actualMessages)); - assertTrue(actualMessages.containsAll(expectedMessages)); + assertEquals(actualMessages.size(), expectedMessages.size()); + assertEquals(actualMessages, expectedMessages); + assertEquals(expectedMessages, actualMessages); } - protected List getAirbyteMessagesForTablesWithQuoting(ConfiguredAirbyteStream streamForTableWithSpaces) { + protected List getAirbyteMessagesForTablesWithQuoting(final ConfiguredAirbyteStream streamForTableWithSpaces) { return getTestMessages() .stream() .map(Jsons::clone) @@ -509,7 +508,7 @@ void testReadFailure() { final ConfiguredAirbyteStream spiedAbStream = spy( getConfiguredCatalogWithOneStream(getDefaultNamespace()).getStreams().get(0)); final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() - .withStreams(Lists.newArrayList(spiedAbStream)); + .withStreams(List.of(spiedAbStream)); doCallRealMethod().doThrow(new RuntimeException()).when(spiedAbStream).getStream(); assertThrows(RuntimeException.class, () -> source.read(config, catalog, null)); @@ -521,7 +520,7 @@ void testIncrementalNoPreviousState() throws Exception { COL_ID, null, "3", - Lists.newArrayList(getTestMessages())); + getTestMessages()); } @Test @@ -530,7 +529,7 @@ void testIncrementalIntCheckCursor() throws Exception { COL_ID, "2", "3", - Lists.newArrayList(getTestMessages().get(2))); + List.of(getTestMessages().get(2))); } @Test @@ -539,14 +538,14 @@ void testIncrementalStringCheckCursor() throws Exception { COL_NAME, "patent", "vash", - Lists.newArrayList(getTestMessages().get(0), getTestMessages().get(2))); + List.of(getTestMessages().get(0), getTestMessages().get(2))); } @Test void testIncrementalStringCheckCursorSpaceInColumnName() throws Exception { final ConfiguredAirbyteStream streamWithSpaces = createTableWithSpaces(); - final ArrayList expectedRecordMessages = getAirbyteMessagesCheckCursorSpaceInColumnName(streamWithSpaces); + final List expectedRecordMessages = 
getAirbyteMessagesCheckCursorSpaceInColumnName(streamWithSpaces); incrementalCursorCheck( COL_LAST_NAME_WITH_SPACE, COL_LAST_NAME_WITH_SPACE, @@ -556,7 +555,7 @@ void testIncrementalStringCheckCursorSpaceInColumnName() throws Exception { streamWithSpaces); } - protected ArrayList getAirbyteMessagesCheckCursorSpaceInColumnName(ConfiguredAirbyteStream streamWithSpaces) { + protected List getAirbyteMessagesCheckCursorSpaceInColumnName(final ConfiguredAirbyteStream streamWithSpaces) { final AirbyteMessage firstMessage = getTestMessages().get(0); firstMessage.getRecord().setStream(streamWithSpaces.getStream().getName()); ((ObjectNode) firstMessage.getRecord().getData()).remove(COL_UPDATED_AT); @@ -569,9 +568,7 @@ protected ArrayList getAirbyteMessagesCheckCursorSpaceInColumnNa ((ObjectNode) secondMessage.getRecord().getData()).set(COL_LAST_NAME_WITH_SPACE, ((ObjectNode) secondMessage.getRecord().getData()).remove(COL_NAME)); - Lists.newArrayList(getTestMessages().get(0), getTestMessages().get(2)); - - return Lists.newArrayList(firstMessage, secondMessage); + return List.of(firstMessage, secondMessage); } @Test @@ -584,7 +581,7 @@ protected void incrementalDateCheck() throws Exception { COL_UPDATED_AT, "2005-10-18T00:00:00Z", "2006-10-19T00:00:00Z", - Lists.newArrayList(getTestMessages().get(1), getTestMessages().get(2))); + List.of(getTestMessages().get(1), getTestMessages().get(2))); } @Test @@ -597,7 +594,7 @@ void testIncrementalCursorChanges() throws Exception { // records to (incorrectly) be filtered out. 
"data", "vash", - Lists.newArrayList(getTestMessages())); + getTestMessages()); } @Test @@ -606,14 +603,12 @@ void testReadOneTableIncrementallyTwice() throws Exception { final ConfiguredAirbyteCatalog configuredCatalog = getConfiguredCatalogWithOneStream(namespace); configuredCatalog.getStreams().forEach(airbyteStream -> { airbyteStream.setSyncMode(SyncMode.INCREMENTAL); - airbyteStream.setCursorField(Lists.newArrayList(COL_ID)); + airbyteStream.setCursorField(List.of(COL_ID)); airbyteStream.setDestinationSyncMode(DestinationSyncMode.APPEND); }); - final DbState state = new DbState() - .withStreams(Lists.newArrayList(new DbStreamState().withStreamName(streamName).withStreamNamespace(namespace))); final List actualMessagesFirstSync = MoreIterators - .toList(source.read(config, configuredCatalog, Jsons.jsonNode(state))); + .toList(source.read(config, configuredCatalog, createEmptyState(streamName, namespace))); final Optional stateAfterFirstSyncOptional = actualMessagesFirstSync.stream() .filter(r -> r.getType() == Type.STATE).findFirst(); @@ -622,8 +617,7 @@ void testReadOneTableIncrementallyTwice() throws Exception { executeStatementReadIncrementallyTwice(); final List actualMessagesSecondSync = MoreIterators - .toList(source.read(config, configuredCatalog, - stateAfterFirstSyncOptional.get().getState().getData())); + .toList(source.read(config, configuredCatalog, extractState(stateAfterFirstSyncOptional.get()))); assertEquals(2, (int) actualMessagesSecondSync.stream().filter(r -> r.getType() == Type.RECORD).count()); @@ -631,9 +625,9 @@ void testReadOneTableIncrementallyTwice() throws Exception { setEmittedAtToNull(actualMessagesSecondSync); - assertTrue(expectedMessages.size() == actualMessagesSecondSync.size()); - assertTrue(expectedMessages.containsAll(actualMessagesSecondSync)); - assertTrue(actualMessagesSecondSync.containsAll(expectedMessages)); + assertEquals(actualMessagesSecondSync.size(), expectedMessages.size()); + 
assertEquals(actualMessagesSecondSync, expectedMessages); + assertEquals(expectedMessages, actualMessagesSecondSync); } protected void executeStatementReadIncrementallyTwice() throws SQLException { @@ -647,30 +641,28 @@ protected void executeStatementReadIncrementallyTwice() throws SQLException { }); } - protected List getExpectedAirbyteMessagesSecondSync(String namespace) { + protected List getExpectedAirbyteMessagesSecondSync(final String namespace) { final List expectedMessages = new ArrayList<>(); expectedMessages.add(new AirbyteMessage().withType(Type.RECORD) .withRecord(new AirbyteRecordMessage().withStream(streamName).withNamespace(namespace) - .withData(Jsons.jsonNode(ImmutableMap + .withData(Jsons.jsonNode(Map .of(COL_ID, ID_VALUE_4, COL_NAME, "riker", COL_UPDATED_AT, "2006-10-19T00:00:00Z"))))); expectedMessages.add(new AirbyteMessage().withType(Type.RECORD) .withRecord(new AirbyteRecordMessage().withStream(streamName).withNamespace(namespace) - .withData(Jsons.jsonNode(ImmutableMap + .withData(Jsons.jsonNode(Map .of(COL_ID, ID_VALUE_5, COL_NAME, "data", COL_UPDATED_AT, "2006-10-19T00:00:00Z"))))); + final DbStreamState state = new DbStreamState() + .withStreamName(streamName) + .withStreamNamespace(namespace) + .withCursorField(List.of(COL_ID)) + .withCursor("5"); expectedMessages.add(new AirbyteMessage() .withType(Type.STATE) - .withState(new AirbyteStateMessage() - .withData(Jsons.jsonNode(new DbState() - .withCdc(false) - .withStreams(Lists.newArrayList(new DbStreamState() - .withStreamName(streamName) - .withStreamNamespace(namespace) - .withCursorField(ImmutableList.of(COL_ID)) - .withCursor("5"))))))); + .withState(Jsons.object(createState(List.of(state)), AirbyteStateMessage.class))); return expectedMessages; } @@ -702,14 +694,12 @@ void testReadMultipleTablesIncrementally() throws Exception { Field.of(COL_NAME, JsonSchemaType.STRING))); configuredCatalog.getStreams().forEach(airbyteStream -> { airbyteStream.setSyncMode(SyncMode.INCREMENTAL); - 
airbyteStream.setCursorField(Lists.newArrayList(COL_ID)); + airbyteStream.setCursorField(List.of(COL_ID)); airbyteStream.setDestinationSyncMode(DestinationSyncMode.APPEND); }); - final DbState state = new DbState() - .withStreams(Lists.newArrayList(new DbStreamState().withStreamName(streamName).withStreamNamespace(namespace))); final List actualMessagesFirstSync = MoreIterators - .toList(source.read(config, configuredCatalog, Jsons.jsonNode(state))); + .toList(source.read(config, configuredCatalog, createEmptyState(streamName, namespace))); // get last state message. final Optional stateAfterFirstSyncOptional = actualMessagesFirstSync.stream() @@ -720,49 +710,41 @@ void testReadMultipleTablesIncrementally() throws Exception { // we know the second streams messages are the same as the first minus the updated at column. so we // cheat and generate the expected messages off of the first expected messages. final List secondStreamExpectedMessages = getAirbyteMessagesSecondStreamWithNamespace(streamName2); - final List expectedMessagesFirstSync = new ArrayList<>(getTestMessages()); - expectedMessagesFirstSync.add(new AirbyteMessage() - .withType(Type.STATE) - .withState(new AirbyteStateMessage() - .withData(Jsons.jsonNode(new DbState() - .withCdc(false) - .withStreams(Lists.newArrayList( - new DbStreamState() - .withStreamName(streamName) - .withStreamNamespace(namespace) - .withCursorField(ImmutableList.of(COL_ID)) - .withCursor("3"), - new DbStreamState() - .withStreamName(streamName2) - .withStreamNamespace(namespace) - .withCursorField(ImmutableList.of(COL_ID)))))))); + final List expectedStateStreams1 = List.of( + new DbStreamState() + .withStreamName(streamName) + .withStreamNamespace(namespace) + .withCursorField(List.of(COL_ID)) + .withCursor("3"), + new DbStreamState() + .withStreamName(streamName2) + .withStreamNamespace(namespace) + .withCursorField(List.of(COL_ID))); + + final List expectedStateStreams2 = List.of(new DbStreamState() + 
.withStreamName(streamName) + .withStreamNamespace(namespace) + .withCursorField(List.of(COL_ID)) + .withCursor("3"), + new DbStreamState() + .withStreamName(streamName2) + .withStreamNamespace(namespace) + .withCursorField(List.of(COL_ID)) + .withCursor("3")); + final List expectedMessagesFirstSync = new ArrayList<>(getTestMessages()); + expectedMessagesFirstSync.add(createExpectedTestMessage(expectedStateStreams1)); expectedMessagesFirstSync.addAll(secondStreamExpectedMessages); - expectedMessagesFirstSync.add(new AirbyteMessage() - .withType(Type.STATE) - .withState(new AirbyteStateMessage() - .withData(Jsons.jsonNode(new DbState() - .withCdc(false) - .withStreams(Lists.newArrayList( - new DbStreamState() - .withStreamName(streamName) - .withStreamNamespace(namespace) - .withCursorField(ImmutableList.of(COL_ID)) - .withCursor("3"), - new DbStreamState() - .withStreamName(streamName2) - .withStreamNamespace(namespace) - .withCursorField(ImmutableList.of(COL_ID)) - .withCursor("3"))))))); + expectedMessagesFirstSync.add(createExpectedTestMessage(expectedStateStreams2)); setEmittedAtToNull(actualMessagesFirstSync); - assertTrue(expectedMessagesFirstSync.size() == actualMessagesFirstSync.size()); - assertTrue(expectedMessagesFirstSync.containsAll(actualMessagesFirstSync)); - assertTrue(actualMessagesFirstSync.containsAll(expectedMessagesFirstSync)); + assertEquals(actualMessagesFirstSync.size(), expectedMessagesFirstSync.size()); + assertEquals(actualMessagesFirstSync, expectedMessagesFirstSync); + assertEquals(expectedMessagesFirstSync, actualMessagesFirstSync); } - protected List getAirbyteMessagesSecondStreamWithNamespace(String streamName2) { + protected List getAirbyteMessagesSecondStreamWithNamespace(final String streamName2) { return getTestMessages() .stream() .map(Jsons::clone) @@ -807,39 +789,35 @@ private void incrementalCursorCheck( final ConfiguredAirbyteStream airbyteStream) throws Exception { airbyteStream.setSyncMode(SyncMode.INCREMENTAL); - 
airbyteStream.setCursorField(Lists.newArrayList(cursorField)); + airbyteStream.setCursorField(List.of(cursorField)); airbyteStream.setDestinationSyncMode(DestinationSyncMode.APPEND); - final DbState state = new DbState() - .withStreams(Lists.newArrayList(new DbStreamState() - .withStreamName(airbyteStream.getStream().getName()) - .withStreamNamespace(airbyteStream.getStream().getNamespace()) - .withCursorField(ImmutableList.of(initialCursorField)) - .withCursor(initialCursorValue))); - final ConfiguredAirbyteCatalog configuredCatalog = new ConfiguredAirbyteCatalog() - .withStreams(ImmutableList.of(airbyteStream)); + .withStreams(List.of(airbyteStream)); + + final DbStreamState dbStreamState = new DbStreamState() + .withStreamName(airbyteStream.getStream().getName()) + .withStreamNamespace(airbyteStream.getStream().getNamespace()) + .withCursorField(List.of(initialCursorField)) + .withCursor(initialCursorValue); final List actualMessages = MoreIterators - .toList(source.read(config, configuredCatalog, Jsons.jsonNode(state))); + .toList(source.read(config, configuredCatalog, createState(List.of(dbStreamState)))); setEmittedAtToNull(actualMessages); + final List expectedStreams = List.of( + new DbStreamState() + .withStreamName(airbyteStream.getStream().getName()) + .withStreamNamespace(airbyteStream.getStream().getNamespace()) + .withCursorField(List.of(cursorField)) + .withCursor(endCursorValue)); final List expectedMessages = new ArrayList<>(expectedRecordMessages); - expectedMessages.add(new AirbyteMessage() - .withType(Type.STATE) - .withState(new AirbyteStateMessage() - .withData(Jsons.jsonNode(new DbState() - .withCdc(false) - .withStreams(Lists.newArrayList(new DbStreamState() - .withStreamName(airbyteStream.getStream().getName()) - .withStreamNamespace(airbyteStream.getStream().getNamespace()) - .withCursorField(ImmutableList.of(cursorField)) - .withCursor(endCursorValue))))))); + expectedMessages.add(createExpectedTestMessage(expectedStreams)); - 
assertTrue(expectedMessages.size() == actualMessages.size()); - assertTrue(expectedMessages.containsAll(actualMessages)); - assertTrue(actualMessages.containsAll(expectedMessages)); + assertEquals(actualMessages.size(), expectedMessages.size()); + assertEquals(actualMessages, expectedMessages); + assertEquals(expectedMessages, actualMessages); } // get catalog and perform a defensive copy. @@ -853,14 +831,14 @@ protected ConfiguredAirbyteCatalog getConfiguredCatalogWithOneStream(final Strin } protected AirbyteCatalog getCatalog(final String defaultNamespace) { - return new AirbyteCatalog().withStreams(Lists.newArrayList( + return new AirbyteCatalog().withStreams(List.of( CatalogHelpers.createAirbyteStream( TABLE_NAME, defaultNamespace, Field.of(COL_ID, JsonSchemaType.NUMBER), Field.of(COL_NAME, JsonSchemaType.STRING), Field.of(COL_UPDATED_AT, JsonSchemaType.STRING)) - .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) + .withSupportedSyncModes(List.of(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) .withSourceDefinedPrimaryKey(List.of(List.of(COL_ID))), CatalogHelpers.createAirbyteStream( TABLE_NAME_WITHOUT_PK, @@ -868,7 +846,7 @@ protected AirbyteCatalog getCatalog(final String defaultNamespace) { Field.of(COL_ID, JsonSchemaType.NUMBER), Field.of(COL_NAME, JsonSchemaType.STRING), Field.of(COL_UPDATED_AT, JsonSchemaType.STRING)) - .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) + .withSupportedSyncModes(List.of(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) .withSourceDefinedPrimaryKey(Collections.emptyList()), CatalogHelpers.createAirbyteStream( TABLE_NAME_COMPOSITE_PK, @@ -876,34 +854,40 @@ protected AirbyteCatalog getCatalog(final String defaultNamespace) { Field.of(COL_FIRST_NAME, JsonSchemaType.STRING), Field.of(COL_LAST_NAME, JsonSchemaType.STRING), Field.of(COL_UPDATED_AT, JsonSchemaType.STRING)) - .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, 
SyncMode.INCREMENTAL)) + .withSupportedSyncModes(List.of(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) .withSourceDefinedPrimaryKey( List.of(List.of(COL_FIRST_NAME), List.of(COL_LAST_NAME))))); } protected List getTestMessages() { - return Lists.newArrayList( + return List.of( new AirbyteMessage().withType(Type.RECORD) .withRecord(new AirbyteRecordMessage().withStream(streamName).withNamespace(getDefaultNamespace()) - .withData(Jsons.jsonNode(ImmutableMap + .withData(Jsons.jsonNode(Map .of(COL_ID, ID_VALUE_1, COL_NAME, "picard", COL_UPDATED_AT, "2004-10-19T00:00:00Z")))), new AirbyteMessage().withType(Type.RECORD) .withRecord(new AirbyteRecordMessage().withStream(streamName).withNamespace(getDefaultNamespace()) - .withData(Jsons.jsonNode(ImmutableMap + .withData(Jsons.jsonNode(Map .of(COL_ID, ID_VALUE_2, COL_NAME, "crusher", COL_UPDATED_AT, "2005-10-19T00:00:00Z")))), new AirbyteMessage().withType(Type.RECORD) .withRecord(new AirbyteRecordMessage().withStream(streamName).withNamespace(getDefaultNamespace()) - .withData(Jsons.jsonNode(ImmutableMap + .withData(Jsons.jsonNode(Map .of(COL_ID, ID_VALUE_3, COL_NAME, "vash", COL_UPDATED_AT, "2006-10-19T00:00:00Z"))))); } + protected AirbyteMessage createExpectedTestMessage(final List states) { + return new AirbyteMessage() + .withType(Type.STATE) + .withState(Jsons.object(createState(states), AirbyteStateMessage.class)); + } + protected ConfiguredAirbyteStream createTableWithSpaces() throws SQLException { final String tableNameWithSpaces = TABLE_NAME_WITH_SPACES + "2"; final String streamName2 = tableNameWithSpaces; @@ -994,4 +978,73 @@ protected static void setEmittedAtToNull(final Iterable messages } } + /** + * Tests whether the connector under test supports the per-stream state format or should use the + * legacy format for data generated by this test. + * + * @return {@code true} if the connector supports the per-stream state format or {@code false} if it + * does not support the per-stream state format (e.g. 
legacy format supported). Default + * value is {@code false}. + */ + protected boolean supportsPerStream() { + return false; + } + + /** + * Creates empty state with the provided stream name and namespace. + * + * @param streamName The stream name. + * @param streamNamespace The stream namespace. + * @return {@link JsonNode} representation of the generated empty state. + */ + protected JsonNode createEmptyState(final String streamName, final String streamNamespace) { + if (supportsPerStream()) { + final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage() + .withStateType(AirbyteStateType.PER_STREAM) + .withStreams(List.of(new AirbyteStreamState().withName(streamName).withNamespace(streamNamespace))); + return Jsons.jsonNode(airbyteStateMessage); + } else { + final DbState dbState = new DbState() + .withStreams(List.of(new DbStreamState().withStreamName(streamName).withStreamNamespace(streamNamespace))); + return Jsons.jsonNode(dbState); + } + } + + /** + * Creates state with the provided stream(s). + * + * @param streams A list of streams. + * @return A {@link JsonNode} representation of the state with the provided stream state. + */ + protected JsonNode createState(final List streams) { + if (supportsPerStream()) { + final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage() + .withStateType(AirbyteStateType.PER_STREAM) + .withStreams(streams.stream() + .map(s -> new AirbyteStreamState().withName(s.getStreamName()).withNamespace(s.getStreamNamespace()).withState(Jsons.jsonNode(s))) + .collect(Collectors.toList())); + + return Jsons.jsonNode(airbyteStateMessage); + } else { + final DbState dbState = new DbState() + .withStreams(streams.stream().collect(Collectors.toList())); + return Jsons.jsonNode(dbState); + } + } + + /** + * Extracts the state component from the provided {@link AirbyteMessage} based on the value returned + * by {@link #supportsPerStream()}. 
+ * + * @param airbyteMessage An {@link AirbyteMessage} that contains state. + * @return A {@link JsonNode} representation of the state contained in the {@link AirbyteMessage}. + */ + protected JsonNode extractState(final AirbyteMessage airbyteMessage) { + if (supportsPerStream()) { + return Jsons.jsonNode(airbyteMessage.getState()); + } else { + return airbyteMessage.getState().getData(); + } + } + } diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcStateHandler.java b/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcStateHandler.java index 63f92f7977c4..a054f5226740 100644 --- a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcStateHandler.java +++ b/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcStateHandler.java @@ -10,8 +10,8 @@ import com.fasterxml.jackson.databind.JsonNode; import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.debezium.CdcStateHandler; -import io.airbyte.integrations.source.relationaldb.StateManager; import io.airbyte.integrations.source.relationaldb.models.CdcState; +import io.airbyte.integrations.source.relationaldb.state.StateManager; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.AirbyteMessage.Type; import io.airbyte.protocol.models.AirbyteStateMessage; diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java b/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java index 2a770d8e1ddd..1eea401030f1 100644 --- a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java +++ 
b/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java @@ -25,8 +25,8 @@ import io.airbyte.integrations.debezium.AirbyteDebeziumHandler; import io.airbyte.integrations.source.jdbc.AbstractJdbcSource; import io.airbyte.integrations.source.mssql.MssqlCdcHelper.SnapshotIsolation; -import io.airbyte.integrations.source.relationaldb.StateManager; import io.airbyte.integrations.source.relationaldb.TableInfo; +import io.airbyte.integrations.source.relationaldb.state.StateManager; import io.airbyte.protocol.models.AirbyteCatalog; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.AirbyteStream; diff --git a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlCdcStateHandler.java b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlCdcStateHandler.java index d6171c06ff82..a5f950718c0e 100644 --- a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlCdcStateHandler.java +++ b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlCdcStateHandler.java @@ -10,8 +10,8 @@ import com.fasterxml.jackson.databind.JsonNode; import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.debezium.CdcStateHandler; -import io.airbyte.integrations.source.relationaldb.StateManager; import io.airbyte.integrations.source.relationaldb.models.CdcState; +import io.airbyte.integrations.source.relationaldb.state.StateManager; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.AirbyteMessage.Type; import io.airbyte.protocol.models.AirbyteStateMessage; diff --git a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlSource.java 
b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlSource.java index ea435043efc9..5c2ef9b99a01 100644 --- a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlSource.java +++ b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlSource.java @@ -25,9 +25,9 @@ import io.airbyte.integrations.debezium.AirbyteDebeziumHandler; import io.airbyte.integrations.source.jdbc.AbstractJdbcSource; import io.airbyte.integrations.source.mysql.helpers.CdcConfigurationHelper; -import io.airbyte.integrations.source.relationaldb.StateManager; import io.airbyte.integrations.source.relationaldb.TableInfo; import io.airbyte.integrations.source.relationaldb.models.CdcState; +import io.airbyte.integrations.source.relationaldb.state.StateManager; import io.airbyte.protocol.models.AirbyteCatalog; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.AirbyteStream; diff --git a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresCdcStateHandler.java b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresCdcStateHandler.java index 50c93d0405ce..ee5faa04f6d9 100644 --- a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresCdcStateHandler.java +++ b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresCdcStateHandler.java @@ -7,8 +7,8 @@ import com.fasterxml.jackson.databind.JsonNode; import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.debezium.CdcStateHandler; -import io.airbyte.integrations.source.relationaldb.StateManager; import io.airbyte.integrations.source.relationaldb.models.CdcState; +import io.airbyte.integrations.source.relationaldb.state.StateManager; import 
io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.AirbyteMessage.Type; import io.airbyte.protocol.models.AirbyteStateMessage; diff --git a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java index cb83f7324c69..8cf04cf02bd3 100644 --- a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java +++ b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java @@ -26,11 +26,13 @@ import io.airbyte.integrations.debezium.AirbyteDebeziumHandler; import io.airbyte.integrations.source.jdbc.AbstractJdbcSource; import io.airbyte.integrations.source.jdbc.dto.JdbcPrivilegeDto; -import io.airbyte.integrations.source.relationaldb.StateManager; import io.airbyte.integrations.source.relationaldb.TableInfo; +import io.airbyte.integrations.source.relationaldb.state.StateManager; import io.airbyte.protocol.models.AirbyteCatalog; import io.airbyte.protocol.models.AirbyteConnectionStatus; import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.AirbyteStateMessage; +import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; import io.airbyte.protocol.models.AirbyteStream; import io.airbyte.protocol.models.CommonField; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; @@ -404,6 +406,25 @@ private static AirbyteStream addCdcMetadataColumns(final AirbyteStream stream) { return stream; } + // TODO This is a temporary override so that the Postgres source can take advantage of per-stream + // state. + @Override + protected AirbyteStateMessage serializeState(final JsonNode stateJson) { + if (stateJson == null) { + // TODO What should the default/empty state be -- per stream or global? 
+ return new AirbyteStateMessage() + .withStateType(AirbyteStateType.PER_STREAM) + .withStreams(List.of()); + } else { + try { + return Jsons.object(stateJson, AirbyteStateMessage.class); + } catch (final IllegalArgumentException e) { + LOGGER.warn("Defaulting to legacy state object..."); + return new AirbyteStateMessage().withData(stateJson); + } + } + } + public static void main(final String[] args) throws Exception { final Source source = PostgresSource.sshWrappedSource(); LOGGER.info("starting source: {}", PostgresSource.class); diff --git a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresJdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresJdbcSourceAcceptanceTest.java index 459a44fa86e3..81f18cca890b 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresJdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresJdbcSourceAcceptanceTest.java @@ -22,7 +22,6 @@ import io.airbyte.db.jdbc.streaming.AdaptiveStreamingQueryConfig; import io.airbyte.integrations.source.jdbc.AbstractJdbcSource; import io.airbyte.integrations.source.jdbc.test.JdbcSourceAcceptanceTest; -import io.airbyte.integrations.source.relationaldb.models.DbState; import io.airbyte.integrations.source.relationaldb.models.DbStreamState; import io.airbyte.protocol.models.AirbyteCatalog; import io.airbyte.protocol.models.AirbyteMessage; @@ -175,7 +174,7 @@ protected List getAirbyteMessagesReadOneColumn() { } @Override - protected ArrayList getAirbyteMessagesCheckCursorSpaceInColumnName(ConfiguredAirbyteStream streamWithSpaces) { + protected ArrayList getAirbyteMessagesCheckCursorSpaceInColumnName(final ConfiguredAirbyteStream streamWithSpaces) { final AirbyteMessage firstMessage = 
getTestMessages().get(0); firstMessage.getRecord().setStream(streamWithSpaces.getStream().getName()); ((ObjectNode) firstMessage.getRecord().getData()).remove(COL_UPDATED_AT); @@ -200,7 +199,7 @@ protected ArrayList getAirbyteMessagesCheckCursorSpaceInColumnNa } @Override - protected List getAirbyteMessagesSecondSync(String streamName2) { + protected List getAirbyteMessagesSecondSync(final String streamName2) { return getTestMessages() .stream() .map(Jsons::clone) @@ -217,7 +216,7 @@ protected List getAirbyteMessagesSecondSync(String streamName2) .collect(Collectors.toList()); } - protected List getAirbyteMessagesSecondStreamWithNamespace(String streamName2) { + protected List getAirbyteMessagesSecondStreamWithNamespace(final String streamName2) { return getTestMessages() .stream() .map(Jsons::clone) @@ -233,7 +232,7 @@ protected List getAirbyteMessagesSecondStreamWithNamespace(Strin .collect(Collectors.toList()); } - protected List getAirbyteMessagesForTablesWithQuoting(ConfiguredAirbyteStream streamForTableWithSpaces) { + protected List getAirbyteMessagesForTablesWithQuoting(final ConfiguredAirbyteStream streamForTableWithSpaces) { return getTestMessages() .stream() .map(Jsons::clone) @@ -410,7 +409,7 @@ protected JdbcSourceOperations getSourceOperations() { } @Override - protected List getExpectedAirbyteMessagesSecondSync(String namespace) { + protected List getExpectedAirbyteMessagesSecondSync(final String namespace) { final List expectedMessages = new ArrayList<>(); expectedMessages.add(new AirbyteMessage().withType(AirbyteMessage.Type.RECORD) .withRecord(new AirbyteRecordMessage().withStream(streamName).withNamespace(namespace) @@ -429,18 +428,22 @@ protected List getExpectedAirbyteMessagesSecondSync(String names COL_UPDATED_AT, "2006-10-19", COL_WAKEUP_AT, "12:12:12.123456-05:00", COL_LAST_VISITED_AT, "2006-10-19T17:23:54.123456Z", - COL_LAST_COMMENT_AT, "2006-01-01T17:23:54.123456"))))); + COL_LAST_COMMENT_AT, "2006-01-01T17:23:54.123456", + COL_UPDATED_AT, 
"2006-10-19"))))); + final DbStreamState state = new DbStreamState() + .withStreamName(streamName) + .withStreamNamespace(namespace) + .withCursorField(ImmutableList.of(COL_ID)) + .withCursor("5"); expectedMessages.add(new AirbyteMessage() .withType(AirbyteMessage.Type.STATE) - .withState(new AirbyteStateMessage() - .withData(Jsons.jsonNode(new DbState() - .withCdc(false) - .withStreams(Lists.newArrayList(new DbStreamState() - .withStreamName(streamName) - .withStreamNamespace(namespace) - .withCursorField(ImmutableList.of(COL_ID)) - .withCursor("5"))))))); + .withState(Jsons.object(createState(List.of(state)), AirbyteStateMessage.class))); return expectedMessages; } + @Override + protected boolean supportsPerStream() { + return true; + } + } diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java index 6ebdc7aa751e..2def5cc59a3d 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java @@ -20,12 +20,15 @@ import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; import io.airbyte.integrations.base.Source; import io.airbyte.integrations.source.relationaldb.models.DbState; +import io.airbyte.integrations.source.relationaldb.state.StateManager; +import io.airbyte.integrations.source.relationaldb.state.StateManagerFactory; import io.airbyte.protocol.models.AirbyteCatalog; import io.airbyte.protocol.models.AirbyteConnectionStatus; import io.airbyte.protocol.models.AirbyteConnectionStatus.Status; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.AirbyteMessage.Type; import 
io.airbyte.protocol.models.AirbyteRecordMessage; +import io.airbyte.protocol.models.AirbyteStateMessage; import io.airbyte.protocol.models.AirbyteStream; import io.airbyte.protocol.models.CatalogHelpers; import io.airbyte.protocol.models.CommonField; @@ -103,9 +106,7 @@ public AutoCloseableIterator read(final JsonNode config, final ConfiguredAirbyteCatalog catalog, final JsonNode state) throws Exception { - final StateManager stateManager = new StateManager( - state == null ? StateManager.emptyState() : Jsons.object(state, DbState.class), - catalog); + final StateManager stateManager = StateManagerFactory.createStateManager(serializeState(state), catalog, config); final Instant emittedAt = Instant.now(); final Database database = createDatabaseInternal(config); @@ -509,4 +510,24 @@ private Database createDatabaseInternal(final JsonNode sourceConfig) throws Exce return database; } + /** + * Deserializes the state represented as JSON into an object representation. + * + * @param stateJson The state as JSON. + * @return The deserialized object representation of the state. 
+ */ + protected AirbyteStateMessage serializeState(final JsonNode stateJson) { + if (stateJson == null) { + // For backwards compatibility with existing connectors + return new AirbyteStateMessage().withData(Jsons.jsonNode(new DbState())); + } else { + try { + return Jsons.object(stateJson, AirbyteStateMessage.class); + } catch (final IllegalArgumentException e) { + LOGGER.warn("Defaulting to legacy state object..."); + return new AirbyteStateMessage().withData(stateJson); + } + } + } + } diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/CdcStateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/CdcStateManager.java index db33dfd6167b..7b855e6c9770 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/CdcStateManager.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/CdcStateManager.java @@ -4,7 +4,6 @@ package io.airbyte.integrations.source.relationaldb; -import com.google.common.annotations.VisibleForTesting; import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.source.relationaldb.models.CdcState; import org.slf4j.Logger; @@ -12,14 +11,13 @@ public class CdcStateManager { - private static final Logger LOGGER = LoggerFactory.getLogger(StateManager.class); + private static final Logger LOGGER = LoggerFactory.getLogger(CdcStateManager.class); private final CdcState initialState; private CdcState currentState; - @VisibleForTesting - CdcStateManager(final CdcState serialized) { + public CdcStateManager(final CdcState serialized) { this.initialState = serialized; this.currentState = serialized; diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/StateDecoratingIterator.java 
b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/StateDecoratingIterator.java index 122d62ddbb65..7eabaad9eb31 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/StateDecoratingIterator.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/StateDecoratingIterator.java @@ -7,6 +7,7 @@ import com.google.common.collect.AbstractIterator; import io.airbyte.db.IncrementalUtils; import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; +import io.airbyte.integrations.source.relationaldb.state.StateManager; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.AirbyteMessage.Type; import io.airbyte.protocol.models.AirbyteStateMessage; @@ -40,7 +41,6 @@ public StateDecoratingIterator(final Iterator messageIterator, this.cursorField = cursorField; this.cursorType = cursorType; this.maxCursor = initialCursor; - stateManager.setIsCdc(false); } private String getCursorCandidate(final AirbyteMessage message) { diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/StateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/StateManager.java deleted file mode 100644 index 3e509e2869d9..000000000000 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/StateManager.java +++ /dev/null @@ -1,197 +0,0 @@ -/* - * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
- */ - -package io.airbyte.integrations.source.relationaldb; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Lists; -import io.airbyte.commons.json.Jsons; -import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; -import io.airbyte.integrations.source.relationaldb.models.DbState; -import io.airbyte.integrations.source.relationaldb.models.DbStreamState; -import io.airbyte.protocol.models.AirbyteStateMessage; -import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; -import io.airbyte.protocol.models.ConfiguredAirbyteStream; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Optional; -import java.util.Set; -import java.util.stream.Collectors; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Handles the state machine for the state of source implementations. 
- */ -public class StateManager { - - private static final Logger LOGGER = LoggerFactory.getLogger(StateManager.class); - - private final Map pairToCursorInfo; - private Boolean isCdc; - private final CdcStateManager cdcStateManager; - - public static DbState emptyState() { - return new DbState(); - } - - public StateManager(final DbState serialized, final ConfiguredAirbyteCatalog catalog) { - this.cdcStateManager = new CdcStateManager(serialized.getCdcState()); - this.isCdc = serialized.getCdc(); - if (serialized.getCdc() == null) { - this.isCdc = false; - } - - pairToCursorInfo = - new ImmutableMap.Builder().putAll(createCursorInfoMap(serialized, catalog)).build(); - } - - private static Map createCursorInfoMap(final DbState serialized, - final ConfiguredAirbyteCatalog catalog) { - final Set allStreamNames = catalog.getStreams() - .stream() - .map(ConfiguredAirbyteStream::getStream) - .map(AirbyteStreamNameNamespacePair::fromAirbyteSteam) - .collect(Collectors.toSet()); - allStreamNames.addAll(serialized.getStreams().stream().map(StateManager::toAirbyteStreamNameNamespacePair).collect(Collectors.toSet())); - - final Map localMap = new HashMap<>(); - final Map pairToState = serialized.getStreams() - .stream() - .collect(Collectors.toMap(StateManager::toAirbyteStreamNameNamespacePair, a -> a)); - final Map pairToConfiguredAirbyteStream = catalog.getStreams().stream() - .collect(Collectors.toMap(AirbyteStreamNameNamespacePair::fromConfiguredAirbyteSteam, s -> s)); - - for (final AirbyteStreamNameNamespacePair pair : allStreamNames) { - final Optional stateOptional = Optional.ofNullable(pairToState.get(pair)); - final Optional streamOptional = Optional.ofNullable(pairToConfiguredAirbyteStream.get(pair)); - localMap.put(pair, createCursorInfoForStream(pair, stateOptional, streamOptional)); - } - - return localMap; - } - - private static AirbyteStreamNameNamespacePair toAirbyteStreamNameNamespacePair(final DbStreamState state) { - return new 
AirbyteStreamNameNamespacePair(state.getStreamName(), state.getStreamNamespace()); - } - - @VisibleForTesting - @SuppressWarnings("OptionalUsedAsFieldOrParameterType") - static CursorInfo createCursorInfoForStream(final AirbyteStreamNameNamespacePair pair, - final Optional stateOptional, - final Optional streamOptional) { - final String originalCursorField = stateOptional - .map(DbStreamState::getCursorField) - .flatMap(f -> f.size() > 0 ? Optional.of(f.get(0)) : Optional.empty()) - .orElse(null); - final String originalCursor = stateOptional.map(DbStreamState::getCursor).orElse(null); - - final String cursor; - final String cursorField; - - // if cursor field is set in catalog. - if (streamOptional.map(ConfiguredAirbyteStream::getCursorField).isPresent()) { - cursorField = streamOptional - .map(ConfiguredAirbyteStream::getCursorField) - .flatMap(f -> f.size() > 0 ? Optional.of(f.get(0)) : Optional.empty()) - .orElse(null); - // if cursor field is set in state. - if (stateOptional.map(DbStreamState::getCursorField).isPresent()) { - // if cursor field in catalog and state are the same. - if (stateOptional.map(DbStreamState::getCursorField).equals(streamOptional.map(ConfiguredAirbyteStream::getCursorField))) { - cursor = stateOptional.map(DbStreamState::getCursor).orElse(null); - LOGGER.info("Found matching cursor in state. Stream: {}. Cursor Field: {} Value: {}", pair, cursorField, cursor); - // if cursor field in catalog and state are different. - } else { - cursor = null; - LOGGER.info( - "Found cursor field. Does not match previous cursor field. Stream: {}. Original Cursor Field: {}. New Cursor Field: {}. Resetting cursor value.", - pair, originalCursorField, cursorField); - } - // if cursor field is not set in state but is set in catalog. - } else { - LOGGER.info("No cursor field set in catalog but not present in state. Stream: {}, New Cursor Field: {}. 
Resetting cursor value", pair, - cursorField); - cursor = null; - } - // if cursor field is not set in catalog. - } else { - LOGGER.info( - "Cursor field set in state but not present in catalog. Stream: {}. Original Cursor Field: {}. Original value: {}. Resetting cursor.", - pair, originalCursorField, originalCursor); - cursorField = null; - cursor = null; - } - - return new CursorInfo(originalCursorField, originalCursor, cursorField, cursor); - } - - private Optional getCursorInfo(final AirbyteStreamNameNamespacePair pair) { - return Optional.ofNullable(pairToCursorInfo.get(pair)); - } - - public Optional getOriginalCursorField(final AirbyteStreamNameNamespacePair pair) { - return getCursorInfo(pair).map(CursorInfo::getOriginalCursorField); - } - - public Optional getOriginalCursor(final AirbyteStreamNameNamespacePair pair) { - return getCursorInfo(pair).map(CursorInfo::getOriginalCursor); - } - - public Optional getCursorField(final AirbyteStreamNameNamespacePair pair) { - return getCursorInfo(pair).map(CursorInfo::getCursorField); - } - - public Optional getCursor(final AirbyteStreamNameNamespacePair pair) { - return getCursorInfo(pair).map(CursorInfo::getCursor); - } - - synchronized public AirbyteStateMessage updateAndEmit(final AirbyteStreamNameNamespacePair pair, final String cursor) { - // cdc file gets updated by debezium so the "update" part is a no op. 
- if (!isCdc) { - final Optional cursorInfo = getCursorInfo(pair); - Preconditions.checkState(cursorInfo.isPresent(), "Could not find cursor information for stream: " + pair); - cursorInfo.get().setCursor(cursor); - } - - return toState(); - } - - public void setIsCdc(final boolean isCdc) { - if (this.isCdc == null) { - this.isCdc = isCdc; - } else { - Preconditions.checkState(this.isCdc == isCdc, "attempt to set cdc to {}, but is already set to {}.", isCdc, this.isCdc); - } - } - - public CdcStateManager getCdcStateManager() { - return cdcStateManager; - } - - public AirbyteStateMessage emit() { - return toState(); - } - - private AirbyteStateMessage toState() { - final DbState DbState = new DbState() - .withCdc(isCdc) - .withStreams(pairToCursorInfo.entrySet().stream() - .sorted(Entry.comparingByKey()) // sort by stream name then namespace for sanity. - .map(e -> new DbStreamState() - .withStreamName(e.getKey().getName()) - .withStreamNamespace(e.getKey().getNamespace()) - .withCursorField(e.getValue().getCursorField() == null ? Collections.emptyList() : Lists.newArrayList(e.getValue().getCursorField())) - .withCursor(e.getValue().getCursor())) - .collect(Collectors.toList())) - .withCdcState(cdcStateManager.getCdcState()); - - return new AirbyteStateMessage().withData(Jsons.jsonNode(DbState)); - } - -} diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/AbstractStateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/AbstractStateManager.java new file mode 100644 index 000000000000..cac2ad478c42 --- /dev/null +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/AbstractStateManager.java @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.relationaldb.state; + +import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; +import io.airbyte.integrations.source.relationaldb.CursorInfo; +import io.airbyte.protocol.models.AirbyteStateMessage; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.function.Function; +import java.util.function.Supplier; + +/** + * Abstract implementation of the {@link StateManager} interface that provides common functionality + * for state manager implementations. + * + * @param <T> The type associated with the state object managed by this manager. + * @param <S> The type associated with the state object stored in the state managed by this manager. + */ +public abstract class AbstractStateManager implements StateManager { + + /** + * The {@link CursorManager} responsible for keeping track of the current cursor value for each + * stream managed by this state manager. + */ + private final CursorManager cursorManager; + + /** + * Constructs a new state manager for the given configured connector. + * + * @param catalog The connector's configured catalog. + * @param streamSupplier A {@link Supplier} that provides the cursor manager with the collection of + * streams tracked by the connector's state. + * @param cursorFunction A {@link Function} that extracts the current cursor from a stream stored in + * the connector's state. + * @param cursorFieldFunction A {@link Function} that extracts the cursor field name from a stream + * stored in the connector's state. + * @param namespacePairFunction A {@link Function} that generates a + * {@link AirbyteStreamNameNamespacePair} that identifies each stream in the connector's + * state. 
+ */ + public AbstractStateManager(final ConfiguredAirbyteCatalog catalog, + final Supplier> streamSupplier, + final Function cursorFunction, + final Function> cursorFieldFunction, + final Function namespacePairFunction) { + cursorManager = new CursorManager(catalog, streamSupplier, cursorFunction, cursorFieldFunction, namespacePairFunction); + } + + @Override + public Map getPairToCursorInfoMap() { + return cursorManager.getPairToCursorInfo(); + } + + @Override + public abstract AirbyteStateMessage toState(); + +} diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/CursorManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/CursorManager.java new file mode 100644 index 000000000000..5006467882bb --- /dev/null +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/CursorManager.java @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.relationaldb.state; + +import com.google.common.annotations.VisibleForTesting; +import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; +import io.airbyte.integrations.source.relationaldb.CursorInfo; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.ConfiguredAirbyteStream; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.function.Function; +import java.util.function.Supplier; +import java.util.stream.Collectors; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Manages the map of streams to current cursor values for state management. 
+ * + * @param <S> The type that represents the stream object which holds the current cursor information + * in the state. + */ +public class CursorManager { + + private static final Logger LOGGER = LoggerFactory.getLogger(CursorManager.class); + + /** + * Map of streams (name/namespace tuple) to the current cursor information stored in the state. + */ + private final Map pairToCursorInfo; + + /** + * Constructs a new {@link CursorManager} based on the configured connector and current state + * information. + * + * @param catalog The connector's configured catalog. + * @param streamSupplier A {@link Supplier} that provides the cursor manager with the collection of + * streams tracked by the connector's state. + * @param cursorFunction A {@link Function} that extracts the current cursor from a stream stored in + * the connector's state. + * @param cursorFieldFunction A {@link Function} that extracts the cursor field name from a stream + * stored in the connector's state. + * @param namespacePairFunction A {@link Function} that generates a + * {@link AirbyteStreamNameNamespacePair} that identifies each stream in the connector's + * state. + */ + public CursorManager(final ConfiguredAirbyteCatalog catalog, + final Supplier> streamSupplier, + final Function cursorFunction, + final Function> cursorFieldFunction, + final Function namespacePairFunction) { + pairToCursorInfo = createCursorInfoMap(catalog, streamSupplier, cursorFunction, cursorFieldFunction, namespacePairFunction); + } + + /** + * Creates the cursor information map that associates stream name/namespace tuples with the current + * cursor information for that stream as stored in the connector's state. + * + * @param catalog The connector's configured catalog. + * @param streamSupplier A {@link Supplier} that provides the cursor manager with the collection of + * streams tracked by the connector's state. 
+   * @param cursorFunction A {@link Function} that extracts the current cursor from a stream stored in
+   *        the connector's state.
+   * @param cursorFieldFunction A {@link Function} that extracts the cursor field name from a stream
+   *        stored in the connector's state.
+   * @param namespacePairFunction A {@link Function} that generates a
+   *        {@link AirbyteStreamNameNamespacePair} that identifies each stream in the connector's
+   *        state.
+   * @return A map of streams to current cursor information for the stream.
+   */
+  @VisibleForTesting
+  protected Map<AirbyteStreamNameNamespacePair, CursorInfo> createCursorInfoMap(
+      final ConfiguredAirbyteCatalog catalog,
+      final Supplier<Collection<S>> streamSupplier,
+      final Function<S, String> cursorFunction,
+      final Function<S, List<String>> cursorFieldFunction,
+      final Function<S, AirbyteStreamNameNamespacePair> namespacePairFunction) {
+    final Set<AirbyteStreamNameNamespacePair> allStreamNames = catalog.getStreams()
+        .stream()
+        .map(ConfiguredAirbyteStream::getStream)
+        .map(AirbyteStreamNameNamespacePair::fromAirbyteSteam)
+        .collect(Collectors.toSet());
+    allStreamNames.addAll(streamSupplier.get().stream().map(namespacePairFunction).collect(Collectors.toSet()));
+
+    final Map<AirbyteStreamNameNamespacePair, CursorInfo> localMap = new HashMap<>();
+    final Map<AirbyteStreamNameNamespacePair, S> pairToState = streamSupplier.get()
+        .stream()
+        .collect(Collectors.toMap(namespacePairFunction, a -> a));
+    final Map<AirbyteStreamNameNamespacePair, ConfiguredAirbyteStream> pairToConfiguredAirbyteStream = catalog.getStreams().stream()
+        .collect(Collectors.toMap(AirbyteStreamNameNamespacePair::fromConfiguredAirbyteSteam, s -> s));
+
+    for (final AirbyteStreamNameNamespacePair pair : allStreamNames) {
+      final Optional<S> stateOptional = Optional.ofNullable(pairToState.get(pair));
+      final Optional<ConfiguredAirbyteStream> streamOptional = Optional.ofNullable(pairToConfiguredAirbyteStream.get(pair));
+      localMap.put(pair, createCursorInfoForStream(pair, stateOptional, streamOptional, cursorFunction, cursorFieldFunction));
+    }
+
+    return localMap;
+  }
+
+  /**
+   * Generates a {@link CursorInfo} object based on the data currently stored in the connector's state
+   * for the given stream.
+   *
+   * @param pair A {@link AirbyteStreamNameNamespacePair} that identifies a specific stream managed by
+   *        the connector.
+   * @param stateOptional {@link Optional} containing the current state associated with the stream.
+   * @param streamOptional {@link Optional} containing the {@link ConfiguredAirbyteStream} associated
+   *        with the stream.
+   * @param cursorFunction A {@link Function} that provides the current cursor from the state
+   *        associated with the stream.
+   * @param cursorFieldFunction A {@link Function} that provides the cursor field name for the cursor
+   *        stored in the state associated with the stream.
+   * @return A {@link CursorInfo} object based on the data currently stored in the connector's state
+   *         for the given stream.
+   */
+  @SuppressWarnings("OptionalUsedAsFieldOrParameterType")
+  @VisibleForTesting
+  protected CursorInfo createCursorInfoForStream(final AirbyteStreamNameNamespacePair pair,
+                                                 final Optional<S> stateOptional,
+                                                 final Optional<ConfiguredAirbyteStream> streamOptional,
+                                                 final Function<S, String> cursorFunction,
+                                                 final Function<S, List<String>> cursorFieldFunction) {
+    final String originalCursorField = stateOptional
+        .map(cursorFieldFunction)
+        .flatMap(f -> f.size() > 0 ? Optional.of(f.get(0)) : Optional.empty())
+        .orElse(null);
+    final String originalCursor = stateOptional.map(cursorFunction).orElse(null);
+
+    final String cursor;
+    final String cursorField;
+
+    // if cursor field is set in catalog.
+    if (streamOptional.map(ConfiguredAirbyteStream::getCursorField).isPresent()) {
+      cursorField = streamOptional
+          .map(ConfiguredAirbyteStream::getCursorField)
+          .flatMap(f -> f.size() > 0 ? Optional.of(f.get(0)) : Optional.empty())
+          .orElse(null);
+      // if cursor field is set in state.
+      if (stateOptional.map(cursorFieldFunction).isPresent()) {
+        // if cursor field in catalog and state are the same.
+        if (stateOptional.map(cursorFieldFunction).equals(streamOptional.map(ConfiguredAirbyteStream::getCursorField))) {
+          cursor = stateOptional.map(cursorFunction).orElse(null);
+          LOGGER.info("Found matching cursor in state. Stream: {}. Cursor Field: {} Value: {}", pair, cursorField, cursor);
+          // if cursor field in catalog and state are different.
+        } else {
+          cursor = null;
+          LOGGER.info(
+              "Found cursor field. Does not match previous cursor field. Stream: {}. Original Cursor Field: {}. New Cursor Field: {}. Resetting cursor value.",
+              pair, originalCursorField, cursorField);
+        }
+        // if cursor field is not set in state but is set in catalog.
+      } else {
+        LOGGER.info("Cursor field set in catalog but not present in state. Stream: {}, New Cursor Field: {}. Resetting cursor value", pair,
+            cursorField);
+        cursor = null;
+      }
+      // if cursor field is not set in catalog.
+    } else {
+      LOGGER.info(
+          "Cursor field set in state but not present in catalog. Stream: {}. Original Cursor Field: {}. Original value: {}. Resetting cursor.",
+          pair, originalCursorField, originalCursor);
+      cursorField = null;
+      cursor = null;
+    }
+
+    return new CursorInfo(originalCursorField, originalCursor, cursorField, cursor);
+  }
+
+  /**
+   * Retrieves a copy of the stream name/namespace tuple to current cursor information map.
+   *
+   * @return A copy of the stream name/namespace tuple to current cursor information map.
+   */
+  public Map<AirbyteStreamNameNamespacePair, CursorInfo> getPairToCursorInfo() {
+    return Map.copyOf(pairToCursorInfo);
+  }
+
+  /**
+   * Retrieves an {@link Optional} possibly containing the current {@link CursorInfo} associated with
+   * the provided stream name/namespace tuple.
+   *
+   * @param pair The {@link AirbyteStreamNameNamespacePair} which identifies a stream.
+   * @return An {@link Optional} possibly containing the current {@link CursorInfo} associated with
+   *         the provided stream name/namespace tuple.
+   */
+  public Optional<CursorInfo> getCursorInfo(final AirbyteStreamNameNamespacePair pair) {
+    return Optional.ofNullable(pairToCursorInfo.get(pair));
+  }
+
+  /**
+   * Retrieves an {@link Optional} possibly containing the cursor field name associated with the
+   * cursor tracked in the state associated with the provided stream name/namespace tuple.
+   *
+   * @param pair The {@link AirbyteStreamNameNamespacePair} which identifies a stream.
+   * @return An {@link Optional} possibly containing the cursor field name associated with the cursor
+   *         tracked in the state associated with the provided stream name/namespace tuple.
+   */
+  public Optional<String> getCursorField(final AirbyteStreamNameNamespacePair pair) {
+    return getCursorInfo(pair).map(CursorInfo::getCursorField);
+  }
+
+  /**
+   * Retrieves an {@link Optional} possibly containing the cursor value tracked in the state
+   * associated with the provided stream name/namespace tuple.
+   *
+   * @param pair The {@link AirbyteStreamNameNamespacePair} which identifies a stream.
+   * @return An {@link Optional} possibly containing the cursor value tracked in the state associated
+   *         with the provided stream name/namespace tuple.
+   */
+  public Optional<String> getCursor(final AirbyteStreamNameNamespacePair pair) {
+    return getCursorInfo(pair).map(CursorInfo::getCursor);
+  }
+
+}
diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManager.java
new file mode 100644
index 000000000000..2613ebe4bdcd
--- /dev/null
+++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManager.java
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2022 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.source.relationaldb.state;
+
+import com.google.common.base.Preconditions;
+import io.airbyte.commons.json.Jsons;
+import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair;
+import io.airbyte.integrations.source.relationaldb.CdcStateManager;
+import io.airbyte.integrations.source.relationaldb.CursorInfo;
+import io.airbyte.integrations.source.relationaldb.models.DbState;
+import io.airbyte.integrations.source.relationaldb.models.DbStreamState;
+import io.airbyte.protocol.models.AirbyteStateMessage;
+import io.airbyte.protocol.models.ConfiguredAirbyteCatalog;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map.Entry;
+import java.util.Optional;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Legacy implementation (pre-per-stream state support) of the {@link StateManager} interface.
+ *
+ * This implementation assumes that the state matches the {@link DbState} object and effectively
+ * tracks state as global across the streams managed by a connector.
+ *
+ * @deprecated This manager may be removed in the future if/once all connectors support per-stream
+ *             state management.
+ */
+@Deprecated(forRemoval = true)
+public class LegacyStateManager extends AbstractStateManager<DbState, DbStreamState> {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(LegacyStateManager.class);
+
+  /**
+   * {@link Function} that extracts the cursor from the stream state.
+   */
+  private static final Function<DbStreamState, String> CURSOR_FUNCTION = DbStreamState::getCursor;
+
+  /**
+   * {@link Function} that extracts the cursor field(s) from the stream state.
+   */
+  private static final Function<DbStreamState, List<String>> CURSOR_FIELD_FUNCTION = DbStreamState::getCursorField;
+
+  /**
+   * {@link Function} that creates an {@link AirbyteStreamNameNamespacePair} from the stream state.
+   */
+  private static final Function<DbStreamState, AirbyteStreamNameNamespacePair> NAME_NAMESPACE_PAIR_FUNCTION =
+      s -> new AirbyteStreamNameNamespacePair(s.getStreamName(), s.getStreamNamespace());
+
+  /**
+   * CDC flag taken from the seed {@link DbState}; {@code false} when the seed state has no flag.
+   */
+  private Boolean isCdc;
+
+  /**
+   * {@link CdcStateManager} used to manage state for connectors that support CDC.
+   */
+  private final CdcStateManager cdcStateManager;
+
+  /**
+   * Constructs a new {@link LegacyStateManager} that is seeded with the provided {@link DbState}
+   * instance.
+   *
+   * @param dbState The initial state represented as an {@link DbState} instance.
+   * @param catalog The {@link ConfiguredAirbyteCatalog} for the connector associated with this state
+   *        manager.
+   */
+  public LegacyStateManager(final DbState dbState, final ConfiguredAirbyteCatalog catalog) {
+    super(catalog,
+        () -> dbState.getStreams(),
+        CURSOR_FUNCTION,
+        CURSOR_FIELD_FUNCTION,
+        NAME_NAMESPACE_PAIR_FUNCTION);
+
+    this.cdcStateManager = new CdcStateManager(dbState.getCdcState());
+    this.isCdc = dbState.getCdc();
+    if (dbState.getCdc() == null) {
+      this.isCdc = false;
+    }
+  }
+
+  @Override
+  public CdcStateManager getCdcStateManager() {
+    return cdcStateManager;
+  }
+
+  @Override
+  public AirbyteStateMessage toState() {
+    final DbState dbState = new DbState()
+        .withCdc(isCdc)
+        .withStreams(getPairToCursorInfoMap().entrySet().stream()
+            .sorted(Entry.comparingByKey()) // sort by stream name then namespace for sanity.
+            .map(e -> new DbStreamState()
+                .withStreamName(e.getKey().getName())
+                .withStreamNamespace(e.getKey().getNamespace())
+                .withCursorField(e.getValue().getCursorField() == null ? Collections.emptyList() : List.of(e.getValue().getCursorField()))
+                .withCursor(e.getValue().getCursor()))
+            .collect(Collectors.toList()))
+        .withCdcState(getCdcStateManager().getCdcState());
+
+    LOGGER.info("Generated legacy state for {} streams", dbState.getStreams().size());
+    return new AirbyteStateMessage().withData(Jsons.jsonNode(dbState));
+  }
+
+  @Override
+  public AirbyteStateMessage updateAndEmit(final AirbyteStreamNameNamespacePair pair, final String cursor) {
+    // cdc file gets updated by debezium so the "update" part is a no op.
+    if (!isCdc) {
+      final Optional<CursorInfo> cursorInfo = getCursorInfo(pair);
+      Preconditions.checkState(cursorInfo.isPresent(), "Could not find cursor information for stream: " + pair);
+      cursorInfo.get().setCursor(cursor);
+    }
+
+    return toState();
+  }
+
+}
diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManager.java
new file mode 100644
index 000000000000..fb45e0052691
--- /dev/null
+++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManager.java
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2022 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.source.relationaldb.state;
+
+import com.google.common.collect.Lists;
+import io.airbyte.commons.json.Jsons;
+import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair;
+import io.airbyte.integrations.source.relationaldb.CdcStateManager;
+import io.airbyte.integrations.source.relationaldb.CursorInfo;
+import io.airbyte.integrations.source.relationaldb.models.DbStreamState;
+import io.airbyte.protocol.models.AirbyteStateMessage;
+import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType;
+import io.airbyte.protocol.models.AirbyteStreamState;
+import io.airbyte.protocol.models.ConfiguredAirbyteCatalog;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Optional;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class PerStreamStateManager extends AbstractStateManager<AirbyteStateMessage, AirbyteStreamState> {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(PerStreamStateManager.class);
+
+  /**
+   * {@link Function} that extracts the cursor from the stream state.
+   */
+  private static final Function<AirbyteStreamState, String> CURSOR_FUNCTION = stream -> {
+    final Optional<DbStreamState> dbStreamState = extractState(stream);
+    if (dbStreamState.isPresent()) {
+      return dbStreamState.get().getCursor();
+    } else {
+      return null;
+    }
+  };
+
+  /**
+   * {@link Function} that extracts the cursor field(s) from the stream state.
+   */
+  private static final Function<AirbyteStreamState, List<String>> CURSOR_FIELD_FUNCTION = stream -> {
+    final Optional<DbStreamState> dbStreamState = extractState(stream);
+    if (dbStreamState.isPresent()) {
+      return dbStreamState.get().getCursorField();
+    } else {
+      return List.of();
+    }
+  };
+
+  /**
+   * {@link Function} that creates an {@link AirbyteStreamNameNamespacePair} from the stream state.
+   */
+  private static final Function<AirbyteStreamState, AirbyteStreamNameNamespacePair> NAME_NAMESPACE_PAIR_FUNCTION =
+      s -> new AirbyteStreamNameNamespacePair(s.getName(), s.getNamespace());
+
+  /**
+   * Constructs a new {@link PerStreamStateManager} that is seeded with the provided
+   * {@link AirbyteStateMessage}.
+   *
+   * @param airbyteStateMessage The initial state represented as an {@link AirbyteStateMessage}.
+   * @param catalog The {@link ConfiguredAirbyteCatalog} for the connector associated with this state
+   *        manager.
+   */
+  public PerStreamStateManager(final AirbyteStateMessage airbyteStateMessage, final ConfiguredAirbyteCatalog catalog) {
+    super(catalog,
+        () -> airbyteStateMessage.getStreams(),
+        CURSOR_FUNCTION,
+        CURSOR_FIELD_FUNCTION,
+        NAME_NAMESPACE_PAIR_FUNCTION);
+  }
+
+  @Override
+  public CdcStateManager getCdcStateManager() {
+    return new CdcStateManager(null); // NOTE: builds a new manager seeded with null CDC state on every call.
+  }
+
+  @Override
+  public AirbyteStateMessage toState() {
+    final Map<AirbyteStreamNameNamespacePair, CursorInfo> pairCursorInfoMap = getPairToCursorInfoMap();
+    final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage();
+    final List<AirbyteStreamState> airbyteStreamStates = generatePerStreamState(pairCursorInfoMap);
+
+    // TODO detect global?
+
+    return airbyteStateMessage.withStateType(AirbyteStateType.PER_STREAM).withStreams(airbyteStreamStates);
+  }
+
+  /**
+   * Generates the per-stream state for each stream.
+   *
+   * @param pairCursorInfoMap The map of stream name/namespace to current cursor information.
+   * @return The list of per-stream state.
+   */
+  private List<AirbyteStreamState> generatePerStreamState(final Map<AirbyteStreamNameNamespacePair, CursorInfo> pairCursorInfoMap) {
+    return pairCursorInfoMap.entrySet().stream()
+        .filter(s -> s.getKey().getName() != null) // keep streams without a namespace, as LegacyStateManager does.
+        .sorted(Entry.comparingByKey()) // sort by stream name then namespace for sanity.
+        .map(e -> new AirbyteStreamState()
+            .withName(e.getKey().getName())
+            .withNamespace(e.getKey().getNamespace())
+            .withState(Jsons.jsonNode(generateDbStreamState(e.getKey(), e.getValue()))))
+        .collect(Collectors.toList());
+  }
+
+  /**
+   * Generates the {@link DbStreamState} for the given stream and cursor.
+   *
+   * @param airbyteStreamNameNamespacePair The stream.
+   * @param cursorInfo The current cursor.
+   * @return The {@link DbStreamState}.
+   */
+  private DbStreamState generateDbStreamState(final AirbyteStreamNameNamespacePair airbyteStreamNameNamespacePair, final CursorInfo cursorInfo) {
+    return new DbStreamState()
+        .withStreamName(airbyteStreamNameNamespacePair.getName())
+        .withStreamNamespace(airbyteStreamNameNamespacePair.getNamespace())
+        .withCursorField(cursorInfo.getCursorField() == null ? Collections.emptyList() : Lists.newArrayList(cursorInfo.getCursorField()))
+        .withCursor(cursorInfo.getCursor());
+  }
+
+  /**
+   * Extracts the actual state from the {@link AirbyteStreamState} object.
+   *
+   * @param state The {@link AirbyteStreamState} that contains the actual stream state as JSON.
+   * @return An {@link Optional} possibly containing the deserialized representation of the stream
+   *         state or an empty {@link Optional} if the state is not present or could not be
+   *         deserialized.
+   */
+  private static Optional<DbStreamState> extractState(final AirbyteStreamState state) {
+    try {
+      return Optional.ofNullable(Jsons.object(state.getState(), DbStreamState.class));
+    } catch (final IllegalArgumentException e) {
+      LOGGER.error("Unable to extract state.", e);
+      return Optional.empty();
+    }
+  }
+
+}
diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManager.java
new file mode 100644
index 000000000000..1f6ded0cab07
--- /dev/null
+++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManager.java
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2022 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.source.relationaldb.state;
+
+import com.google.common.base.Preconditions;
+import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair;
+import io.airbyte.integrations.source.relationaldb.CdcStateManager;
+import io.airbyte.integrations.source.relationaldb.CursorInfo;
+import io.airbyte.protocol.models.AirbyteStateMessage;
+import java.util.Map;
+import java.util.Optional;
+
+/**
+ * Defines a manager that manages connector state. Connector state is used to keep track of the data
+ * synced by the connector.
+ *
+ * @param <T> The type of the state maintained by the manager.
+ * @param <S> The type of the stream(s) stored within the state maintained by the manager.
+ */
+public interface StateManager<T, S> {
+
+  /**
+   * Retrieves the {@link CdcStateManager} associated with the state manager.
+   *
+   * @return The {@link CdcStateManager}
+   * @deprecated This method will be removed in the future in favor of a state manager that supports
+   *             CDC-related state.
+   */
+  @Deprecated(forRemoval = true)
+  CdcStateManager getCdcStateManager();
+
+  /**
+   * Retrieves the map of stream name/namespace tuple to the current cursor information for that
+   * stream.
+   *
+   * @return The map of stream name/namespace tuple to the current cursor information for that stream
+   *         as maintained by this state manager.
+   */
+  Map<AirbyteStreamNameNamespacePair, CursorInfo> getPairToCursorInfoMap();
+
+  /**
+   * Generates an {@link AirbyteStateMessage} that represents the current state contained in the state
+   * manager.
+   *
+   * @return The {@link AirbyteStateMessage} that represents the current state contained in the state
+   *         manager.
+   */
+  AirbyteStateMessage toState();
+
+  /**
+   * Retrieves an {@link Optional} possibly containing the cursor value tracked in the state
+   * associated with the provided stream name/namespace tuple.
+   *
+   * @param pair The {@link AirbyteStreamNameNamespacePair} which identifies a stream.
+   * @return An {@link Optional} possibly containing the cursor value tracked in the state associated
+   *         with the provided stream name/namespace tuple.
+   */
+  default Optional<String> getCursor(final AirbyteStreamNameNamespacePair pair) {
+    return getCursorInfo(pair).map(CursorInfo::getCursor);
+  }
+
+  /**
+   * Retrieves an {@link Optional} possibly containing the cursor field name associated with the
+   * cursor tracked in the state associated with the provided stream name/namespace tuple.
+   *
+   * @param pair The {@link AirbyteStreamNameNamespacePair} which identifies a stream.
+   * @return An {@link Optional} possibly containing the cursor field name associated with the cursor
+   *         tracked in the state associated with the provided stream name/namespace tuple.
+   */
+  default Optional<String> getCursorField(final AirbyteStreamNameNamespacePair pair) {
+    return getCursorInfo(pair).map(CursorInfo::getCursorField);
+  }
+
+  /**
+   * Retrieves an {@link Optional} possibly containing the original cursor value tracked in the state
+   * associated with the provided stream name/namespace tuple.
+   *
+   * @param pair The {@link AirbyteStreamNameNamespacePair} which identifies a stream.
+   * @return An {@link Optional} possibly containing the original cursor value tracked in the state
+   *         associated with the provided stream name/namespace tuple.
+   */
+  default Optional<String> getOriginalCursor(final AirbyteStreamNameNamespacePair pair) {
+    return getCursorInfo(pair).map(CursorInfo::getOriginalCursor);
+  }
+
+  /**
+   * Retrieves an {@link Optional} possibly containing the original cursor field name associated with
+   * the cursor tracked in the state associated with the provided stream name/namespace tuple.
+   *
+   * @param pair The {@link AirbyteStreamNameNamespacePair} which identifies a stream.
+   * @return An {@link Optional} possibly containing the original cursor field name associated with
+   *         the cursor tracked in the state associated with the provided stream name/namespace tuple.
+   */
+  default Optional<String> getOriginalCursorField(final AirbyteStreamNameNamespacePair pair) {
+    return getCursorInfo(pair).map(CursorInfo::getOriginalCursorField);
+  }
+
+  /**
+   * Retrieves the current cursor information stored in the state manager for the stream name/namespace
+   * tuple.
+   *
+   * @param pair The {@link AirbyteStreamNameNamespacePair} that represents a stream managed by the
+   *        state manager.
+   * @return {@link Optional} that potentially contains the current cursor information for the given
+   *         stream name/namespace tuple.
+   */
+  default Optional<CursorInfo> getCursorInfo(final AirbyteStreamNameNamespacePair pair) {
+    return Optional.ofNullable(getPairToCursorInfoMap().get(pair));
+  }
+
+  /**
+   * Emits the current state maintained by the manager as an {@link AirbyteStateMessage}.
+   *
+   * @return An {@link AirbyteStateMessage} that represents the current state maintained by the state
+   *         manager.
+   */
+  default AirbyteStateMessage emit() {
+    return toState();
+  }
+
+  /**
+   * Updates the cursor associated with the provided stream name/namespace pair and emits the current
+   * state maintained by the state manager.
+   *
+   * @param pair The {@link AirbyteStreamNameNamespacePair} that represents a stream managed by the
+   *        state manager.
+   * @param cursor The new value for the cursor associated with the
+   *        {@link AirbyteStreamNameNamespacePair} that represents a stream managed by the state
+   *        manager.
+   * @return An {@link AirbyteStateMessage} that represents the current state maintained by the state
+   *         manager.
+   */
+  default AirbyteStateMessage updateAndEmit(final AirbyteStreamNameNamespacePair pair, final String cursor) {
+    final Optional<CursorInfo> cursorInfo = getCursorInfo(pair);
+    Preconditions.checkState(cursorInfo.isPresent(), "Could not find cursor information for stream: " + pair);
+    cursorInfo.get().setCursor(cursor);
+    return emit();
+  }
+
+}
diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java
new file mode 100644
index 000000000000..9a1a16cc988d
--- /dev/null
+++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2022 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.source.relationaldb.state;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import io.airbyte.commons.json.Jsons;
+import io.airbyte.integrations.source.relationaldb.models.DbState;
+import io.airbyte.protocol.models.AirbyteStateMessage;
+import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType;
+import io.airbyte.protocol.models.ConfiguredAirbyteCatalog;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Factory class that creates {@link StateManager} instances based on the provided state.
+ */
+public class StateManagerFactory {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(StateManagerFactory.class);
+
+  /**
+   * Private constructor to prevent direct instantiation.
+   */
+  private StateManagerFactory() {}
+
+  /**
+   * Creates a {@link StateManager} based on the provided state object and catalog.
+   *
+   * @param state The deserialized state.
+   * @param catalog The {@link ConfiguredAirbyteCatalog} for the connector that will utilize the state
+   *        manager.
+   * @param config The connector configuration.
+   * @return The {@link StateManager} selected for the provided state; currently {@code null} for CDC and global state.
+   */
+  public static StateManager createStateManager(final Object state, final ConfiguredAirbyteCatalog catalog, final JsonNode config) {
+    if (state instanceof AirbyteStateMessage airbyteStateMessage) {
+      if (airbyteStateMessage.getData() != null) {
+        LOGGER.info("Legacy state manager selected to manage state object with type {}.", state.getClass().getName());
+        return new LegacyStateManager(Jsons.object(airbyteStateMessage.getData(), DbState.class), catalog);
+      } else if (isCdc(config)) {
+        LOGGER.info("CDC state manager selected to manage state object with type {}.", state.getClass().getName());
+        // TODO create proper CDC state manager
+        return null;
+      } else if (airbyteStateMessage.getStateType() == AirbyteStateType.GLOBAL) {
+        LOGGER.info("Global state manager selected to manage state object with type {}.", state.getClass().getName());
+        // TODO create proper Global state manager
+        return null;
+      } else {
+        LOGGER.info("Per stream state manager selected to manage state object with type {}.", state.getClass().getName());
+        return new PerStreamStateManager(airbyteStateMessage, catalog);
+      }
+    } else if (state instanceof DbState dbState) {
+      LOGGER.info("Legacy state manager selected to manage state object with type {}.", state.getClass().getName());
+      return new LegacyStateManager(dbState, catalog);
+    } else {
+      throw new IllegalArgumentException(
+          "Failed to create state manager due to detection of unsupported state object type: " + (state != null ? state.getClass().getName() : "null"));
+    }
+  }
+
+  /**
+   * Test whether the connector is configured to use change data capture (CDC) for replication.
+   *
+   * @param config The connector configuration.
+   * @return {@code true} if the connector utilizes CDC or {@code false} otherwise.
+   */
+  private static boolean isCdc(final JsonNode config) {
+    return config.hasNonNull("replication_method")
+        && config.get("replication_method").hasNonNull("replication_slot")
+        && config.get("replication_method").hasNonNull("publication");
+  }
+
+}
diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/StateDecoratingIteratorTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/StateDecoratingIteratorTest.java
index 7fb6964d2654..e464a95e40fa 100644
--- a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/StateDecoratingIteratorTest.java
+++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/StateDecoratingIteratorTest.java
@@ -14,6 +14,7 @@
 import io.airbyte.commons.json.Jsons;
 import io.airbyte.commons.util.MoreIterators;
 import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair;
+import io.airbyte.integrations.source.relationaldb.state.StateManager;
 import io.airbyte.protocol.models.AirbyteMessage;
 import io.airbyte.protocol.models.AirbyteMessage.Type;
 import io.airbyte.protocol.models.AirbyteRecordMessage;
diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/StateManagerTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/StateManagerTest.java
deleted file mode 100644
index 9e64edb55b7e..000000000000
--- a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/StateManagerTest.java
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
- * Copyright (c) 2022 Airbyte, Inc., all rights reserved.
- */ - -package io.airbyte.integrations.source.relationaldb; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import io.airbyte.commons.json.Jsons; -import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; -import io.airbyte.integrations.source.relationaldb.models.DbState; -import io.airbyte.integrations.source.relationaldb.models.DbStreamState; -import io.airbyte.protocol.models.AirbyteStateMessage; -import io.airbyte.protocol.models.AirbyteStream; -import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; -import io.airbyte.protocol.models.ConfiguredAirbyteStream; -import java.util.Collections; -import java.util.Comparator; -import java.util.Optional; -import java.util.stream.Collectors; -import org.junit.jupiter.api.Test; -import org.testcontainers.shaded.com.google.common.collect.Lists; - -class StateManagerTest { - - private static final String NAMESPACE = "public"; - private static final String STREAM_NAME1 = "cars"; - private static final AirbyteStreamNameNamespacePair NAME_NAMESPACE_PAIR1 = new AirbyteStreamNameNamespacePair(STREAM_NAME1, NAMESPACE); - private static final String STREAM_NAME2 = "bicycles"; - private static final AirbyteStreamNameNamespacePair NAME_NAMESPACE_PAIR2 = new AirbyteStreamNameNamespacePair(STREAM_NAME2, NAMESPACE); - private static final String STREAM_NAME3 = "stationary_bicycles"; - private static final String CURSOR_FIELD1 = "year"; - private static final String CURSOR_FIELD2 = "generation"; - private static final String CURSOR = "2000"; - - @Test - void testCreateCursorInfoCatalogAndStateSameCursorField() { - final CursorInfo actual = - StateManager.createCursorInfoForStream(NAME_NAMESPACE_PAIR1, getState(CURSOR_FIELD1, CURSOR), getCatalog(CURSOR_FIELD1)); - assertEquals(new CursorInfo(CURSOR_FIELD1, CURSOR, CURSOR_FIELD1, CURSOR), actual); - } - - @Test - void testCreateCursorInfoCatalogAndStateSameCursorFieldButNoCursor() { - final CursorInfo actual = - 
StateManager.createCursorInfoForStream(NAME_NAMESPACE_PAIR1, getState(CURSOR_FIELD1, null), getCatalog(CURSOR_FIELD1)); - assertEquals(new CursorInfo(CURSOR_FIELD1, null, CURSOR_FIELD1, null), actual); - } - - @Test - void testCreateCursorInfoCatalogAndStateChangeInCursorFieldName() { - final CursorInfo actual = - StateManager.createCursorInfoForStream(NAME_NAMESPACE_PAIR1, getState(CURSOR_FIELD1, CURSOR), getCatalog(CURSOR_FIELD2)); - assertEquals(new CursorInfo(CURSOR_FIELD1, CURSOR, CURSOR_FIELD2, null), actual); - } - - @Test - void testCreateCursorInfoCatalogAndNoState() { - final CursorInfo actual = StateManager - .createCursorInfoForStream(NAME_NAMESPACE_PAIR1, Optional.empty(), getCatalog(CURSOR_FIELD1)); - assertEquals(new CursorInfo(null, null, CURSOR_FIELD1, null), actual); - } - - @Test - void testCreateCursorInfoStateAndNoCatalog() { - final CursorInfo actual = StateManager - .createCursorInfoForStream(NAME_NAMESPACE_PAIR1, getState(CURSOR_FIELD1, CURSOR), Optional.empty()); - assertEquals(new CursorInfo(CURSOR_FIELD1, CURSOR, null, null), actual); - } - - // this is what full refresh looks like. 
- @Test - void testCreateCursorInfoNoCatalogAndNoState() { - final CursorInfo actual = StateManager - .createCursorInfoForStream(NAME_NAMESPACE_PAIR1, Optional.empty(), Optional.empty()); - assertEquals(new CursorInfo(null, null, null, null), actual); - } - - @Test - void testCreateCursorInfoStateAndCatalogButNoCursorField() { - final CursorInfo actual = StateManager - .createCursorInfoForStream(NAME_NAMESPACE_PAIR1, getState(CURSOR_FIELD1, CURSOR), getCatalog(null)); - assertEquals(new CursorInfo(CURSOR_FIELD1, CURSOR, null, null), actual); - } - - @SuppressWarnings("SameParameterValue") - private static Optional getState(final String cursorField, final String cursor) { - return Optional.of(new DbStreamState() - .withStreamName(STREAM_NAME1) - .withCursorField(Lists.newArrayList(cursorField)) - .withCursor(cursor)); - } - - private static Optional getCatalog(final String cursorField) { - return Optional.of(new ConfiguredAirbyteStream() - .withStream(new AirbyteStream().withName(STREAM_NAME1)) - .withCursorField(cursorField == null ? 
Collections.emptyList() : Lists.newArrayList(cursorField))); - } - - @Test - void testGetters() { - final DbState state = new DbState().withStreams(Lists.newArrayList( - new DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(Lists.newArrayList(CURSOR_FIELD1)) - .withCursor(CURSOR), - new DbStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE))); - - final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() - .withStreams(Lists.newArrayList( - new ConfiguredAirbyteStream() - .withStream(new AirbyteStream().withName(STREAM_NAME1).withNamespace(NAMESPACE)) - .withCursorField(Lists.newArrayList(CURSOR_FIELD1)), - new ConfiguredAirbyteStream() - .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)))); - - final StateManager stateManager = new StateManager(state, catalog); - - assertEquals(Optional.of(CURSOR_FIELD1), stateManager.getOriginalCursorField(NAME_NAMESPACE_PAIR1)); - assertEquals(Optional.of(CURSOR), stateManager.getOriginalCursor(NAME_NAMESPACE_PAIR1)); - assertEquals(Optional.of(CURSOR_FIELD1), stateManager.getCursorField(NAME_NAMESPACE_PAIR1)); - assertEquals(Optional.of(CURSOR), stateManager.getCursor(NAME_NAMESPACE_PAIR1)); - - assertEquals(Optional.empty(), stateManager.getOriginalCursorField(NAME_NAMESPACE_PAIR2)); - assertEquals(Optional.empty(), stateManager.getOriginalCursor(NAME_NAMESPACE_PAIR2)); - assertEquals(Optional.empty(), stateManager.getCursorField(NAME_NAMESPACE_PAIR2)); - assertEquals(Optional.empty(), stateManager.getCursor(NAME_NAMESPACE_PAIR2)); - } - - @Test - void testToState() { - final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() - .withStreams(Lists.newArrayList( - new ConfiguredAirbyteStream() - .withStream(new AirbyteStream().withName(STREAM_NAME1).withNamespace(NAMESPACE)) - .withCursorField(Lists.newArrayList(CURSOR_FIELD1)), - new ConfiguredAirbyteStream() - .withStream(new 
AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)) - .withCursorField(Lists.newArrayList(CURSOR_FIELD2)), - new ConfiguredAirbyteStream() - .withStream(new AirbyteStream().withName(STREAM_NAME3).withNamespace(NAMESPACE)))); - - final StateManager stateManager = new StateManager(new DbState(), catalog); - - final AirbyteStateMessage expectedFirstEmission = new AirbyteStateMessage() - .withData(Jsons.jsonNode(new DbState().withStreams(Lists - .newArrayList( - new DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(Lists.newArrayList(CURSOR_FIELD1)) - .withCursor("a"), - new DbStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE).withCursorField(Lists.newArrayList(CURSOR_FIELD2)), - new DbStreamState().withStreamName(STREAM_NAME3).withStreamNamespace(NAMESPACE)) - .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())) - .withCdc(false))); - final AirbyteStateMessage actualFirstEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR1, "a"); - assertEquals(expectedFirstEmission, actualFirstEmission); - final AirbyteStateMessage expectedSecondEmission = new AirbyteStateMessage() - .withData(Jsons.jsonNode(new DbState().withStreams(Lists - .newArrayList( - new DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(Lists.newArrayList(CURSOR_FIELD1)) - .withCursor("a"), - new DbStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE).withCursorField(Lists.newArrayList(CURSOR_FIELD2)) - .withCursor("b"), - new DbStreamState().withStreamName(STREAM_NAME3).withStreamNamespace(NAMESPACE)) - .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())) - .withCdc(false))); - final AirbyteStateMessage actualSecondEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR2, "b"); - assertEquals(expectedSecondEmission, actualSecondEmission); - } - - @Test - void 
testToStateNullCursorField() { - final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() - .withStreams(Lists.newArrayList( - new ConfiguredAirbyteStream() - .withStream(new AirbyteStream().withName(STREAM_NAME1).withNamespace(NAMESPACE)) - .withCursorField(Lists.newArrayList(CURSOR_FIELD1)), - new ConfiguredAirbyteStream() - .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)))); - final StateManager stateManager = new StateManager(new DbState(), catalog); - - final AirbyteStateMessage expectedFirstEmission = new AirbyteStateMessage() - .withData(Jsons.jsonNode(new DbState().withStreams(Lists - .newArrayList( - new DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(Lists.newArrayList(CURSOR_FIELD1)) - .withCursor("a"), - new DbStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE)) - .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())) - .withCdc(false))); - - final AirbyteStateMessage actualFirstEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR1, "a"); - assertEquals(expectedFirstEmission, actualFirstEmission); - } - -} diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/CursorManagerTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/CursorManagerTest.java new file mode 100644 index 000000000000..ac23123d2972 --- /dev/null +++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/CursorManagerTest.java @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.relationaldb.state; + +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.CURSOR; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.CURSOR_FIELD1; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.CURSOR_FIELD2; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.NAMESPACE; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.NAME_NAMESPACE_PAIR1; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.NAME_NAMESPACE_PAIR2; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME1; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME2; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.getCatalog; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.getState; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.getStream; +import static org.junit.jupiter.api.Assertions.assertEquals; + +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; +import io.airbyte.integrations.source.relationaldb.CursorInfo; +import io.airbyte.integrations.source.relationaldb.models.DbStreamState; +import io.airbyte.protocol.models.AirbyteStateMessage; +import io.airbyte.protocol.models.AirbyteStreamState; +import java.util.Collections; +import java.util.List; +import java.util.Optional; +import org.junit.jupiter.api.Test; + +/** + * Test suite for the {@link CursorManager} class. 
+ */ +public class CursorManagerTest { + + @Test + void testCreateCursorInfoCatalogAndStateSameCursorField() { + final CursorManager cursorManager = createCursorManager(CURSOR_FIELD1, CURSOR, NAME_NAMESPACE_PAIR1); + final CursorInfo actual = cursorManager.createCursorInfoForStream( + NAME_NAMESPACE_PAIR1, + getState(CURSOR_FIELD1, CURSOR), + getStream(CURSOR_FIELD1), + DbStreamState::getCursor, + DbStreamState::getCursorField); + assertEquals(new CursorInfo(CURSOR_FIELD1, CURSOR, CURSOR_FIELD1, CURSOR), actual); + } + + @Test + void testCreateCursorInfoCatalogAndStateSameCursorFieldButNoCursor() { + final CursorManager cursorManager = createCursorManager(CURSOR_FIELD1, null, NAME_NAMESPACE_PAIR1); + final CursorInfo actual = cursorManager.createCursorInfoForStream( + NAME_NAMESPACE_PAIR1, + getState(CURSOR_FIELD1, null), + getStream(CURSOR_FIELD1), + DbStreamState::getCursor, + DbStreamState::getCursorField); + assertEquals(new CursorInfo(CURSOR_FIELD1, null, CURSOR_FIELD1, null), actual); + } + + @Test + void testCreateCursorInfoCatalogAndStateChangeInCursorFieldName() { + final CursorManager cursorManager = createCursorManager(CURSOR_FIELD1, CURSOR, NAME_NAMESPACE_PAIR1); + final CursorInfo actual = cursorManager.createCursorInfoForStream( + NAME_NAMESPACE_PAIR1, + getState(CURSOR_FIELD1, CURSOR), + getStream(CURSOR_FIELD2), + DbStreamState::getCursor, + DbStreamState::getCursorField); + assertEquals(new CursorInfo(CURSOR_FIELD1, CURSOR, CURSOR_FIELD2, null), actual); + } + + @Test + void testCreateCursorInfoCatalogAndNoState() { + final CursorManager cursorManager = createCursorManager(CURSOR_FIELD1, CURSOR, NAME_NAMESPACE_PAIR1); + final CursorInfo actual = cursorManager.createCursorInfoForStream( + NAME_NAMESPACE_PAIR1, + Optional.empty(), + getStream(CURSOR_FIELD1), + DbStreamState::getCursor, + DbStreamState::getCursorField); + assertEquals(new CursorInfo(null, null, CURSOR_FIELD1, null), actual); + } + + @Test + void testCreateCursorInfoStateAndNoCatalog() 
{ + final CursorManager cursorManager = createCursorManager(CURSOR_FIELD1, CURSOR, NAME_NAMESPACE_PAIR1); + final CursorInfo actual = cursorManager.createCursorInfoForStream( + NAME_NAMESPACE_PAIR1, + getState(CURSOR_FIELD1, CURSOR), + Optional.empty(), + DbStreamState::getCursor, + DbStreamState::getCursorField); + assertEquals(new CursorInfo(CURSOR_FIELD1, CURSOR, null, null), actual); + } + + // this is what full refresh looks like. + @Test + void testCreateCursorInfoNoCatalogAndNoState() { + final CursorManager cursorManager = createCursorManager(CURSOR_FIELD1, CURSOR, NAME_NAMESPACE_PAIR1); + final CursorInfo actual = cursorManager.createCursorInfoForStream( + NAME_NAMESPACE_PAIR1, + Optional.empty(), + Optional.empty(), + DbStreamState::getCursor, + DbStreamState::getCursorField); + assertEquals(new CursorInfo(null, null, null, null), actual); + } + + @Test + void testCreateCursorInfoStateAndCatalogButNoCursorField() { + final CursorManager cursorManager = createCursorManager(CURSOR_FIELD1, CURSOR, NAME_NAMESPACE_PAIR1); + final CursorInfo actual = cursorManager.createCursorInfoForStream( + NAME_NAMESPACE_PAIR1, + getState(CURSOR_FIELD1, CURSOR), + getStream(null), + DbStreamState::getCursor, + DbStreamState::getCursorField); + assertEquals(new CursorInfo(CURSOR_FIELD1, CURSOR, null, null), actual); + } + + @Test + void testGetters() { + final AirbyteStateMessage state = new AirbyteStateMessage() + .withStreams(List.of( + new AirbyteStreamState() + .withName(STREAM_NAME1) + .withNamespace(NAMESPACE) + .withState( + Jsons.jsonNode( + new DbStreamState() + .withStreamName(STREAM_NAME1) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD1)) + .withCursor(CURSOR))), + new AirbyteStreamState() + .withName(STREAM_NAME2) + .withNamespace(NAMESPACE) + .withState( + Jsons.jsonNode( + new DbStreamState() + .withStreamName(STREAM_NAME2) + .withStreamNamespace(NAMESPACE))))); + + final CursorManager cursorManager = createCursorManager(CURSOR_FIELD1, 
CURSOR, NAME_NAMESPACE_PAIR1); + final CursorInfo actualCursorInfo = new CursorInfo(CURSOR_FIELD1, CURSOR, null, null); + + assertEquals(Optional.of(actualCursorInfo), cursorManager.getCursorInfo(NAME_NAMESPACE_PAIR1)); + assertEquals(Optional.empty(), cursorManager.getCursorField(NAME_NAMESPACE_PAIR1)); + assertEquals(Optional.empty(), cursorManager.getCursor(NAME_NAMESPACE_PAIR1)); + + assertEquals(Optional.empty(), cursorManager.getCursorInfo(NAME_NAMESPACE_PAIR2)); + assertEquals(Optional.empty(), cursorManager.getCursorField(NAME_NAMESPACE_PAIR2)); + assertEquals(Optional.empty(), cursorManager.getCursor(NAME_NAMESPACE_PAIR2)); + } + + private CursorManager createCursorManager(final String cursorField, + final String cursor, + final AirbyteStreamNameNamespacePair nameNamespacePair) { + final DbStreamState dbStreamState = getState(cursorField, cursor).get(); + return new CursorManager<>( + getCatalog(cursorField).orElse(null), + () -> Collections.singleton(dbStreamState), + DbStreamState::getCursor, + DbStreamState::getCursorField, + s -> nameNamespacePair); + } + +} diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManagerTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManagerTest.java new file mode 100644 index 000000000000..0eda420c0770 --- /dev/null +++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManagerTest.java @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.relationaldb.state; + +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.CURSOR; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.CURSOR_FIELD1; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.CURSOR_FIELD2; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.NAMESPACE; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.NAME_NAMESPACE_PAIR1; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.NAME_NAMESPACE_PAIR2; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME1; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME2; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME3; +import static org.junit.jupiter.api.Assertions.assertEquals; + +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.relationaldb.models.DbState; +import io.airbyte.integrations.source.relationaldb.models.DbStreamState; +import io.airbyte.protocol.models.AirbyteStateMessage; +import io.airbyte.protocol.models.AirbyteStream; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.ConfiguredAirbyteStream; +import java.util.Comparator; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; +import org.junit.jupiter.api.Test; + +/** + * Test suite for the {@link LegacyStateManager} class. 
+ */ +public class LegacyStateManagerTest { + + @Test + void testGetters() { + final DbState state = new DbState().withStreams(List.of( + new DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD1)) + .withCursor(CURSOR), + new DbStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE))); + + final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() + .withStreams(List.of( + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME1).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD1)), + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)))); + + final StateManager stateManager = new LegacyStateManager(state, catalog); + + assertEquals(Optional.of(CURSOR_FIELD1), stateManager.getOriginalCursorField(NAME_NAMESPACE_PAIR1)); + assertEquals(Optional.of(CURSOR), stateManager.getOriginalCursor(NAME_NAMESPACE_PAIR1)); + assertEquals(Optional.of(CURSOR_FIELD1), stateManager.getCursorField(NAME_NAMESPACE_PAIR1)); + assertEquals(Optional.of(CURSOR), stateManager.getCursor(NAME_NAMESPACE_PAIR1)); + + assertEquals(Optional.empty(), stateManager.getOriginalCursorField(NAME_NAMESPACE_PAIR2)); + assertEquals(Optional.empty(), stateManager.getOriginalCursor(NAME_NAMESPACE_PAIR2)); + assertEquals(Optional.empty(), stateManager.getCursorField(NAME_NAMESPACE_PAIR2)); + assertEquals(Optional.empty(), stateManager.getCursor(NAME_NAMESPACE_PAIR2)); + } + + @Test + void testToState() { + final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() + .withStreams(List.of( + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME1).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD1)), + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD2)), + 
new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME3).withNamespace(NAMESPACE)))); + + final StateManager stateManager = new LegacyStateManager(new DbState(), catalog); + + final AirbyteStateMessage expectedFirstEmission = new AirbyteStateMessage() + .withData(Jsons.jsonNode(new DbState().withStreams(List.of( + new DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD1)) + .withCursor("a"), + new DbStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD2)), + new DbStreamState().withStreamName(STREAM_NAME3).withStreamNamespace(NAMESPACE)) + .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())) + .withCdc(false))); + final AirbyteStateMessage actualFirstEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR1, "a"); + assertEquals(expectedFirstEmission, actualFirstEmission); + final AirbyteStateMessage expectedSecondEmission = new AirbyteStateMessage() + .withData(Jsons.jsonNode(new DbState().withStreams(List.of( + new DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD1)) + .withCursor("a"), + new DbStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD2)) + .withCursor("b"), + new DbStreamState().withStreamName(STREAM_NAME3).withStreamNamespace(NAMESPACE)) + .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())) + .withCdc(false))); + final AirbyteStateMessage actualSecondEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR2, "b"); + assertEquals(expectedSecondEmission, actualSecondEmission); + } + + @Test + void testToStateNullCursorField() { + final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() + .withStreams(List.of( + new ConfiguredAirbyteStream() + .withStream(new 
AirbyteStream().withName(STREAM_NAME1).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD1)), + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)))); + final StateManager stateManager = new LegacyStateManager(new DbState(), catalog); + + final AirbyteStateMessage expectedFirstEmission = new AirbyteStateMessage() + .withData(Jsons.jsonNode(new DbState().withStreams(List.of( + new DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD1)) + .withCursor("a"), + new DbStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE)) + .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())) + .withCdc(false))); + + final AirbyteStateMessage actualFirstEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR1, "a"); + assertEquals(expectedFirstEmission, actualFirstEmission); + } + + @Test + void testCursorNotUpdatedForCdc() { + final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() + .withStreams(List.of( + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME1).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD1)), + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)))); + + final DbState state = new DbState(); + state.setCdc(true); + final StateManager stateManager = new LegacyStateManager(state, catalog); + + final AirbyteStateMessage expectedFirstEmission = new AirbyteStateMessage() + .withData(Jsons.jsonNode(new DbState().withStreams(List.of( + new DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD1)) + .withCursor(null), + new DbStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE).withCursorField(List.of())) + 
.stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())) + .withCdc(true))); + final AirbyteStateMessage actualFirstEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR1, "a"); + assertEquals(expectedFirstEmission, actualFirstEmission); + final AirbyteStateMessage expectedSecondEmission = new AirbyteStateMessage() + .withData(Jsons.jsonNode(new DbState().withStreams(List.of( + new DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD1)) + .withCursor(null), + new DbStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE).withCursorField(List.of()) + .withCursor(null)) + .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())) + .withCdc(true))); + final AirbyteStateMessage actualSecondEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR2, "b"); + assertEquals(expectedSecondEmission, actualSecondEmission); + } + +} diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManagerTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManagerTest.java new file mode 100644 index 000000000000..54b988eb5591 --- /dev/null +++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManagerTest.java @@ -0,0 +1,248 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.relationaldb.state; + +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.CURSOR; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.CURSOR_FIELD1; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.CURSOR_FIELD2; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.NAMESPACE; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.NAME_NAMESPACE_PAIR1; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.NAME_NAMESPACE_PAIR2; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME1; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME2; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME3; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.mockito.Mockito.mock; + +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.relationaldb.models.DbStreamState; +import io.airbyte.protocol.models.AirbyteStateMessage; +import io.airbyte.protocol.models.AirbyteStream; +import io.airbyte.protocol.models.AirbyteStreamState; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.ConfiguredAirbyteStream; +import java.util.Comparator; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +/** + * Test suite for the {@link PerStreamStateManager} class. 
+ */ +public class PerStreamStateManagerTest { + + @Test + void testCreationFromInvalidState() { + final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage() + .withStreams(List.of( + new AirbyteStreamState() + .withName(STREAM_NAME1) + .withNamespace(NAMESPACE) + .withState(Jsons.jsonNode("Not a state object")))); + final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + + Assertions.assertDoesNotThrow(() -> { + final StateManager stateManager = new PerStreamStateManager(airbyteStateMessage, catalog); + assertNotNull(stateManager); + }); + } + + @Test + void testGetters() { + final AirbyteStateMessage state = new AirbyteStateMessage() + .withStreams(List.of( + new AirbyteStreamState() + .withName(STREAM_NAME1) + .withNamespace(NAMESPACE) + .withState( + Jsons.jsonNode( + new DbStreamState() + .withStreamName(STREAM_NAME1) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD1)) + .withCursor(CURSOR))), + new AirbyteStreamState() + .withName(STREAM_NAME2) + .withNamespace(NAMESPACE) + .withState( + Jsons.jsonNode( + new DbStreamState() + .withStreamName(STREAM_NAME2) + .withStreamNamespace(NAMESPACE))))); + + final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() + .withStreams(List.of( + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME1).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD1)), + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)))); + + final StateManager stateManager = new PerStreamStateManager(state, catalog); + + assertEquals(Optional.of(CURSOR_FIELD1), stateManager.getOriginalCursorField(NAME_NAMESPACE_PAIR1)); + assertEquals(Optional.of(CURSOR), stateManager.getOriginalCursor(NAME_NAMESPACE_PAIR1)); + assertEquals(Optional.of(CURSOR_FIELD1), stateManager.getCursorField(NAME_NAMESPACE_PAIR1)); + assertEquals(Optional.of(CURSOR), 
stateManager.getCursor(NAME_NAMESPACE_PAIR1)); + + assertEquals(Optional.empty(), stateManager.getOriginalCursorField(NAME_NAMESPACE_PAIR2)); + assertEquals(Optional.empty(), stateManager.getOriginalCursor(NAME_NAMESPACE_PAIR2)); + assertEquals(Optional.empty(), stateManager.getCursorField(NAME_NAMESPACE_PAIR2)); + assertEquals(Optional.empty(), stateManager.getCursor(NAME_NAMESPACE_PAIR2)); + } + + @Test + void testToState() { + final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() + .withStreams(List.of( + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME1).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD1)), + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD2)), + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME3).withNamespace(NAMESPACE)))); + + final StateManager stateManager = new PerStreamStateManager(new AirbyteStateMessage(), catalog); + + final AirbyteStateMessage expectedFirstEmission = new AirbyteStateMessage() + // .withData(Jsons.jsonNode(new DbState().withStreams(List + // .of( + // new + // DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD1)) + // .withCursor("a"), + // new + // DbStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD2)), + // new DbStreamState().withStreamName(STREAM_NAME3).withStreamNamespace(NAMESPACE)) + // .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())) + // .withCdc(false))) + .withStreams(List.of( + new AirbyteStreamState() + .withName(STREAM_NAME1) + .withNamespace(NAMESPACE) + .withState( + Jsons.jsonNode( + new DbStreamState() + .withStreamName(STREAM_NAME1) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD1)) + 
.withCursor("a"))), + new AirbyteStreamState() + .withName(STREAM_NAME2) + .withNamespace(NAMESPACE) + .withState( + Jsons.jsonNode( + new DbStreamState() + .withStreamName(STREAM_NAME2) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD2)))), + new AirbyteStreamState() + .withName(STREAM_NAME3) + .withNamespace(NAMESPACE) + .withState( + Jsons.jsonNode( + new DbStreamState() + .withStreamName(STREAM_NAME3) + .withStreamNamespace(NAMESPACE)))) + .stream().sorted(Comparator.comparing(AirbyteStreamState::getName)).collect(Collectors.toList())); + final AirbyteStateMessage actualFirstEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR1, "a"); + assertEquals(expectedFirstEmission, actualFirstEmission); + final AirbyteStateMessage expectedSecondEmission = new AirbyteStateMessage() + // .withData(Jsons.jsonNode(new DbState().withStreams(List + // .of( + // new + // DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD1)) + // .withCursor("a"), + // new + // DbStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD2)) + // .withCursor("b"), + // new DbStreamState().withStreamName(STREAM_NAME3).withStreamNamespace(NAMESPACE)) + // .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())) + // .withCdc(false))) + .withStreams(List.of( + new AirbyteStreamState() + .withName(STREAM_NAME1) + .withNamespace(NAMESPACE) + .withState( + Jsons.jsonNode( + new DbStreamState() + .withStreamName(STREAM_NAME1) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD1)) + .withCursor("a"))), + new AirbyteStreamState() + .withName(STREAM_NAME2) + .withNamespace(NAMESPACE) + .withState( + Jsons.jsonNode( + new DbStreamState() + .withStreamName(STREAM_NAME2) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD2)) + .withCursor("b"))), + new AirbyteStreamState() + 
.withName(STREAM_NAME3) + .withNamespace(NAMESPACE) + .withState( + Jsons.jsonNode( + new DbStreamState() + .withStreamName(STREAM_NAME3) + .withStreamNamespace(NAMESPACE)))) + .stream().sorted(Comparator.comparing(AirbyteStreamState::getName)).collect(Collectors.toList())); + + final AirbyteStateMessage actualSecondEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR2, "b"); + assertEquals(expectedSecondEmission, actualSecondEmission); + } + + @Test + void testToStateNullCursorField() { + final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() + .withStreams(List.of( + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME1).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD1)), + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)))); + final StateManager stateManager = new PerStreamStateManager(new AirbyteStateMessage(), catalog); + + final AirbyteStateMessage expectedFirstEmission = new AirbyteStateMessage() + .withStreams( + List.of( + new AirbyteStreamState() + .withName(STREAM_NAME1) + .withNamespace(NAMESPACE) + .withState( + Jsons.jsonNode( + new DbStreamState() + .withStreamName(STREAM_NAME1) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD1)) + .withCursor("a"))), + new AirbyteStreamState() + .withName(STREAM_NAME2) + .withNamespace(NAMESPACE) + .withState( + Jsons.jsonNode( + new DbStreamState() + .withStreamName(STREAM_NAME2) + .withStreamNamespace(NAMESPACE)))) + .stream().sorted(Comparator.comparing(AirbyteStreamState::getName)).collect(Collectors.toList())); + + final AirbyteStateMessage actualFirstEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR1, "a"); + assertEquals(expectedFirstEmission, actualFirstEmission); + } + + @Test + void testCdcStateManager() { + final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + final StateManager stateManager = new 
PerStreamStateManager(new AirbyteStateMessage(), catalog); + assertNotNull(stateManager.getCdcStateManager()); + } + +} diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java new file mode 100644 index 000000000000..175a7eb29318 --- /dev/null +++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.relationaldb.state; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.relationaldb.models.DbState; +import io.airbyte.protocol.models.AirbyteStateMessage; +import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +/** + * Test suite for the {@link StateManagerFactory} class. 
+ */ +public class StateManagerFactoryTest { + + @Test + void testLegacyStateManagerCreationFromDbState() { + final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + final DbState state = mock(DbState.class); + final JsonNode config = mock(JsonNode.class); + + final StateManager stateManager = StateManagerFactory.createStateManager(state, catalog, config); + + Assertions.assertNotNull(stateManager); + Assertions.assertEquals(LegacyStateManager.class, stateManager.getClass()); + } + + @Test + void testLegacyStateManagerCreationFromAirbyteStateMessage() { + final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + final AirbyteStateMessage airbyteStateMessage = mock(AirbyteStateMessage.class); + final JsonNode config = mock(JsonNode.class); + when(airbyteStateMessage.getData()).thenReturn(Jsons.jsonNode(new DbState())); + + final StateManager stateManager = StateManagerFactory.createStateManager(airbyteStateMessage, catalog, config); + + Assertions.assertNotNull(stateManager); + Assertions.assertEquals(LegacyStateManager.class, stateManager.getClass()); + } + + @Test + void testCdcStateManagerCreation() { + final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + final AirbyteStateMessage airbyteStateMessage = mock(AirbyteStateMessage.class); + final JsonNode config = mock(JsonNode.class); + final JsonNode replicationConfig = mock(JsonNode.class); + + when(replicationConfig.hasNonNull("replication_slot")).thenReturn(true); + when(replicationConfig.hasNonNull("publication")).thenReturn(true); + + when(config.hasNonNull("replication_method")).thenReturn(true); + when(config.get("replication_method")).thenReturn(replicationConfig); + + final StateManager stateManager = StateManagerFactory.createStateManager(airbyteStateMessage, catalog, config); + + // TODO replace with non-null assertion and type assertion once the CDC state manager exists + Assertions.assertNull(stateManager); + } + + @Test + void 
testGlobalStateManagerCreation() { + final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + final AirbyteStateMessage airbyteStateMessage = mock(AirbyteStateMessage.class); + final JsonNode config = mock(JsonNode.class); + when(airbyteStateMessage.getStateType()).thenReturn(AirbyteStateType.GLOBAL); + + final StateManager stateManager = StateManagerFactory.createStateManager(airbyteStateMessage, catalog, config); + + // TODO replace with non-null assertion and type assertion once the Global state manager exists + Assertions.assertNull(stateManager); + } + + @Test + void testPerStreamStateManagerCreation() { + final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + final AirbyteStateMessage airbyteStateMessage = mock(AirbyteStateMessage.class); + final JsonNode config = mock(JsonNode.class); + when(airbyteStateMessage.getData()).thenReturn(null); + + final StateManager stateManager = StateManagerFactory.createStateManager(airbyteStateMessage, catalog, config); + + Assertions.assertNotNull(stateManager); + Assertions.assertEquals(PerStreamStateManager.class, stateManager.getClass()); + } + + @Test + void testStateManagerCreationForUnknownStateObject() { + final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + final JsonNode config = mock(JsonNode.class); + + Assertions.assertThrows(IllegalArgumentException.class, () -> StateManagerFactory.createStateManager("Not Valid", catalog, config)); + } + +} diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateTestConstants.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateTestConstants.java new file mode 100644 index 000000000000..e939c9aea87d --- /dev/null +++ 
b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateTestConstants.java @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.relationaldb.state; + +import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; +import io.airbyte.integrations.source.relationaldb.models.DbStreamState; +import io.airbyte.protocol.models.AirbyteStream; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.ConfiguredAirbyteStream; +import java.util.Collections; +import java.util.List; +import java.util.Optional; +import org.testcontainers.shaded.com.google.common.collect.Lists; + +/** + * Collection of constants for use in state management-related tests. + */ +public final class StateTestConstants { + + public static final String NAMESPACE = "public"; + public static final String STREAM_NAME1 = "cars"; + public static final AirbyteStreamNameNamespacePair NAME_NAMESPACE_PAIR1 = new AirbyteStreamNameNamespacePair(STREAM_NAME1, NAMESPACE); + public static final String STREAM_NAME2 = "bicycles"; + public static final AirbyteStreamNameNamespacePair NAME_NAMESPACE_PAIR2 = new AirbyteStreamNameNamespacePair(STREAM_NAME2, NAMESPACE); + public static final String STREAM_NAME3 = "stationary_bicycles"; + public static final String CURSOR_FIELD1 = "year"; + public static final String CURSOR_FIELD2 = "generation"; + public static final String CURSOR = "2000"; + + private StateTestConstants() {} + + @SuppressWarnings("SameParameterValue") + public static Optional getState(final String cursorField, final String cursor) { + return Optional.of(new DbStreamState() + .withStreamName(STREAM_NAME1) + .withCursorField(Lists.newArrayList(cursorField)) + .withCursor(cursor)); + } + + public static Optional getCatalog(final String cursorField) { + return Optional.of(new ConfiguredAirbyteCatalog() + 
.withStreams(List.of(getStream(cursorField).orElse(null)))); + } + + public static Optional getStream(final String cursorField) { + return Optional.of(new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME1)) + .withCursorField(cursorField == null ? Collections.emptyList() : Lists.newArrayList(cursorField))); + } + +} From 8ebec44a1af3d8f276f8cd151218d2b5e72195ab Mon Sep 17 00:00:00 2001 From: jdpgrailsdev Date: Wed, 8 Jun 2022 10:46:06 -0400 Subject: [PATCH 02/34] Fix failing test --- .../state/PerStreamStateManagerTest.java | 25 +++---------------- 1 file changed, 4 insertions(+), 21 deletions(-) diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManagerTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManagerTest.java index 54b988eb5591..cbd34154ed4f 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManagerTest.java +++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManagerTest.java @@ -20,6 +20,7 @@ import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.source.relationaldb.models.DbStreamState; import io.airbyte.protocol.models.AirbyteStateMessage; +import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; import io.airbyte.protocol.models.AirbyteStream; import io.airbyte.protocol.models.AirbyteStreamState; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; @@ -112,16 +113,7 @@ void testToState() { final StateManager stateManager = new PerStreamStateManager(new AirbyteStateMessage(), catalog); final AirbyteStateMessage expectedFirstEmission = new AirbyteStateMessage() - // .withData(Jsons.jsonNode(new DbState().withStreams(List - // 
.of( - // new - // DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD1)) - // .withCursor("a"), - // new - // DbStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD2)), - // new DbStreamState().withStreamName(STREAM_NAME3).withStreamNamespace(NAMESPACE)) - // .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())) - // .withCdc(false))) + .withStateType(AirbyteStateType.PER_STREAM) .withStreams(List.of( new AirbyteStreamState() .withName(STREAM_NAME1) @@ -154,17 +146,7 @@ void testToState() { final AirbyteStateMessage actualFirstEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR1, "a"); assertEquals(expectedFirstEmission, actualFirstEmission); final AirbyteStateMessage expectedSecondEmission = new AirbyteStateMessage() - // .withData(Jsons.jsonNode(new DbState().withStreams(List - // .of( - // new - // DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD1)) - // .withCursor("a"), - // new - // DbStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD2)) - // .withCursor("b"), - // new DbStreamState().withStreamName(STREAM_NAME3).withStreamNamespace(NAMESPACE)) - // .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())) - // .withCdc(false))) + .withStateType(AirbyteStateType.PER_STREAM) .withStreams(List.of( new AirbyteStreamState() .withName(STREAM_NAME1) @@ -212,6 +194,7 @@ void testToStateNullCursorField() { final StateManager stateManager = new PerStreamStateManager(new AirbyteStateMessage(), catalog); final AirbyteStateMessage expectedFirstEmission = new AirbyteStateMessage() + .withStateType(AirbyteStateType.PER_STREAM) .withStreams( List.of( new AirbyteStreamState() From 5158bad4128775641eef9e2f4c2d09262d9ae058 Mon Sep 17 00:00:00 2001 
From: jdpgrailsdev Date: Wed, 8 Jun 2022 11:37:41 -0400 Subject: [PATCH 03/34] Improve code coverage --- .../state/PerStreamStateManager.java | 11 ++---- .../state/StateManagerFactory.java | 4 ++- .../state/PerStreamStateManagerTest.java | 35 +++++++++++++++++++ .../state/StateManagerFactoryTest.java | 31 ++++++++++++++++ 4 files changed, 72 insertions(+), 9 deletions(-) diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManager.java index fb45e0052691..fd5bc2d219bc 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManager.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManager.java @@ -81,23 +81,18 @@ public CdcStateManager getCdcStateManager() { @Override public AirbyteStateMessage toState() { - final Map pairCursorInfoMap = getPairToCursorInfoMap(); final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage(); - final List airbyteStreamStates = generatePerStreamState(pairCursorInfoMap); - - // TODO detect global? - + final List airbyteStreamStates = generatePerStreamState(); return airbyteStateMessage.withStateType(AirbyteStateType.PER_STREAM).withStreams(airbyteStreamStates); } /** * Generates the per-stream state for each stream. * - * @param pairCursorInfoMap The map of stream name/namespace to current cursor information. * @return The list of per-stream state. 
*/ - private List generatePerStreamState(final Map pairCursorInfoMap) { - return pairCursorInfoMap.entrySet().stream() + private List generatePerStreamState() { + return getPairToCursorInfoMap().entrySet().stream() .filter(s -> s.getKey().getName() != null && s.getKey().getNamespace() != null) .sorted(Entry.comparingByKey()) // sort by stream name then namespace for sanity. .map(e -> new AirbyteStreamState() diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java index 9a1a16cc988d..73dd39cc7011 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java @@ -5,6 +5,7 @@ package io.airbyte.integrations.source.relationaldb.state; import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.annotations.VisibleForTesting; import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.source.relationaldb.models.DbState; import io.airbyte.protocol.models.AirbyteStateMessage; @@ -66,7 +67,8 @@ public static StateManager createStateManager(final Object state, final Configur * @param config The connector configuration. * @return {@code true} if the connector utilizes CDC or {@code false} otherwise. 
*/ - private static boolean isCdc(final JsonNode config) { + @VisibleForTesting + protected static boolean isCdc(final JsonNode config) { return config.hasNonNull("replication_method") && config.get("replication_method").hasNonNull("replication_slot") && config.get("replication_method").hasNonNull("publication"); diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManagerTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManagerTest.java index cbd34154ed4f..37d81fddd528 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManagerTest.java +++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManagerTest.java @@ -13,11 +13,16 @@ import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME1; import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME2; import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME3; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.getCatalog; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.when; import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; +import io.airbyte.integrations.source.relationaldb.CursorInfo; import io.airbyte.integrations.source.relationaldb.models.DbStreamState; import io.airbyte.protocol.models.AirbyteStateMessage; import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; @@ 
-26,7 +31,9 @@ import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import io.airbyte.protocol.models.ConfiguredAirbyteStream; import java.util.Comparator; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.stream.Collectors; import org.junit.jupiter.api.Assertions; @@ -228,4 +235,32 @@ void testCdcStateManager() { assertNotNull(stateManager.getCdcStateManager()); } + @Test + void testNullNameNamespacePairFiltered() { + final Map pairToCursorInfoMap = new HashMap<>(); + pairToCursorInfoMap.put(new AirbyteStreamNameNamespacePair(null, null), mock(CursorInfo.class)); + final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage() + .withStreams(List.of()); + final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + final StateManager stateManager = spy(new PerStreamStateManager(airbyteStateMessage, catalog)); + when(stateManager.getPairToCursorInfoMap()).thenReturn(pairToCursorInfoMap); + + final AirbyteStateMessage result = stateManager.toState(); + assertNotNull(result); + assertEquals(0, result.getStreams().size()); + + pairToCursorInfoMap.clear(); + pairToCursorInfoMap.put(new AirbyteStreamNameNamespacePair("test", null), mock(CursorInfo.class)); + + final AirbyteStateMessage result2 = stateManager.toState(); + assertNotNull(result2); + assertEquals(0, result2.getStreams().size()); + + pairToCursorInfoMap.clear(); + pairToCursorInfoMap.put(new AirbyteStreamNameNamespacePair(null, "test"), mock(CursorInfo.class)); + + final AirbyteStateMessage result3 = stateManager.toState(); + assertNotNull(result3); + assertEquals(0, result3.getStreams().size()); + } } diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java 
index 175a7eb29318..6fcee8369efa 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java +++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java @@ -4,6 +4,8 @@ package io.airbyte.integrations.source.relationaldb.state; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -99,4 +101,33 @@ void testStateManagerCreationForUnknownStateObject() { Assertions.assertThrows(IllegalArgumentException.class, () -> StateManagerFactory.createStateManager("Not Valid", catalog, config)); } + @Test + void testCdcDetectionLogic() { + final JsonNode config = mock(JsonNode.class); + final JsonNode replicationConfig = mock(JsonNode.class); + + when(replicationConfig.hasNonNull("replication_slot")).thenReturn(true); + when(replicationConfig.hasNonNull("publication")).thenReturn(true); + when(config.hasNonNull("replication_method")).thenReturn(true); + when(config.get("replication_method")).thenReturn(replicationConfig); + assertTrue(StateManagerFactory.isCdc(config)); + + when(replicationConfig.hasNonNull("replication_slot")).thenReturn(false); + assertFalse(StateManagerFactory.isCdc(config)); + + when(replicationConfig.hasNonNull("replication_slot")).thenReturn(true); + when(replicationConfig.hasNonNull("publication")).thenReturn(false); + assertFalse(StateManagerFactory.isCdc(config)); + + when(replicationConfig.hasNonNull("replication_slot")).thenReturn(true); + when(replicationConfig.hasNonNull("publication")).thenReturn(true); + when(config.hasNonNull("replication_method")).thenReturn(false); + assertFalse(StateManagerFactory.isCdc(config)); + + when(replicationConfig.hasNonNull("replication_slot")).thenReturn(false); + 
when(replicationConfig.hasNonNull("publication")).thenReturn(false); + when(config.hasNonNull("replication_method")).thenReturn(false); + assertFalse(StateManagerFactory.isCdc(config)); + } + } From 960919c55c692dce5989750a153504d0d6da4654 Mon Sep 17 00:00:00 2001 From: jdpgrailsdev Date: Wed, 8 Jun 2022 16:03:10 -0400 Subject: [PATCH 04/34] Make global the default state manager --- .../source/relationaldb/state/StateManagerFactory.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java index 73dd39cc7011..764e6b7964c9 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java @@ -44,13 +44,13 @@ public static StateManager createStateManager(final Object state, final Configur LOGGER.info("CDC state manager selected to manage state object with type {}.", state.getClass().getName()); // TODO create proper CDC state manager return null; - } else if (airbyteStateMessage.getStateType() == AirbyteStateType.GLOBAL) { + } else if (airbyteStateMessage.getStateType() == AirbyteStateType.PER_STREAM) { + LOGGER.info("Per stream state manager selected to manage state object with type {}.", state.getClass().getName()); + return new PerStreamStateManager(airbyteStateMessage, catalog); + } else { LOGGER.info("Global state manager selected to manage state object with type {}.", state.getClass().getName()); // TODO create proper Global state manager return null; - } else { - LOGGER.info("Per stream state manager selected to manage state 
object with type {}.", state.getClass().getName()); - return new PerStreamStateManager(airbyteStateMessage, catalog); } } else if (state instanceof DbState dbState) { LOGGER.info("Legacy state manager selected to manage state object with type {}.", state.getClass().getName()); From 89bc6a0bb145f5a7af95f80dc6832db2cc89aafd Mon Sep 17 00:00:00 2001 From: jdpgrailsdev Date: Thu, 9 Jun 2022 09:50:11 -0400 Subject: [PATCH 05/34] Add legacy adapter state manager --- .../state/LegacyAdapterStateManager.java | 49 +++++++++++++ .../state/StateManagerFactory.java | 4 +- .../state/LegacyAdapterStateManagerTest.java | 68 +++++++++++++++++++ .../state/StateManagerFactoryTest.java | 5 +- 4 files changed, 122 insertions(+), 4 deletions(-) create mode 100644 airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/LegacyAdapterStateManager.java create mode 100644 airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/LegacyAdapterStateManagerTest.java diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/LegacyAdapterStateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/LegacyAdapterStateManager.java new file mode 100644 index 000000000000..d2120585a065 --- /dev/null +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/LegacyAdapterStateManager.java @@ -0,0 +1,49 @@ +package io.airbyte.integrations.source.relationaldb.state; + +import io.airbyte.integrations.source.relationaldb.models.DbState; +import io.airbyte.protocol.models.AirbyteStateMessage; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; + +/** + * Variant of the {@link LegacyStateManager} that ensures that the state type is + * set on any state message generated by 
this manager. + *
+ *

+ * This manager exists to handle + * the case of a connector that has been updated to use this code, but has not yet + * been migrated to use the new per-stream state mechanics. This is a temporary state + * and this class will be removed once all connectors have been updated AND migrated + * to the new state management mechanism. + *

+ *
+ *

+ * N.B. This case is different from connectors who have not yet been released with + * code at or beyond the point at which this class was introduced. In that case, those connectors will + * continue to use the {@link LegacyStateManager}, as they will continue to receive the legacy state + * JSON as input. + *

+ * + * @deprecated This manager may be removed in the future if/once all connectors support per-stream + * state management. + */ +@Deprecated(forRemoval = true) +public class LegacyAdapterStateManager extends LegacyStateManager { + + /** + * Constructs a new {@link LegacyStateManager} that is seeded with the provided {@link DbState} instance. + * + * @param dbState The initial state represented as an {@link DbState} instance. + * @param catalog The {@link ConfiguredAirbyteCatalog} for the connector associated with this state manager. + */ + public LegacyAdapterStateManager(final DbState dbState, final ConfiguredAirbyteCatalog catalog) { + super(dbState, catalog); + } + + @Override + public AirbyteStateMessage toState() { + final AirbyteStateMessage airbyteStateMessage = super.toState(); + // TOD add the legacy state type once available. + //return airbyteStateMessage.withStateType(AirbyteStateType.LEGACY); + return airbyteStateMessage; + } +} diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java index 764e6b7964c9..d39d6daca992 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java @@ -38,8 +38,8 @@ private StateManagerFactory() {} public static StateManager createStateManager(final Object state, final ConfiguredAirbyteCatalog catalog, final JsonNode config) { if (state instanceof AirbyteStateMessage airbyteStateMessage) { if (airbyteStateMessage.getData() != null) { - LOGGER.info("Legacy state manager selected to manage state object with type {}.", state.getClass().getName()); - 
return new LegacyStateManager(Jsons.object(airbyteStateMessage.getData(), DbState.class), catalog); + LOGGER.info("Legacy adapter state manager selected to manage state object with type {}.", state.getClass().getName()); + return new LegacyAdapterStateManager(Jsons.object(airbyteStateMessage.getData(), DbState.class), catalog); } else if (isCdc(config)) { LOGGER.info("CDC state manager selected to manage state object with type {}.", state.getClass().getName()); // TODO create proper CDC state manager diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/LegacyAdapterStateManagerTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/LegacyAdapterStateManagerTest.java new file mode 100644 index 000000000000..9ec17dbd78a6 --- /dev/null +++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/LegacyAdapterStateManagerTest.java @@ -0,0 +1,68 @@ +package io.airbyte.integrations.source.relationaldb.state; + +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.CURSOR_FIELD1; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.CURSOR_FIELD2; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.NAMESPACE; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.NAME_NAMESPACE_PAIR1; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.NAME_NAMESPACE_PAIR2; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME1; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME2; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME3; +import static org.junit.jupiter.api.Assertions.assertEquals; + 
+import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.relationaldb.models.DbState; +import io.airbyte.integrations.source.relationaldb.models.DbStreamState; +import io.airbyte.protocol.models.AirbyteStateMessage; +import io.airbyte.protocol.models.AirbyteStream; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.ConfiguredAirbyteStream; +import java.util.Comparator; +import java.util.List; +import java.util.stream.Collectors; +import org.junit.jupiter.api.Test; + +/** + * Test suite for the {@link LegacyAdapterStateManagerTest} class. + */ +public class LegacyAdapterStateManagerTest { + + @Test + void testToState() { + // TODO update to include state type once available + final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() + .withStreams(List.of( + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME1).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD1)), + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD2)), + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME3).withNamespace(NAMESPACE)))); + + final StateManager stateManager = new LegacyAdapterStateManager(new DbState(), catalog); + + final AirbyteStateMessage expectedFirstEmission = new AirbyteStateMessage() + .withData(Jsons.jsonNode(new DbState().withStreams(List.of( + new DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD1)) + .withCursor("a"), + new DbStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD2)), + new DbStreamState().withStreamName(STREAM_NAME3).withStreamNamespace(NAMESPACE)) + .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())) + .withCdc(false))); + final 
AirbyteStateMessage actualFirstEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR1, "a"); + assertEquals(expectedFirstEmission, actualFirstEmission); + final AirbyteStateMessage expectedSecondEmission = new AirbyteStateMessage() + .withData(Jsons.jsonNode(new DbState().withStreams(List.of( + new DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD1)) + .withCursor("a"), + new DbStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD2)) + .withCursor("b"), + new DbStreamState().withStreamName(STREAM_NAME3).withStreamNamespace(NAMESPACE)) + .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())) + .withCdc(false))); + final AirbyteStateMessage actualSecondEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR2, "b"); + assertEquals(expectedSecondEmission, actualSecondEmission); + } +} diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java index 6fcee8369efa..95519bed3398 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java +++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java @@ -36,7 +36,7 @@ void testLegacyStateManagerCreationFromDbState() { } @Test - void testLegacyStateManagerCreationFromAirbyteStateMessage() { + void testLegacyAdapterStateManagerCreationFromAirbyteStateMessage() { final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); final AirbyteStateMessage airbyteStateMessage = mock(AirbyteStateMessage.class); final 
JsonNode config = mock(JsonNode.class); @@ -45,7 +45,7 @@ void testLegacyStateManagerCreationFromAirbyteStateMessage() { final StateManager stateManager = StateManagerFactory.createStateManager(airbyteStateMessage, catalog, config); Assertions.assertNotNull(stateManager); - Assertions.assertEquals(LegacyStateManager.class, stateManager.getClass()); + Assertions.assertEquals(LegacyAdapterStateManager.class, stateManager.getClass()); } @Test @@ -86,6 +86,7 @@ void testPerStreamStateManagerCreation() { final AirbyteStateMessage airbyteStateMessage = mock(AirbyteStateMessage.class); final JsonNode config = mock(JsonNode.class); when(airbyteStateMessage.getData()).thenReturn(null); + when(airbyteStateMessage.getStateType()).thenReturn(AirbyteStateType.PER_STREAM); final StateManager stateManager = StateManagerFactory.createStateManager(airbyteStateMessage, catalog, config); From fa0a74250c0cfa8a65896598ca8afbdc8ac91a55 Mon Sep 17 00:00:00 2001 From: jdpgrailsdev Date: Thu, 9 Jun 2022 09:51:55 -0400 Subject: [PATCH 06/34] Formatting --- .../state/LegacyAdapterStateManager.java | 35 +++++++++++-------- .../state/PerStreamStateManager.java | 1 - .../state/LegacyAdapterStateManagerTest.java | 27 ++++++++------ .../state/PerStreamStateManagerTest.java | 2 +- 4 files changed, 37 insertions(+), 28 deletions(-) diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/LegacyAdapterStateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/LegacyAdapterStateManager.java index d2120585a065..80febe7c32bf 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/LegacyAdapterStateManager.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/LegacyAdapterStateManager.java @@ -1,3 +1,7 @@ +/* 
+ * Copyright (c) 2022 Airbyte, Inc., all rights reserved. + */ + package io.airbyte.integrations.source.relationaldb.state; import io.airbyte.integrations.source.relationaldb.models.DbState; @@ -5,22 +9,20 @@ import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; /** - * Variant of the {@link LegacyStateManager} that ensures that the state type is - * set on any state message generated by this manager. - *
+ * Variant of the {@link LegacyStateManager} that ensures that the state type is set on any state + * message generated by this manager.
*

- * This manager exists to handle - * the case of a connector that has been updated to use this code, but has not yet - * been migrated to use the new per-stream state mechanics. This is a temporary state - * and this class will be removed once all connectors have been updated AND migrated - * to the new state management mechanism. + * This manager exists to handle the case of a connector that has been updated to use this code, but + * has not yet been migrated to use the new per-stream state mechanics. This is a temporary state + * and this class will be removed once all connectors have been updated AND migrated to the new + * state management mechanism. *

*
*

- * N.B. This case is different from connectors who have not yet been released with - * code at or beyond the point at which this class was introduced. In that case, those connectors will - * continue to use the {@link LegacyStateManager}, as they will continue to receive the legacy state - * JSON as input. + * N.B. This case is different from connectors who have not yet been released with code at or + * beyond the point at which this class was introduced. In that case, those connectors will continue + * to use the {@link LegacyStateManager}, as they will continue to receive the legacy state JSON as + * input. *

* * @deprecated This manager may be removed in the future if/once all connectors support per-stream @@ -30,10 +32,12 @@ public class LegacyAdapterStateManager extends LegacyStateManager { /** - * Constructs a new {@link LegacyStateManager} that is seeded with the provided {@link DbState} instance. + * Constructs a new {@link LegacyStateManager} that is seeded with the provided {@link DbState} + * instance. * * @param dbState The initial state represented as an {@link DbState} instance. - * @param catalog The {@link ConfiguredAirbyteCatalog} for the connector associated with this state manager. + * @param catalog The {@link ConfiguredAirbyteCatalog} for the connector associated with this state + * manager. */ public LegacyAdapterStateManager(final DbState dbState, final ConfiguredAirbyteCatalog catalog) { super(dbState, catalog); @@ -43,7 +47,8 @@ public LegacyAdapterStateManager(final DbState dbState, final ConfiguredAirbyteC public AirbyteStateMessage toState() { final AirbyteStateMessage airbyteStateMessage = super.toState(); // TOD add the legacy state type once available. 
- //return airbyteStateMessage.withStateType(AirbyteStateType.LEGACY); + // return airbyteStateMessage.withStateType(AirbyteStateType.LEGACY); return airbyteStateMessage; } + } diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManager.java index fd5bc2d219bc..5931e0bb59bf 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManager.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManager.java @@ -16,7 +16,6 @@ import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import java.util.Collections; import java.util.List; -import java.util.Map; import java.util.Map.Entry; import java.util.Optional; import java.util.function.Function; diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/LegacyAdapterStateManagerTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/LegacyAdapterStateManagerTest.java index 9ec17dbd78a6..3ecd4157acf8 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/LegacyAdapterStateManagerTest.java +++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/LegacyAdapterStateManagerTest.java @@ -1,3 +1,7 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+ */ + package io.airbyte.integrations.source.relationaldb.state; import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.CURSOR_FIELD1; @@ -45,24 +49,25 @@ void testToState() { final AirbyteStateMessage expectedFirstEmission = new AirbyteStateMessage() .withData(Jsons.jsonNode(new DbState().withStreams(List.of( - new DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD1)) - .withCursor("a"), - new DbStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD2)), - new DbStreamState().withStreamName(STREAM_NAME3).withStreamNamespace(NAMESPACE)) - .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())) + new DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD1)) + .withCursor("a"), + new DbStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD2)), + new DbStreamState().withStreamName(STREAM_NAME3).withStreamNamespace(NAMESPACE)) + .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())) .withCdc(false))); final AirbyteStateMessage actualFirstEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR1, "a"); assertEquals(expectedFirstEmission, actualFirstEmission); final AirbyteStateMessage expectedSecondEmission = new AirbyteStateMessage() .withData(Jsons.jsonNode(new DbState().withStreams(List.of( - new DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD1)) - .withCursor("a"), - new DbStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD2)) - .withCursor("b"), - new DbStreamState().withStreamName(STREAM_NAME3).withStreamNamespace(NAMESPACE)) - 
.stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())) + new DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD1)) + .withCursor("a"), + new DbStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD2)) + .withCursor("b"), + new DbStreamState().withStreamName(STREAM_NAME3).withStreamNamespace(NAMESPACE)) + .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())) .withCdc(false))); final AirbyteStateMessage actualSecondEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR2, "b"); assertEquals(expectedSecondEmission, actualSecondEmission); } + } diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManagerTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManagerTest.java index 37d81fddd528..0362a84d8551 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManagerTest.java +++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManagerTest.java @@ -13,7 +13,6 @@ import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME1; import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME2; import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME3; -import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.getCatalog; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.mockito.Mockito.mock; @@ -263,4 +262,5 
@@ void testNullNameNamespacePairFiltered() { assertNotNull(result3); assertEquals(0, result3.getStreams().size()); } + } From a43cbb86c9de4ae00edbdcbca8aa9eda18e8bb98 Mon Sep 17 00:00:00 2001 From: jdpgrailsdev Date: Fri, 10 Jun 2022 15:25:24 -0400 Subject: [PATCH 07/34] Include legacy state for backwards compatibility --- .../source/postgres/PostgresSource.java | 2 +- .../source/relationaldb/AbstractDbSource.java | 8 ++-- .../state/PerStreamStateManager.java | 29 ++++++++++++-- .../state/PerStreamStateManagerTest.java | 40 +++++++++++++++++++ 4 files changed, 70 insertions(+), 9 deletions(-) diff --git a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java index 8cf04cf02bd3..c67f55c9e586 100644 --- a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java +++ b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java @@ -409,7 +409,7 @@ private static AirbyteStream addCdcMetadataColumns(final AirbyteStream stream) { // TODO This is a temporary override so that the Postgres source can take advantage of per-stream // state. @Override - protected AirbyteStateMessage serializeState(final JsonNode stateJson) { + protected AirbyteStateMessage deserializeState(final JsonNode stateJson) { if (stateJson == null) { // TODO What should the default/empty state be -- per stream or global? 
return new AirbyteStateMessage() diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java index 2def5cc59a3d..40cecf154370 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java @@ -106,7 +106,7 @@ public AutoCloseableIterator read(final JsonNode config, final ConfiguredAirbyteCatalog catalog, final JsonNode state) throws Exception { - final StateManager stateManager = StateManagerFactory.createStateManager(serializeState(state), catalog, config); + final StateManager stateManager = StateManagerFactory.createStateManager(deserializeState(state), catalog, config); final Instant emittedAt = Instant.now(); final Database database = createDatabaseInternal(config); @@ -511,12 +511,12 @@ private Database createDatabaseInternal(final JsonNode sourceConfig) throws Exce } /** - * Serializes the state represented as JSON into an object representation. + * Deserializes the state represented as JSON into an object representation. * * @param stateJson The state as JSON. - * @return The serialized object representation of the state. + * @return The deserialized object representation of the state. 
*/ - protected AirbyteStateMessage serializeState(final JsonNode stateJson) { + protected AirbyteStateMessage deserializeState(final JsonNode stateJson) { if (stateJson == null) { // For backwards compatibility with existing connectors return new AirbyteStateMessage().withData(Jsons.jsonNode(new DbState())); diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManager.java index 5931e0bb59bf..c0146aa79155 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManager.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManager.java @@ -9,6 +9,7 @@ import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; import io.airbyte.integrations.source.relationaldb.CdcStateManager; import io.airbyte.integrations.source.relationaldb.CursorInfo; +import io.airbyte.integrations.source.relationaldb.models.DbState; import io.airbyte.integrations.source.relationaldb.models.DbStreamState; import io.airbyte.protocol.models.AirbyteStateMessage; import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; @@ -16,6 +17,7 @@ import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import java.util.Collections; import java.util.List; +import java.util.Map; import java.util.Map.Entry; import java.util.Optional; import java.util.function.Function; @@ -80,18 +82,24 @@ public CdcStateManager getCdcStateManager() { @Override public AirbyteStateMessage toState() { + final Map pairToCursorInfoMap = getPairToCursorInfoMap(); final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage(); - final List airbyteStreamStates = generatePerStreamState(); - 
return airbyteStateMessage.withStateType(AirbyteStateType.PER_STREAM).withStreams(airbyteStreamStates); + final List airbyteStreamStates = generatePerStreamState(pairToCursorInfoMap); + return airbyteStateMessage + .withStateType(AirbyteStateType.PER_STREAM) + // Temporarily include legacy state for backwards compatibility with the platform + .withData(Jsons.jsonNode(generateDbState(pairToCursorInfoMap))) + .withStreams(airbyteStreamStates); } /** * Generates the per-stream state for each stream. * + * @param pairToCursorInfoMap The map of stream name/namespace tuple to the current cursor information for that stream * @return The list of per-stream state. */ - private List generatePerStreamState() { - return getPairToCursorInfoMap().entrySet().stream() + private List generatePerStreamState(final Map pairToCursorInfoMap) { + return pairToCursorInfoMap.entrySet().stream() .filter(s -> s.getKey().getName() != null && s.getKey().getNamespace() != null) .sorted(Entry.comparingByKey()) // sort by stream name then namespace for sanity. .map(e -> new AirbyteStreamState() @@ -101,6 +109,19 @@ private List generatePerStreamState() { .collect(Collectors.toList()); } + /** + * Generates the legacy global state for backwards compatibility. + * + * @param pairToCursorInfoMap The map of stream name/namespace tuple to the current cursor information for that stream + * @return The legacy {@link DbState}. + */ + private DbState generateDbState(final Map pairToCursorInfoMap) { + return new DbState().withStreams(pairToCursorInfoMap.entrySet().stream() + .sorted(Entry.comparingByKey()) // sort by stream name then namespace for sanity. + .map(e -> generateDbStreamState(e.getKey(), e.getValue())) + .collect(Collectors.toList())); + } + /** * Generates the {@link DbStreamState} for the given stream and cursor. 
* diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManagerTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManagerTest.java index 0362a84d8551..f411f2c8bdbc 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManagerTest.java +++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManagerTest.java @@ -22,6 +22,7 @@ import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; import io.airbyte.integrations.source.relationaldb.CursorInfo; +import io.airbyte.integrations.source.relationaldb.models.DbState; import io.airbyte.integrations.source.relationaldb.models.DbStreamState; import io.airbyte.protocol.models.AirbyteStateMessage; import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; @@ -120,6 +121,20 @@ void testToState() { final AirbyteStateMessage expectedFirstEmission = new AirbyteStateMessage() .withStateType(AirbyteStateType.PER_STREAM) + .withData(Jsons.jsonNode(new DbState().withStreams(List.of( + new DbStreamState() + .withStreamName(STREAM_NAME1) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD1)) + .withCursor("a"), + new DbStreamState() + .withStreamName(STREAM_NAME2) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD2)), + new DbStreamState() + .withStreamName(STREAM_NAME3) + .withStreamNamespace(NAMESPACE) + ).stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())))) .withStreams(List.of( new AirbyteStreamState() .withName(STREAM_NAME1) @@ -153,6 +168,21 @@ void testToState() { assertEquals(expectedFirstEmission, actualFirstEmission); final 
AirbyteStateMessage expectedSecondEmission = new AirbyteStateMessage() .withStateType(AirbyteStateType.PER_STREAM) + .withData(Jsons.jsonNode(new DbState().withStreams(List.of( + new DbStreamState() + .withStreamName(STREAM_NAME1) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD1)) + .withCursor("a"), + new DbStreamState() + .withStreamName(STREAM_NAME2) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD2)) + .withCursor("b"), + new DbStreamState() + .withStreamName(STREAM_NAME3) + .withStreamNamespace(NAMESPACE) + ).stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())))) .withStreams(List.of( new AirbyteStreamState() .withName(STREAM_NAME1) @@ -201,6 +231,16 @@ void testToStateNullCursorField() { final AirbyteStateMessage expectedFirstEmission = new AirbyteStateMessage() .withStateType(AirbyteStateType.PER_STREAM) + .withData(Jsons.jsonNode(new DbState().withStreams(List.of( + new DbStreamState() + .withStreamName(STREAM_NAME1) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD1)) + .withCursor("a"), + new DbStreamState() + .withStreamName(STREAM_NAME2) + .withStreamNamespace(NAMESPACE) + ).stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())))) .withStreams( List.of( new AirbyteStreamState() From b1086044ffcb8a47a478b554da810302e7d9f5d4 Mon Sep 17 00:00:00 2001 From: jdpgrailsdev Date: Fri, 10 Jun 2022 15:38:13 -0400 Subject: [PATCH 08/34] Add global state manager --- .../state/GlobalStateManager.java | 57 +++++++++ .../state/LegacyStateManager.java | 6 +- .../state/PerStreamStateManager.java | 121 ++---------------- .../state/StateGeneratorUtils.java | 120 +++++++++++++++++ .../state/StateManagerFactory.java | 3 +- .../state/StateManagerFactoryTest.java | 4 +- 6 files changed, 194 insertions(+), 117 deletions(-) create mode 100644 
airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/GlobalStateManager.java create mode 100644 airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtils.java diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/GlobalStateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/GlobalStateManager.java new file mode 100644 index 000000000000..686565998f64 --- /dev/null +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/GlobalStateManager.java @@ -0,0 +1,57 @@ +package io.airbyte.integrations.source.relationaldb.state; + +import static io.airbyte.integrations.source.relationaldb.state.StateGeneratorUtils.CURSOR_FIELD_FUNCTION; +import static io.airbyte.integrations.source.relationaldb.state.StateGeneratorUtils.CURSOR_FUNCTION; +import static io.airbyte.integrations.source.relationaldb.state.StateGeneratorUtils.NAME_NAMESPACE_PAIR_FUNCTION; + +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; +import io.airbyte.integrations.source.relationaldb.CdcStateManager; +import io.airbyte.integrations.source.relationaldb.CursorInfo; +import io.airbyte.protocol.models.AirbyteStateMessage; +import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; +import io.airbyte.protocol.models.AirbyteStreamState; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import java.util.Map; + +/** + * Global implementation of the {@link StateManager} interface. + * + * This implementation generates a single, global state object for the state + * tracked by this manager. 
+ */ +public class GlobalStateManager extends AbstractStateManager { + + /** + * Constructs a new {@link GlobalStateManager} that is seeded with the provided + * {@link AirbyteStateMessage}. + * + * @param airbyteStateMessage The initial state represented as an {@link AirbyteStateMessage}. + * @param catalog The {@link ConfiguredAirbyteCatalog} for the connector associated with this state + * manager. + */ + public GlobalStateManager(final AirbyteStateMessage airbyteStateMessage, final ConfiguredAirbyteCatalog catalog) { + super(catalog, + () -> airbyteStateMessage.getStreams(), + CURSOR_FUNCTION, + CURSOR_FIELD_FUNCTION, + NAME_NAMESPACE_PAIR_FUNCTION); + } + + @Override + public CdcStateManager getCdcStateManager() { + return null; + } + + @Override + public AirbyteStateMessage toState() { + final Map pairToCursorInfoMap = getPairToCursorInfoMap(); + final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage(); + return airbyteStateMessage + .withStateType(AirbyteStateType.GLOBAL) + // Temporarily include legacy state for backwards compatibility with the platform + .withData(Jsons.jsonNode(StateGeneratorUtils.generateDbState(pairToCursorInfoMap))) + // TODO generate global state + .withGlobal(null); + } +} diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManager.java index 2613ebe4bdcd..70d418eff0f6 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManager.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManager.java @@ -95,11 +95,7 @@ public AirbyteStateMessage toState() { .withCdc(isCdc) 
.withStreams(getPairToCursorInfoMap().entrySet().stream() .sorted(Entry.comparingByKey()) // sort by stream name then namespace for sanity. - .map(e -> new DbStreamState() - .withStreamName(e.getKey().getName()) - .withStreamNamespace(e.getKey().getNamespace()) - .withCursorField(e.getValue().getCursorField() == null ? Collections.emptyList() : List.of(e.getValue().getCursorField())) - .withCursor(e.getValue().getCursor())) + .map(e -> StateGeneratorUtils.generateDbStreamState(e.getKey(), e.getValue())) .collect(Collectors.toList())) .withCdcState(getCdcStateManager().getCdcState()); diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManager.java index c0146aa79155..9e8f31555243 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManager.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManager.java @@ -4,61 +4,29 @@ package io.airbyte.integrations.source.relationaldb.state; -import com.google.common.collect.Lists; +import static io.airbyte.integrations.source.relationaldb.state.StateGeneratorUtils.CURSOR_FIELD_FUNCTION; +import static io.airbyte.integrations.source.relationaldb.state.StateGeneratorUtils.CURSOR_FUNCTION; +import static io.airbyte.integrations.source.relationaldb.state.StateGeneratorUtils.NAME_NAMESPACE_PAIR_FUNCTION; + import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; import io.airbyte.integrations.source.relationaldb.CdcStateManager; import io.airbyte.integrations.source.relationaldb.CursorInfo; -import io.airbyte.integrations.source.relationaldb.models.DbState; -import 
io.airbyte.integrations.source.relationaldb.models.DbStreamState; import io.airbyte.protocol.models.AirbyteStateMessage; import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; import io.airbyte.protocol.models.AirbyteStreamState; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; -import java.util.Collections; import java.util.List; import java.util.Map; -import java.util.Map.Entry; -import java.util.Optional; -import java.util.function.Function; -import java.util.stream.Collectors; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +/** + * Per-stream implementation of the {@link StateManager} interface. + * + * This implementation generates a state object for each stream detected in catalog/map of known + * streams to cursor information stored in this manager. + */ public class PerStreamStateManager extends AbstractStateManager { - private static final Logger LOGGER = LoggerFactory.getLogger(PerStreamStateManager.class); - - /** - * {@link Function} that extracts the cursor from the stream state. - */ - private static final Function CURSOR_FUNCTION = stream -> { - final Optional dbStreamState = extractState(stream); - if (dbStreamState.isPresent()) { - return dbStreamState.get().getCursor(); - } else { - return null; - } - }; - - /** - * {@link Function} that extracts the cursor field(s) from the stream state. - */ - private static final Function> CURSOR_FIELD_FUNCTION = stream -> { - final Optional dbStreamState = extractState(stream); - if (dbStreamState.isPresent()) { - return dbStreamState.get().getCursorField(); - } else { - return List.of(); - } - }; - - /** - * {@link Function} that creates an {@link AirbyteStreamNameNamespacePair} from the stream state. - */ - private static final Function NAME_NAMESPACE_PAIR_FUNCTION = - s -> new AirbyteStreamNameNamespacePair(s.getName(), s.getNamespace()); - /** * Constructs a new {@link PerStreamStateManager} that is seeded with the provided * {@link AirbyteStateMessage}. 
@@ -84,74 +52,11 @@ public CdcStateManager getCdcStateManager() { public AirbyteStateMessage toState() { final Map pairToCursorInfoMap = getPairToCursorInfoMap(); final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage(); - final List airbyteStreamStates = generatePerStreamState(pairToCursorInfoMap); + final List airbyteStreamStates = StateGeneratorUtils.generatePerStreamState(pairToCursorInfoMap); return airbyteStateMessage .withStateType(AirbyteStateType.PER_STREAM) // Temporarily include legacy state for backwards compatibility with the platform - .withData(Jsons.jsonNode(generateDbState(pairToCursorInfoMap))) + .withData(Jsons.jsonNode(StateGeneratorUtils.generateDbState(pairToCursorInfoMap))) .withStreams(airbyteStreamStates); } - - /** - * Generates the per-stream state for each stream. - * - * @param pairToCursorInfoMap The map of stream name/namespace tuple to the current cursor information for that stream - * @return The list of per-stream state. - */ - private List generatePerStreamState(final Map pairToCursorInfoMap) { - return pairToCursorInfoMap.entrySet().stream() - .filter(s -> s.getKey().getName() != null && s.getKey().getNamespace() != null) - .sorted(Entry.comparingByKey()) // sort by stream name then namespace for sanity. - .map(e -> new AirbyteStreamState() - .withName(e.getKey().getName()) - .withNamespace(e.getKey().getNamespace()) - .withState(Jsons.jsonNode(generateDbStreamState(e.getKey(), e.getValue())))) - .collect(Collectors.toList()); - } - - /** - * Generates the legacy global state for backwards compatibility. - * - * @param pairToCursorInfoMap The map of stream name/namespace tuple to the current cursor information for that stream - * @return The legacy {@link DbState}. - */ - private DbState generateDbState(final Map pairToCursorInfoMap) { - return new DbState().withStreams(pairToCursorInfoMap.entrySet().stream() - .sorted(Entry.comparingByKey()) // sort by stream name then namespace for sanity. 
- .map(e -> generateDbStreamState(e.getKey(), e.getValue())) - .collect(Collectors.toList())); - } - - /** - * Generates the {@link DbStreamState} for the given stream and cursor. - * - * @param airbyteStreamNameNamespacePair The stream. - * @param cursorInfo The current cursor. - * @return The {@link DbStreamState}. - */ - private DbStreamState generateDbStreamState(final AirbyteStreamNameNamespacePair airbyteStreamNameNamespacePair, final CursorInfo cursorInfo) { - return new DbStreamState() - .withStreamName(airbyteStreamNameNamespacePair.getName()) - .withStreamNamespace(airbyteStreamNameNamespacePair.getNamespace()) - .withCursorField(cursorInfo.getCursorField() == null ? Collections.emptyList() : Lists.newArrayList(cursorInfo.getCursorField())) - .withCursor(cursorInfo.getCursor()); - } - - /** - * Extracts the actual state from the {@link AirbyteStreamState} object. - * - * @param state The {@link AirbyteStreamState} that contains the actual stream state as JSON. - * @return An {@link Optional} possibly containing the deserialized representation of the stream - * state or an empty {@link Optional} if the state is not present or could not be - * deserialized. 
- */ - private static Optional extractState(final AirbyteStreamState state) { - try { - return Optional.ofNullable(Jsons.object(state.getState(), DbStreamState.class)); - } catch (final IllegalArgumentException e) { - LOGGER.error("Unable to extract state.", e); - return Optional.empty(); - } - } - -} +} \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtils.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtils.java new file mode 100644 index 000000000000..f74b0e6d4846 --- /dev/null +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtils.java @@ -0,0 +1,120 @@ +package io.airbyte.integrations.source.relationaldb.state; + +import com.google.common.collect.Lists; +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; +import io.airbyte.integrations.source.relationaldb.CursorInfo; +import io.airbyte.integrations.source.relationaldb.models.DbState; +import io.airbyte.integrations.source.relationaldb.models.DbStreamState; +import io.airbyte.protocol.models.AirbyteStreamState; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Optional; +import java.util.function.Function; +import java.util.stream.Collectors; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Collection of utilities that facilitate the generation of state objects. + */ +public class StateGeneratorUtils { + + private static final Logger LOGGER = LoggerFactory.getLogger(StateGeneratorUtils.class); + + /** + * {@link Function} that extracts the cursor from the stream state. 
+ */ + public static final Function CURSOR_FUNCTION = stream -> { + final Optional dbStreamState = StateGeneratorUtils.extractState(stream); + if (dbStreamState.isPresent()) { + return dbStreamState.get().getCursor(); + } else { + return null; + } + }; + + /** + * {@link Function} that extracts the cursor field(s) from the stream state. + */ + public static final Function> CURSOR_FIELD_FUNCTION = stream -> { + final Optional dbStreamState = StateGeneratorUtils.extractState(stream); + if (dbStreamState.isPresent()) { + return dbStreamState.get().getCursorField(); + } else { + return List.of(); + } + }; + + /** + * {@link Function} that creates an {@link AirbyteStreamNameNamespacePair} from the stream state. + */ + public static final Function NAME_NAMESPACE_PAIR_FUNCTION = + s -> new AirbyteStreamNameNamespacePair(s.getName(), s.getNamespace()); + + private StateGeneratorUtils() {} + + /** + * Generates the per-stream state for each stream. + * + * @param pairToCursorInfoMap The map of stream name/namespace tuple to the current cursor information for that stream + * @return The list of per-stream state. + */ + public static List generatePerStreamState(final Map pairToCursorInfoMap) { + return pairToCursorInfoMap.entrySet().stream() + .filter(s -> s.getKey().getName() != null && s.getKey().getNamespace() != null) + .sorted(Entry.comparingByKey()) // sort by stream name then namespace for sanity. + .map(e -> new AirbyteStreamState() + .withName(e.getKey().getName()) + .withNamespace(e.getKey().getNamespace()) + .withState(Jsons.jsonNode(generateDbStreamState(e.getKey(), e.getValue())))) + .collect(Collectors.toList()); + } + + /** + * Generates the legacy global state for backwards compatibility. + * + * @param pairToCursorInfoMap The map of stream name/namespace tuple to the current cursor information for that stream + * @return The legacy {@link DbState}. 
+ */ + public static DbState generateDbState(final Map pairToCursorInfoMap) { + return new DbState().withStreams(pairToCursorInfoMap.entrySet().stream() + .sorted(Entry.comparingByKey()) // sort by stream name then namespace for sanity. + .map(e -> generateDbStreamState(e.getKey(), e.getValue())) + .collect(Collectors.toList())); + } + + /** + * Generates the {@link DbStreamState} for the given stream and cursor. + * + * @param airbyteStreamNameNamespacePair The stream. + * @param cursorInfo The current cursor. + * @return The {@link DbStreamState}. + */ + public static DbStreamState generateDbStreamState(final AirbyteStreamNameNamespacePair airbyteStreamNameNamespacePair, final CursorInfo cursorInfo) { + return new DbStreamState() + .withStreamName(airbyteStreamNameNamespacePair.getName()) + .withStreamNamespace(airbyteStreamNameNamespacePair.getNamespace()) + .withCursorField(cursorInfo.getCursorField() == null ? Collections.emptyList() : Lists.newArrayList(cursorInfo.getCursorField())) + .withCursor(cursorInfo.getCursor()); + } + + /** + * Extracts the actual state from the {@link AirbyteStreamState} object. + * + * @param state The {@link AirbyteStreamState} that contains the actual stream state as JSON. + * @return An {@link Optional} possibly containing the deserialized representation of the stream + * state or an empty {@link Optional} if the state is not present or could not be + * deserialized. 
+ */ + public static Optional extractState(final AirbyteStreamState state) { + try { + return Optional.ofNullable(Jsons.object(state.getState(), DbStreamState.class)); + } catch (final IllegalArgumentException e) { + LOGGER.error("Unable to extract state.", e); + return Optional.empty(); + } + } +} diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java index d39d6daca992..1c92cc6abd71 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java @@ -49,8 +49,7 @@ public static StateManager createStateManager(final Object state, final Configur return new PerStreamStateManager(airbyteStateMessage, catalog); } else { LOGGER.info("Global state manager selected to manage state object with type {}.", state.getClass().getName()); - // TODO create proper Global state manager - return null; + return new GlobalStateManager(airbyteStateMessage, catalog); } } else if (state instanceof DbState dbState) { LOGGER.info("Legacy state manager selected to manage state object with type {}.", state.getClass().getName()); diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java index 95519bed3398..4df2964deb1e 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java 
+++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java @@ -76,8 +76,8 @@ void testGlobalStateManagerCreation() { final StateManager stateManager = StateManagerFactory.createStateManager(airbyteStateMessage, catalog, config); - // TODO replace with non-null assertion and type assertion once the Global state manager exists - Assertions.assertNull(stateManager); + Assertions.assertNotNull(stateManager); + Assertions.assertEquals(GlobalStateManager.class, stateManager.getClass()); } @Test From f3f2499eb177764264cd1948d3545c172271e226 Mon Sep 17 00:00:00 2001 From: jdpgrailsdev Date: Mon, 13 Jun 2022 15:42:03 -0400 Subject: [PATCH 09/34] Implement Global/CDC state handling --- .../source/postgres/PostgresSource.java | 14 +- .../source/relationaldb/AbstractDbSource.java | 10 +- .../state/AbstractStateManager.java | 2 +- .../AirbyteStateMessageListTypeReference.java | 13 + .../relationaldb/state/CursorManager.java | 2 +- .../state/GlobalStateManager.java | 94 +++++- .../state/LegacyAdapterStateManager.java | 54 ---- .../state/LegacyStateManager.java | 18 +- .../state/StateGeneratorUtils.java | 78 +++-- .../relationaldb/state/StateManager.java | 15 +- .../state/StateManagerFactory.java | 34 +- ...teManager.java => StreamStateManager.java} | 44 ++- .../relationaldb/state/CursorManagerTest.java | 28 -- .../state/GlobalStateManagerTest.java | 203 ++++++++++++ .../state/LegacyAdapterStateManagerTest.java | 73 ----- .../state/LegacyStateManagerTest.java | 6 + .../state/PerStreamStateManagerTest.java | 306 ------------------ .../state/StateGeneratorUtilsTest.java | 33 ++ .../state/StateManagerFactoryTest.java | 103 +++--- .../state/StreamStateManagerTest.java | 227 +++++++++++++ 20 files changed, 743 insertions(+), 614 deletions(-) create mode 100644 
airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/AirbyteStateMessageListTypeReference.java delete mode 100644 airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/LegacyAdapterStateManager.java rename airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/{PerStreamStateManager.java => StreamStateManager.java} (50%) create mode 100644 airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/GlobalStateManagerTest.java delete mode 100644 airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/LegacyAdapterStateManagerTest.java delete mode 100644 airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManagerTest.java create mode 100644 airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtilsTest.java create mode 100644 airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StreamStateManagerTest.java diff --git a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java index c67f55c9e586..016e0071c8da 100644 --- a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java +++ b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java @@ -27,6 +27,7 @@ import io.airbyte.integrations.source.jdbc.AbstractJdbcSource; import 
io.airbyte.integrations.source.jdbc.dto.JdbcPrivilegeDto; import io.airbyte.integrations.source.relationaldb.TableInfo; +import io.airbyte.integrations.source.relationaldb.state.AirbyteStateMessageListTypeReference; import io.airbyte.integrations.source.relationaldb.state.StateManager; import io.airbyte.protocol.models.AirbyteCatalog; import io.airbyte.protocol.models.AirbyteConnectionStatus; @@ -34,6 +35,7 @@ import io.airbyte.protocol.models.AirbyteStateMessage; import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; import io.airbyte.protocol.models.AirbyteStream; +import io.airbyte.protocol.models.AirbyteStreamState; import io.airbyte.protocol.models.CommonField; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import io.airbyte.protocol.models.SyncMode; @@ -409,18 +411,18 @@ private static AirbyteStream addCdcMetadataColumns(final AirbyteStream stream) { // TODO This is a temporary override so that the Postgres source can take advantage of per-stream // state. @Override - protected AirbyteStateMessage deserializeState(final JsonNode stateJson) { + protected List deserializeState(final JsonNode stateJson) { if (stateJson == null) { // TODO What should the default/empty state be -- per stream or global? 
- return new AirbyteStateMessage() - .withStateType(AirbyteStateType.PER_STREAM) - .withStreams(List.of()); + return List.of(new AirbyteStateMessage() + .withStateType(AirbyteStateType.STREAM) + .withStream(new AirbyteStreamState())); } else { try { - return Jsons.object(stateJson, AirbyteStateMessage.class); + return Jsons.object(stateJson, new AirbyteStateMessageListTypeReference()); } catch (final IllegalArgumentException e) { LOGGER.warn("Defaulting to legacy state object..."); - return new AirbyteStateMessage().withData(stateJson); + return List.of(new AirbyteStateMessage().withStateType(AirbyteStateType.LEGACY).withData(stateJson)); } } } diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java index 40cecf154370..4dc28d1bcdd7 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java @@ -20,6 +20,7 @@ import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; import io.airbyte.integrations.base.Source; import io.airbyte.integrations.source.relationaldb.models.DbState; +import io.airbyte.integrations.source.relationaldb.state.AirbyteStateMessageListTypeReference; import io.airbyte.integrations.source.relationaldb.state.StateManager; import io.airbyte.integrations.source.relationaldb.state.StateManagerFactory; import io.airbyte.protocol.models.AirbyteCatalog; @@ -29,6 +30,7 @@ import io.airbyte.protocol.models.AirbyteMessage.Type; import io.airbyte.protocol.models.AirbyteRecordMessage; import io.airbyte.protocol.models.AirbyteStateMessage; +import 
io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; import io.airbyte.protocol.models.AirbyteStream; import io.airbyte.protocol.models.CatalogHelpers; import io.airbyte.protocol.models.CommonField; @@ -516,16 +518,16 @@ private Database createDatabaseInternal(final JsonNode sourceConfig) throws Exce * @param stateJson The state as JSON. * @return The deserialized object representation of the state. */ - protected AirbyteStateMessage deserializeState(final JsonNode stateJson) { + protected List deserializeState(final JsonNode stateJson) { if (stateJson == null) { // For backwards compatibility with existing connectors - return new AirbyteStateMessage().withData(Jsons.jsonNode(new DbState())); + return List.of(new AirbyteStateMessage().withStateType(AirbyteStateType.LEGACY).withData(Jsons.jsonNode(new DbState()))); } else { try { - return Jsons.object(stateJson, AirbyteStateMessage.class); + return Jsons.object(stateJson, new AirbyteStateMessageListTypeReference()); } catch (final IllegalArgumentException e) { LOGGER.warn("Defaulting to legacy state object..."); - return new AirbyteStateMessage().withData(stateJson); + return List.of(new AirbyteStateMessage().withStateType(AirbyteStateType.LEGACY).withData(stateJson)); } } } diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/AbstractStateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/AbstractStateManager.java index cac2ad478c42..6c56a8a2c79a 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/AbstractStateManager.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/AbstractStateManager.java @@ -57,6 +57,6 @@ public Map getPairToCursorInfoMap() } @Override - public abstract AirbyteStateMessage 
toState(); + public abstract AirbyteStateMessage toState(final AirbyteStreamNameNamespacePair pair); } diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/AirbyteStateMessageListTypeReference.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/AirbyteStateMessageListTypeReference.java new file mode 100644 index 000000000000..c7e153e6d79a --- /dev/null +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/AirbyteStateMessageListTypeReference.java @@ -0,0 +1,13 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.relationaldb.state; + +import com.fasterxml.jackson.core.type.TypeReference; +import io.airbyte.protocol.models.AirbyteStateMessage; +import java.util.List; + +public class AirbyteStateMessageListTypeReference extends TypeReference> { + +} diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/CursorManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/CursorManager.java index 5006467882bb..86038797e55b 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/CursorManager.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/CursorManager.java @@ -87,7 +87,7 @@ protected Map createCursorInfoMap( .map(ConfiguredAirbyteStream::getStream) .map(AirbyteStreamNameNamespacePair::fromAirbyteSteam) .collect(Collectors.toSet()); - allStreamNames.addAll(streamSupplier.get().stream().map(namespacePairFunction).collect(Collectors.toSet())); + 
allStreamNames.addAll(streamSupplier.get().stream().map(namespacePairFunction).filter(n -> n != null).collect(Collectors.toSet())); final Map localMap = new HashMap<>(); final Map pairToState = streamSupplier.get() diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/GlobalStateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/GlobalStateManager.java index 686565998f64..1e9f02af7c96 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/GlobalStateManager.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/GlobalStateManager.java @@ -1,3 +1,7 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. + */ + package io.airbyte.integrations.source.relationaldb.state; import static io.airbyte.integrations.source.relationaldb.state.StateGeneratorUtils.CURSOR_FIELD_FUNCTION; @@ -7,21 +11,32 @@ import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; import io.airbyte.integrations.source.relationaldb.CdcStateManager; -import io.airbyte.integrations.source.relationaldb.CursorInfo; +import io.airbyte.integrations.source.relationaldb.models.CdcState; +import io.airbyte.integrations.source.relationaldb.models.DbState; +import io.airbyte.protocol.models.AirbyteGlobalState; import io.airbyte.protocol.models.AirbyteStateMessage; import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; import io.airbyte.protocol.models.AirbyteStreamState; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; -import java.util.Map; +import io.airbyte.protocol.models.StreamDescriptor; +import java.util.Collection; +import java.util.function.Supplier; +import java.util.stream.Collectors; /** * Global implementation of 
the {@link StateManager} interface. * - * This implementation generates a single, global state object for the state - * tracked by this manager. + * This implementation generates a single, global state object for the state tracked by this + * manager. */ public class GlobalStateManager extends AbstractStateManager { + /** + * Legacy {@link CdcStateManager} used to manage state for connectors that support Change Data + * Capture (CDC). + */ + private final CdcStateManager cdcStateManager; + /** * Constructs a new {@link GlobalStateManager} that is seeded with the provided * {@link AirbyteStateMessage}. @@ -32,26 +47,75 @@ public class GlobalStateManager extends AbstractStateManager airbyteStateMessage.getStreams(), + getStreamsSupplier(airbyteStateMessage), CURSOR_FUNCTION, CURSOR_FIELD_FUNCTION, NAME_NAMESPACE_PAIR_FUNCTION); + + this.cdcStateManager = new CdcStateManager(extractCdcState(airbyteStateMessage)); } @Override public CdcStateManager getCdcStateManager() { - return null; + return cdcStateManager; } @Override - public AirbyteStateMessage toState() { - final Map pairToCursorInfoMap = getPairToCursorInfoMap(); - final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage(); - return airbyteStateMessage - .withStateType(AirbyteStateType.GLOBAL) - // Temporarily include legacy state for backwards compatibility with the platform - .withData(Jsons.jsonNode(StateGeneratorUtils.generateDbState(pairToCursorInfoMap))) - // TODO generate global state - .withGlobal(null); + public AirbyteStateMessage toState(final AirbyteStreamNameNamespacePair pair) { + // Populate global state + final AirbyteGlobalState globalState = new AirbyteGlobalState(); + globalState.setSharedState(Jsons.jsonNode(getCdcStateManager().getCdcState())); + globalState.setStreamStates(StateGeneratorUtils.generateStreamStateList(getPairToCursorInfoMap())); + + // Generate the legacy state for backwards compatibility + final DbState dbState = 
StateGeneratorUtils.generateDbState(getPairToCursorInfoMap()) + .withCdc(true) + .withCdcState(getCdcStateManager().getCdcState()); + + return new AirbyteStateMessage() + .withStateType(AirbyteStateType.GLOBAL) + // Temporarily include legacy state for backwards compatibility with the platform + .withData(Jsons.jsonNode(dbState)) + .withGlobal(globalState); + } + + /** + * Extracts the Change Data Capture (CDC) state stored in the initial state provided to this state + * manager. + * + * @param airbyteStateMessage The {@link AirbyteStateMessage} that contains the initial state + * provided to the state manager. + * @return The {@link CdcState} stored in the state, if any. Note that this will not be {@code null} + * but may be empty. + */ + private CdcState extractCdcState(final AirbyteStateMessage airbyteStateMessage) { + if (airbyteStateMessage.getStateType() == AirbyteStateType.GLOBAL) { + return Jsons.object(airbyteStateMessage.getGlobal().getSharedState(), DbState.class).getCdcState(); + } else { + return Jsons.object(airbyteStateMessage.getData(), DbState.class).getCdcState(); } + } + + /** + * Generates the {@link Supplier} that will be used to extract the streams from the incoming + * {@link AirbyteStateMessage}. + * + * @param airbyteStateMessage The {@link AirbyteStateMessage} supplied to this state manager with + * the initial state. + * @return A {@link Supplier} that will be used to fetch the streams present in the initial state. + */ + private static Supplier> getStreamsSupplier(final AirbyteStateMessage airbyteStateMessage) { + /* + * If the incoming message has the state type set to GLOBAL, it is using the new format. Therefore, + * we can look for streams in the "global" field of the message. Otherwise, the message is still + * storing state in the legacy "data" field. + */ + return () -> airbyteStateMessage.getStateType() == AirbyteStateType.GLOBAL ? 
airbyteStateMessage.getGlobal().getStreamStates() + : Jsons.object(airbyteStateMessage.getData(), DbState.class).getStreams().stream() + .map(s -> new AirbyteStreamState().withStreamState(Jsons.jsonNode(s)) + .withStreamDescriptor(new StreamDescriptor().withNamespace(s.getStreamNamespace()).withName(s.getStreamName()))) + .collect( + Collectors.toList()); + } + } diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/LegacyAdapterStateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/LegacyAdapterStateManager.java deleted file mode 100644 index 80febe7c32bf..000000000000 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/LegacyAdapterStateManager.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2022 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.integrations.source.relationaldb.state; - -import io.airbyte.integrations.source.relationaldb.models.DbState; -import io.airbyte.protocol.models.AirbyteStateMessage; -import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; - -/** - * Variant of the {@link LegacyStateManager} that ensures that the state type is set on any state - * message generated by this manager.
- *

- * This manager exists to handle the case of a connector that has been updated to use this code, but - * has not yet been migrated to use the new per-stream state mechanics. This is a temporary state - * and this class will be removed once all connectors have been updated AND migrated to the new - * state management mechanism. - *

- *
- *

- * N.B. This case is different from connectors who have not yet been released with code at or - * beyond the point at which this class was introduced. In that case, those connectors will continue - * to use the {@link LegacyStateManager}, as they will continue to receive the legacy state JSON as - * input. - *

- * - * @deprecated This manager may be removed in the future if/once all connectors support per-stream - * state management. - */ -@Deprecated(forRemoval = true) -public class LegacyAdapterStateManager extends LegacyStateManager { - - /** - * Constructs a new {@link LegacyStateManager} that is seeded with the provided {@link DbState} - * instance. - * - * @param dbState The initial state represented as an {@link DbState} instance. - * @param catalog The {@link ConfiguredAirbyteCatalog} for the connector associated with this state - * manager. - */ - public LegacyAdapterStateManager(final DbState dbState, final ConfiguredAirbyteCatalog catalog) { - super(dbState, catalog); - } - - @Override - public AirbyteStateMessage toState() { - final AirbyteStateMessage airbyteStateMessage = super.toState(); - // TOD add the legacy state type once available. - // return airbyteStateMessage.withStateType(AirbyteStateType.LEGACY); - return airbyteStateMessage; - } - -} diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManager.java index 70d418eff0f6..ec3e666b5f0e 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManager.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManager.java @@ -12,13 +12,11 @@ import io.airbyte.integrations.source.relationaldb.models.DbState; import io.airbyte.integrations.source.relationaldb.models.DbStreamState; import io.airbyte.protocol.models.AirbyteStateMessage; +import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; -import java.util.Collections; import 
java.util.List; -import java.util.Map.Entry; import java.util.Optional; import java.util.function.Function; -import java.util.stream.Collectors; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -90,17 +88,13 @@ public CdcStateManager getCdcStateManager() { } @Override - public AirbyteStateMessage toState() { - final DbState DbState = new DbState() + public AirbyteStateMessage toState(final AirbyteStreamNameNamespacePair pair) { + final DbState dbState = StateGeneratorUtils.generateDbState(getPairToCursorInfoMap()) .withCdc(isCdc) - .withStreams(getPairToCursorInfoMap().entrySet().stream() - .sorted(Entry.comparingByKey()) // sort by stream name then namespace for sanity. - .map(e -> StateGeneratorUtils.generateDbStreamState(e.getKey(), e.getValue())) - .collect(Collectors.toList())) .withCdcState(getCdcStateManager().getCdcState()); - LOGGER.info("Generated legacy state for {} streams"); - return new AirbyteStateMessage().withData(Jsons.jsonNode(DbState)); + LOGGER.info("Generated legacy state for {} streams", dbState.getStreams().size()); + return new AirbyteStateMessage().withStateType(AirbyteStateType.LEGACY).withData(Jsons.jsonNode(dbState)); } @Override @@ -112,7 +106,7 @@ public AirbyteStateMessage updateAndEmit(final AirbyteStreamNameNamespacePair pa cursorInfo.get().setCursor(cursor); } - return toState(); + return toState(pair); } } diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtils.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtils.java index f74b0e6d4846..a8ab92e2294e 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtils.java +++ 
b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtils.java @@ -1,3 +1,7 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. + */ + package io.airbyte.integrations.source.relationaldb.state; import com.google.common.collect.Lists; @@ -7,6 +11,7 @@ import io.airbyte.integrations.source.relationaldb.models.DbState; import io.airbyte.integrations.source.relationaldb.models.DbStreamState; import io.airbyte.protocol.models.AirbyteStreamState; +import io.airbyte.protocol.models.StreamDescriptor; import java.util.Collections; import java.util.List; import java.util.Map; @@ -29,11 +34,7 @@ public class StateGeneratorUtils { */ public static final Function CURSOR_FUNCTION = stream -> { final Optional dbStreamState = StateGeneratorUtils.extractState(stream); - if (dbStreamState.isPresent()) { - return dbStreamState.get().getCursor(); - } else { - return null; - } + return dbStreamState.map(DbStreamState::getCursor).orElse(null); }; /** @@ -52,34 +53,53 @@ public class StateGeneratorUtils { * {@link Function} that creates an {@link AirbyteStreamNameNamespacePair} from the stream state. */ public static final Function NAME_NAMESPACE_PAIR_FUNCTION = - s -> new AirbyteStreamNameNamespacePair(s.getName(), s.getNamespace()); + s -> isValidStreamDescriptor(s.getStreamDescriptor()) + ? new AirbyteStreamNameNamespacePair(s.getStreamDescriptor().getName(), s.getStreamDescriptor().getNamespace()) + : null; private StateGeneratorUtils() {} /** - * Generates the per-stream state for each stream. + * Generates the stream state for the given stream and cursor information. * - * @param pairToCursorInfoMap The map of stream name/namespace tuple to the current cursor information for that stream - * @return The list of per-stream state. + * @param airbyteStreamNameNamespacePair The stream. + * @param cursorInfo The current cursor. 
+ * @return The {@link AirbyteStreamState} representing the current state of the stream. */ - public static List generatePerStreamState(final Map pairToCursorInfoMap) { + public static AirbyteStreamState generateStreamState(final AirbyteStreamNameNamespacePair airbyteStreamNameNamespacePair, + final CursorInfo cursorInfo) { + return new AirbyteStreamState() + .withStreamDescriptor( + new StreamDescriptor().withName(airbyteStreamNameNamespacePair.getName()).withNamespace(airbyteStreamNameNamespacePair.getNamespace())) + .withStreamState(Jsons.jsonNode(generateDbStreamState(airbyteStreamNameNamespacePair, cursorInfo))); + } + + /** + * Generates a list of valid stream states from the provided stream and cursor information. A stream + * state is considered to be valid if the stream has a valid descriptor (see + * {@link #isValidStreamDescriptor(StreamDescriptor)} for more details). + * + * @param pairToCursorInfoMap The map of stream name/namespace tuple to the current cursor + * information for that stream + * @return The list of stream states derived from the state information extracted from the provided + * map. + */ + public static List generateStreamStateList(final Map pairToCursorInfoMap) { return pairToCursorInfoMap.entrySet().stream() - .filter(s -> s.getKey().getName() != null && s.getKey().getNamespace() != null) - .sorted(Entry.comparingByKey()) // sort by stream name then namespace for sanity. - .map(e -> new AirbyteStreamState() - .withName(e.getKey().getName()) - .withNamespace(e.getKey().getNamespace()) - .withState(Jsons.jsonNode(generateDbStreamState(e.getKey(), e.getValue())))) + .sorted(Entry.comparingByKey()) + .map(e -> generateStreamState(e.getKey(), e.getValue())) + .filter(s -> isValidStreamDescriptor(s.getStreamDescriptor())) .collect(Collectors.toList()); } /** * Generates the legacy global state for backwards compatibility. 
* - * @param pairToCursorInfoMap The map of stream name/namespace tuple to the current cursor information for that stream + * @param pairToCursorInfoMap The map of stream name/namespace tuple to the current cursor + * information for that stream * @return The legacy {@link DbState}. */ - public static DbState generateDbState(final Map pairToCursorInfoMap) { + public static DbState generateDbState(final Map pairToCursorInfoMap) { return new DbState().withStreams(pairToCursorInfoMap.entrySet().stream() .sorted(Entry.comparingByKey()) // sort by stream name then namespace for sanity. .map(e -> generateDbStreamState(e.getKey(), e.getValue())) @@ -93,7 +113,8 @@ public static DbState generateDbState(final Map extractState(final AirbyteStreamState state) { try { - return Optional.ofNullable(Jsons.object(state.getState(), DbStreamState.class)); + return Optional.ofNullable(Jsons.object(state.getStreamState(), DbStreamState.class)); } catch (final IllegalArgumentException e) { LOGGER.error("Unable to extract state.", e); return Optional.empty(); } } + + /** + * Tests whether the provided {@link StreamDescriptor} is valid. A valid descriptor is defined as + * one that has both a non-{@code null} name and non-{@code null} namespace. + * + * @param streamDescriptor A {@link StreamDescriptor} to be validated. + * @return {@code true} if the provided {@link StreamDescriptor} is valid or {@code false} if it is + * invalid. 
+ */ + public static boolean isValidStreamDescriptor(final StreamDescriptor streamDescriptor) { + if (streamDescriptor != null) { + return streamDescriptor.getName() != null && streamDescriptor.getNamespace() != null; + } else { + return false; + } + } + } diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManager.java index 1f6ded0cab07..4b25139247c4 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManager.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManager.java @@ -25,10 +25,7 @@ public interface StateManager { * Retrieves the {@link CdcStateManager} associated with the state manager. * * @return The {@link CdcStateManager} - * @deprecated This method will be removed in the future in favor of a state manager that supports - * CDC-related state. */ - @Deprecated(forRemoval = true) CdcStateManager getCdcStateManager(); /** @@ -44,10 +41,12 @@ public interface StateManager { * Generates an {@link AirbyteStateMessage} that represents the current state contained in the state * manager. * + * @param pair The {@link AirbyteStreamNameNamespacePair} that represents a stream managed by the + * state manager. * @return The {@link AirbyteStateMessage} that represents the current state contained in the state * manager. 
*/ - AirbyteStateMessage toState(); + AirbyteStateMessage toState(final AirbyteStreamNameNamespacePair pair); /** * Retrieves an {@link Optional} possibly containing the cursor value tracked in the state @@ -113,11 +112,13 @@ default Optional getCursorInfo(final AirbyteStreamNameNamespacePair /** * Emits the current state maintained by the manager as an {@link AirbyteStateMessage}. * + * @param pair The {@link AirbyteStreamNameNamespacePair} that represents a stream managed by the + * state manager. * @return An {@link AirbyteStateMessage} that represents the current state maintained by the state * manager. */ - default AirbyteStateMessage emit() { - return toState(); + default AirbyteStateMessage emit(final AirbyteStreamNameNamespacePair pair) { + return toState(pair); } /** @@ -136,7 +137,7 @@ default AirbyteStateMessage updateAndEmit(final AirbyteStreamNameNamespacePair p final Optional cursorInfo = getCursorInfo(pair); Preconditions.checkState(cursorInfo.isPresent(), "Could not find cursor information for stream: " + pair); cursorInfo.get().setCursor(cursor); - return emit(); + return emit(pair); } } diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java index 1c92cc6abd71..84e7dced88c6 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java @@ -11,6 +11,7 @@ import io.airbyte.protocol.models.AirbyteStateMessage; import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import java.util.List; import 
org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -35,28 +36,23 @@ private StateManagerFactory() {} * @param config The connector configuration. * @return A newly created {@link StateManager} implementation based on the provided state. */ - public static StateManager createStateManager(final Object state, final ConfiguredAirbyteCatalog catalog, final JsonNode config) { - if (state instanceof AirbyteStateMessage airbyteStateMessage) { - if (airbyteStateMessage.getData() != null) { - LOGGER.info("Legacy adapter state manager selected to manage state object with type {}.", state.getClass().getName()); - return new LegacyAdapterStateManager(Jsons.object(airbyteStateMessage.getData(), DbState.class), catalog); - } else if (isCdc(config)) { - LOGGER.info("CDC state manager selected to manage state object with type {}.", state.getClass().getName()); - // TODO create proper CDC state manager - return null; - } else if (airbyteStateMessage.getStateType() == AirbyteStateType.PER_STREAM) { - LOGGER.info("Per stream state manager selected to manage state object with type {}.", state.getClass().getName()); - return new PerStreamStateManager(airbyteStateMessage, catalog); - } else { - LOGGER.info("Global state manager selected to manage state object with type {}.", state.getClass().getName()); + public static StateManager createStateManager(final List state, + final ConfiguredAirbyteCatalog catalog, + final JsonNode config) { + if (state != null && !state.isEmpty()) { + final AirbyteStateMessage airbyteStateMessage = state.get(0); + if (isCdc(config) || airbyteStateMessage.getStateType() == AirbyteStateType.GLOBAL) { + LOGGER.info("Global state manager selected to manage state object with type {}.", airbyteStateMessage.getStateType()); return new GlobalStateManager(airbyteStateMessage, catalog); + } else if (airbyteStateMessage.getData() != null) { + LOGGER.info("Legacy adapter state manager selected to manage state object with type {}.", airbyteStateMessage.getStateType()); 
+ return new LegacyStateManager(Jsons.object(airbyteStateMessage.getData(), DbState.class), catalog); + } else { + LOGGER.info("Per stream state manager selected to manage state object with type {}.", airbyteStateMessage.getStateType()); + return new StreamStateManager(state, catalog); } - } else if (state instanceof DbState dbState) { - LOGGER.info("Legacy state manager selected to manage state object with type {}.", state.getClass().getName()); - return new LegacyStateManager(dbState, catalog); } else { - throw new IllegalArgumentException( - "Failed to create state manager due to detection of unsupported state object type: " + state.getClass().getName()); + throw new IllegalArgumentException("Failed to create state manager due to empty state list."); } } diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StreamStateManager.java similarity index 50% rename from airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManager.java rename to airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StreamStateManager.java index 9e8f31555243..92921ea15271 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManager.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StreamStateManager.java @@ -12,12 +12,17 @@ import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; import io.airbyte.integrations.source.relationaldb.CdcStateManager; import io.airbyte.integrations.source.relationaldb.CursorInfo; +import 
io.airbyte.integrations.source.relationaldb.models.CdcState; import io.airbyte.protocol.models.AirbyteStateMessage; import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; import io.airbyte.protocol.models.AirbyteStreamState; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import java.util.List; import java.util.Map; +import java.util.Optional; +import java.util.stream.Collectors; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Per-stream implementation of the {@link StateManager} interface. @@ -25,19 +30,22 @@ * This implementation generates a state object for each stream detected in catalog/map of known * streams to cursor information stored in this manager. */ -public class PerStreamStateManager extends AbstractStateManager { +public class StreamStateManager extends AbstractStateManager { + + private static final Logger LOGGER = LoggerFactory.getLogger(StreamStateManager.class); /** - * Constructs a new {@link PerStreamStateManager} that is seeded with the provided + * Constructs a new {@link StreamStateManager} that is seeded with the provided * {@link AirbyteStateMessage}. * - * @param airbyteStateMessage The initial state represented as an {@link AirbyteStateMessage}. + * @param airbyteStateMessages The initial state represented as a list of + * {@link AirbyteStateMessage}s. * @param catalog The {@link ConfiguredAirbyteCatalog} for the connector associated with this state * manager. 
*/ - public PerStreamStateManager(final AirbyteStateMessage airbyteStateMessage, final ConfiguredAirbyteCatalog catalog) { + public StreamStateManager(final List airbyteStateMessages, final ConfiguredAirbyteCatalog catalog) { super(catalog, - () -> airbyteStateMessage.getStreams(), + () -> airbyteStateMessages.stream().map(a -> a.getStream()).collect(Collectors.toList()), CURSOR_FUNCTION, CURSOR_FIELD_FUNCTION, NAME_NAMESPACE_PAIR_FUNCTION); @@ -45,18 +53,24 @@ public PerStreamStateManager(final AirbyteStateMessage airbyteStateMessage, fina @Override public CdcStateManager getCdcStateManager() { - return new CdcStateManager(null); + return new CdcStateManager(new CdcState()); } @Override - public AirbyteStateMessage toState() { + public AirbyteStateMessage toState(final AirbyteStreamNameNamespacePair pair) { final Map pairToCursorInfoMap = getPairToCursorInfoMap(); - final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage(); - final List airbyteStreamStates = StateGeneratorUtils.generatePerStreamState(pairToCursorInfoMap); - return airbyteStateMessage - .withStateType(AirbyteStateType.PER_STREAM) - // Temporarily include legacy state for backwards compatibility with the platform - .withData(Jsons.jsonNode(StateGeneratorUtils.generateDbState(pairToCursorInfoMap))) - .withStreams(airbyteStreamStates); + final Optional cursorInfo = Optional.ofNullable(pairToCursorInfoMap.get(pair)); + + if (cursorInfo.isPresent()) { + return new AirbyteStateMessage() + .withStateType(AirbyteStateType.STREAM) + // Temporarily include legacy state for backwards compatibility with the platform + .withData(Jsons.jsonNode(StateGeneratorUtils.generateDbState(pairToCursorInfoMap))) + .withStream(StateGeneratorUtils.generateStreamState(pair, cursorInfo.get())); + } else { + LOGGER.warn("Cursor information could not be located in state for stream {}.", pair); + return new AirbyteStateMessage().withStateType(AirbyteStateType.STREAM).withStream(new AirbyteStreamState()); + } } -} 
\ No newline at end of file + +} diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/CursorManagerTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/CursorManagerTest.java index ac23123d2972..67b7fddc23f5 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/CursorManagerTest.java +++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/CursorManagerTest.java @@ -7,24 +7,17 @@ import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.CURSOR; import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.CURSOR_FIELD1; import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.CURSOR_FIELD2; -import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.NAMESPACE; import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.NAME_NAMESPACE_PAIR1; import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.NAME_NAMESPACE_PAIR2; -import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME1; -import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME2; import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.getCatalog; import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.getState; import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.getStream; import static org.junit.jupiter.api.Assertions.assertEquals; -import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; import io.airbyte.integrations.source.relationaldb.CursorInfo; import 
io.airbyte.integrations.source.relationaldb.models.DbStreamState; -import io.airbyte.protocol.models.AirbyteStateMessage; -import io.airbyte.protocol.models.AirbyteStreamState; import java.util.Collections; -import java.util.List; import java.util.Optional; import org.junit.jupiter.api.Test; @@ -120,27 +113,6 @@ void testCreateCursorInfoStateAndCatalogButNoCursorField() { @Test void testGetters() { - final AirbyteStateMessage state = new AirbyteStateMessage() - .withStreams(List.of( - new AirbyteStreamState() - .withName(STREAM_NAME1) - .withNamespace(NAMESPACE) - .withState( - Jsons.jsonNode( - new DbStreamState() - .withStreamName(STREAM_NAME1) - .withStreamNamespace(NAMESPACE) - .withCursorField(List.of(CURSOR_FIELD1)) - .withCursor(CURSOR))), - new AirbyteStreamState() - .withName(STREAM_NAME2) - .withNamespace(NAMESPACE) - .withState( - Jsons.jsonNode( - new DbStreamState() - .withStreamName(STREAM_NAME2) - .withStreamNamespace(NAMESPACE))))); - final CursorManager cursorManager = createCursorManager(CURSOR_FIELD1, CURSOR, NAME_NAMESPACE_PAIR1); final CursorInfo actualCursorInfo = new CursorInfo(CURSOR_FIELD1, CURSOR, null, null); diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/GlobalStateManagerTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/GlobalStateManagerTest.java new file mode 100644 index 000000000000..2655c2e9037b --- /dev/null +++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/GlobalStateManagerTest.java @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.relationaldb.state; + +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.CURSOR_FIELD1; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.CURSOR_FIELD2; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.NAMESPACE; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.NAME_NAMESPACE_PAIR1; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME1; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME2; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME3; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.mockito.Mockito.mock; + +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.relationaldb.models.CdcState; +import io.airbyte.integrations.source.relationaldb.models.DbState; +import io.airbyte.integrations.source.relationaldb.models.DbStreamState; +import io.airbyte.protocol.models.AirbyteGlobalState; +import io.airbyte.protocol.models.AirbyteStateMessage; +import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; +import io.airbyte.protocol.models.AirbyteStream; +import io.airbyte.protocol.models.AirbyteStreamState; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.ConfiguredAirbyteStream; +import io.airbyte.protocol.models.StreamDescriptor; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import org.junit.jupiter.api.Test; + +/** + * Test suite for the {@link GlobalStateManager} class. 
+ */ +public class GlobalStateManagerTest { + + @Test + void testCdcStateManager() { + final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + final AirbyteGlobalState globalState = new AirbyteGlobalState().withSharedState(Jsons.jsonNode(new DbState())) + .withStreamStates(List.of(new AirbyteStreamState().withStreamDescriptor(new StreamDescriptor().withNamespace("namespace").withName("name")) + .withStreamState(Jsons.jsonNode(new DbStreamState())))); + final StateManager stateManager = + new GlobalStateManager(new AirbyteStateMessage().withStateType(AirbyteStateType.GLOBAL).withGlobal(globalState), catalog); + assertNotNull(stateManager.getCdcStateManager()); + } + + @Test + void testToStateFromLegacyState() { + final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() + .withStreams(List.of( + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME1).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD1)), + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD2)), + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME3).withNamespace(NAMESPACE)))); + + final CdcState cdcState = new CdcState().withState(Jsons.jsonNode(Map.of("foo", "bar", "baz", 5))); + final DbState dbState = new DbState() + .withCdc(true) + .withCdcState(cdcState) + .withStreams(List.of( + new DbStreamState() + .withStreamName(STREAM_NAME1) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD1)) + .withCursor("a"), + new DbStreamState() + .withStreamName(STREAM_NAME2) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD2)), + new DbStreamState() + .withStreamName(STREAM_NAME3) + .withStreamNamespace(NAMESPACE)) + .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())); + final StateManager stateManager = 
new GlobalStateManager(new AirbyteStateMessage().withData(Jsons.jsonNode(dbState)), catalog); + + final DbState expectedDbState = new DbState() + .withCdc(true) + .withCdcState(cdcState) + .withStreams(List.of( + new DbStreamState() + .withStreamName(STREAM_NAME1) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD1)) + .withCursor("a"), + new DbStreamState() + .withStreamName(STREAM_NAME2) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD2)), + new DbStreamState() + .withStreamName(STREAM_NAME3) + .withStreamNamespace(NAMESPACE)) + .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())); + + final AirbyteGlobalState expectedGlobalState = new AirbyteGlobalState() + .withSharedState(Jsons.jsonNode(cdcState)) + .withStreamStates(List.of( + new AirbyteStreamState() + .withStreamDescriptor(new StreamDescriptor().withName(STREAM_NAME1).withNamespace(NAMESPACE)) + .withStreamState(Jsons.jsonNode(new DbStreamState() + .withStreamName(STREAM_NAME1) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD1)) + .withCursor("a"))), + new AirbyteStreamState() + .withStreamDescriptor(new StreamDescriptor().withName(STREAM_NAME2).withNamespace(NAMESPACE)) + .withStreamState(Jsons.jsonNode(new DbStreamState() + .withStreamName(STREAM_NAME2) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD2)))), + new AirbyteStreamState() + .withStreamDescriptor(new StreamDescriptor().withName(STREAM_NAME3).withNamespace(NAMESPACE)) + .withStreamState(Jsons.jsonNode(new DbStreamState() + .withStreamName(STREAM_NAME3) + .withStreamNamespace(NAMESPACE)))) + .stream().sorted(Comparator.comparing(o -> o.getStreamDescriptor().getName())).collect(Collectors.toList())); + final AirbyteStateMessage expected = new AirbyteStateMessage() + .withData(Jsons.jsonNode(expectedDbState)) + .withGlobal(expectedGlobalState) + .withStateType(AirbyteStateType.GLOBAL); + + final 
AirbyteStateMessage actualFirstEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR1, "a"); + assertEquals(expected, actualFirstEmission); + } + + @Test + void testToState() { + final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() + .withStreams(List.of( + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME1).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD1)), + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD2)), + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME3).withNamespace(NAMESPACE)))); + + final CdcState cdcState = new CdcState().withState(Jsons.jsonNode(Map.of("foo", "bar", "baz", 5))); + final AirbyteGlobalState globalState = new AirbyteGlobalState().withSharedState(Jsons.jsonNode(new DbState())).withStreamStates( + List.of(new AirbyteStreamState().withStreamDescriptor(new StreamDescriptor()).withStreamState(Jsons.jsonNode(new DbStreamState())))); + final StateManager stateManager = + new GlobalStateManager(new AirbyteStateMessage().withStateType(AirbyteStateType.GLOBAL).withGlobal(globalState), catalog); + stateManager.getCdcStateManager().setCdcState(cdcState); + + final DbState expectedDbState = new DbState() + .withCdc(true) + .withCdcState(cdcState) + .withStreams(List.of( + new DbStreamState() + .withStreamName(STREAM_NAME1) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD1)) + .withCursor("a"), + new DbStreamState() + .withStreamName(STREAM_NAME2) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD2)), + new DbStreamState() + .withStreamName(STREAM_NAME3) + .withStreamNamespace(NAMESPACE)) + .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())); + + final AirbyteGlobalState expectedGlobalState = new AirbyteGlobalState() + 
.withSharedState(Jsons.jsonNode(cdcState)) + .withStreamStates(List.of( + new AirbyteStreamState() + .withStreamDescriptor(new StreamDescriptor().withName(STREAM_NAME1).withNamespace(NAMESPACE)) + .withStreamState(Jsons.jsonNode(new DbStreamState() + .withStreamName(STREAM_NAME1) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD1)) + .withCursor("a"))), + new AirbyteStreamState() + .withStreamDescriptor(new StreamDescriptor().withName(STREAM_NAME2).withNamespace(NAMESPACE)) + .withStreamState(Jsons.jsonNode(new DbStreamState() + .withStreamName(STREAM_NAME2) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD2)))), + new AirbyteStreamState() + .withStreamDescriptor(new StreamDescriptor().withName(STREAM_NAME3).withNamespace(NAMESPACE)) + .withStreamState(Jsons.jsonNode(new DbStreamState() + .withStreamName(STREAM_NAME3) + .withStreamNamespace(NAMESPACE)))) + .stream().sorted(Comparator.comparing(o -> o.getStreamDescriptor().getName())).collect(Collectors.toList())); + final AirbyteStateMessage expected = new AirbyteStateMessage() + .withData(Jsons.jsonNode(expectedDbState)) + .withGlobal(expectedGlobalState) + .withStateType(AirbyteStateType.GLOBAL); + + final AirbyteStateMessage actualFirstEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR1, "a"); + assertEquals(expected, actualFirstEmission); + } + +} diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/LegacyAdapterStateManagerTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/LegacyAdapterStateManagerTest.java deleted file mode 100644 index 3ecd4157acf8..000000000000 --- a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/LegacyAdapterStateManagerTest.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2022 Airbyte, Inc., all rights 
reserved. - */ - -package io.airbyte.integrations.source.relationaldb.state; - -import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.CURSOR_FIELD1; -import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.CURSOR_FIELD2; -import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.NAMESPACE; -import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.NAME_NAMESPACE_PAIR1; -import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.NAME_NAMESPACE_PAIR2; -import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME1; -import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME2; -import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME3; -import static org.junit.jupiter.api.Assertions.assertEquals; - -import io.airbyte.commons.json.Jsons; -import io.airbyte.integrations.source.relationaldb.models.DbState; -import io.airbyte.integrations.source.relationaldb.models.DbStreamState; -import io.airbyte.protocol.models.AirbyteStateMessage; -import io.airbyte.protocol.models.AirbyteStream; -import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; -import io.airbyte.protocol.models.ConfiguredAirbyteStream; -import java.util.Comparator; -import java.util.List; -import java.util.stream.Collectors; -import org.junit.jupiter.api.Test; - -/** - * Test suite for the {@link LegacyAdapterStateManagerTest} class. 
- */ -public class LegacyAdapterStateManagerTest { - - @Test - void testToState() { - // TODO update to include state type once available - final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() - .withStreams(List.of( - new ConfiguredAirbyteStream() - .withStream(new AirbyteStream().withName(STREAM_NAME1).withNamespace(NAMESPACE)) - .withCursorField(List.of(CURSOR_FIELD1)), - new ConfiguredAirbyteStream() - .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)) - .withCursorField(List.of(CURSOR_FIELD2)), - new ConfiguredAirbyteStream() - .withStream(new AirbyteStream().withName(STREAM_NAME3).withNamespace(NAMESPACE)))); - - final StateManager stateManager = new LegacyAdapterStateManager(new DbState(), catalog); - - final AirbyteStateMessage expectedFirstEmission = new AirbyteStateMessage() - .withData(Jsons.jsonNode(new DbState().withStreams(List.of( - new DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD1)) - .withCursor("a"), - new DbStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD2)), - new DbStreamState().withStreamName(STREAM_NAME3).withStreamNamespace(NAMESPACE)) - .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())) - .withCdc(false))); - final AirbyteStateMessage actualFirstEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR1, "a"); - assertEquals(expectedFirstEmission, actualFirstEmission); - final AirbyteStateMessage expectedSecondEmission = new AirbyteStateMessage() - .withData(Jsons.jsonNode(new DbState().withStreams(List.of( - new DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD1)) - .withCursor("a"), - new DbStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD2)) - .withCursor("b"), - new 
DbStreamState().withStreamName(STREAM_NAME3).withStreamNamespace(NAMESPACE)) - .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())) - .withCdc(false))); - final AirbyteStateMessage actualSecondEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR2, "b"); - assertEquals(expectedSecondEmission, actualSecondEmission); - } - -} diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManagerTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManagerTest.java index 0eda420c0770..c3b796f3d270 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManagerTest.java +++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManagerTest.java @@ -19,6 +19,7 @@ import io.airbyte.integrations.source.relationaldb.models.DbState; import io.airbyte.integrations.source.relationaldb.models.DbStreamState; import io.airbyte.protocol.models.AirbyteStateMessage; +import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; import io.airbyte.protocol.models.AirbyteStream; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import io.airbyte.protocol.models.ConfiguredAirbyteStream; @@ -77,6 +78,7 @@ void testToState() { final StateManager stateManager = new LegacyStateManager(new DbState(), catalog); final AirbyteStateMessage expectedFirstEmission = new AirbyteStateMessage() + .withStateType(AirbyteStateType.LEGACY) .withData(Jsons.jsonNode(new DbState().withStreams(List.of( new DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD1)) .withCursor("a"), @@ -87,6 +89,7 @@ void testToState() { final AirbyteStateMessage actualFirstEmission 
= stateManager.updateAndEmit(NAME_NAMESPACE_PAIR1, "a"); assertEquals(expectedFirstEmission, actualFirstEmission); final AirbyteStateMessage expectedSecondEmission = new AirbyteStateMessage() + .withStateType(AirbyteStateType.LEGACY) .withData(Jsons.jsonNode(new DbState().withStreams(List.of( new DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD1)) .withCursor("a"), @@ -111,6 +114,7 @@ void testToStateNullCursorField() { final StateManager stateManager = new LegacyStateManager(new DbState(), catalog); final AirbyteStateMessage expectedFirstEmission = new AirbyteStateMessage() + .withStateType(AirbyteStateType.LEGACY) .withData(Jsons.jsonNode(new DbState().withStreams(List.of( new DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD1)) .withCursor("a"), @@ -137,6 +141,7 @@ void testCursorNotUpdatedForCdc() { final StateManager stateManager = new LegacyStateManager(state, catalog); final AirbyteStateMessage expectedFirstEmission = new AirbyteStateMessage() + .withStateType(AirbyteStateType.LEGACY) .withData(Jsons.jsonNode(new DbState().withStreams(List.of( new DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD1)) .withCursor(null), @@ -146,6 +151,7 @@ void testCursorNotUpdatedForCdc() { final AirbyteStateMessage actualFirstEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR1, "a"); assertEquals(expectedFirstEmission, actualFirstEmission); final AirbyteStateMessage expectedSecondEmission = new AirbyteStateMessage() + .withStateType(AirbyteStateType.LEGACY) .withData(Jsons.jsonNode(new DbState().withStreams(List.of( new DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(List.of(CURSOR_FIELD1)) .withCursor(null), diff --git 
a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManagerTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManagerTest.java deleted file mode 100644 index f411f2c8bdbc..000000000000 --- a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/PerStreamStateManagerTest.java +++ /dev/null @@ -1,306 +0,0 @@ -/* - * Copyright (c) 2022 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.integrations.source.relationaldb.state; - -import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.CURSOR; -import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.CURSOR_FIELD1; -import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.CURSOR_FIELD2; -import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.NAMESPACE; -import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.NAME_NAMESPACE_PAIR1; -import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.NAME_NAMESPACE_PAIR2; -import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME1; -import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME2; -import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME3; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.spy; -import static org.mockito.Mockito.when; - -import io.airbyte.commons.json.Jsons; -import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; -import io.airbyte.integrations.source.relationaldb.CursorInfo; -import 
io.airbyte.integrations.source.relationaldb.models.DbState; -import io.airbyte.integrations.source.relationaldb.models.DbStreamState; -import io.airbyte.protocol.models.AirbyteStateMessage; -import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; -import io.airbyte.protocol.models.AirbyteStream; -import io.airbyte.protocol.models.AirbyteStreamState; -import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; -import io.airbyte.protocol.models.ConfiguredAirbyteStream; -import java.util.Comparator; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.stream.Collectors; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; - -/** - * Test suite for the {@link PerStreamStateManager} class. - */ -public class PerStreamStateManagerTest { - - @Test - void testCreationFromInvalidState() { - final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage() - .withStreams(List.of( - new AirbyteStreamState() - .withName(STREAM_NAME1) - .withNamespace(NAMESPACE) - .withState(Jsons.jsonNode("Not a state object")))); - final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); - - Assertions.assertDoesNotThrow(() -> { - final StateManager stateManager = new PerStreamStateManager(airbyteStateMessage, catalog); - assertNotNull(stateManager); - }); - } - - @Test - void testGetters() { - final AirbyteStateMessage state = new AirbyteStateMessage() - .withStreams(List.of( - new AirbyteStreamState() - .withName(STREAM_NAME1) - .withNamespace(NAMESPACE) - .withState( - Jsons.jsonNode( - new DbStreamState() - .withStreamName(STREAM_NAME1) - .withStreamNamespace(NAMESPACE) - .withCursorField(List.of(CURSOR_FIELD1)) - .withCursor(CURSOR))), - new AirbyteStreamState() - .withName(STREAM_NAME2) - .withNamespace(NAMESPACE) - .withState( - Jsons.jsonNode( - new DbStreamState() - .withStreamName(STREAM_NAME2) - .withStreamNamespace(NAMESPACE))))); - - final 
ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() - .withStreams(List.of( - new ConfiguredAirbyteStream() - .withStream(new AirbyteStream().withName(STREAM_NAME1).withNamespace(NAMESPACE)) - .withCursorField(List.of(CURSOR_FIELD1)), - new ConfiguredAirbyteStream() - .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)))); - - final StateManager stateManager = new PerStreamStateManager(state, catalog); - - assertEquals(Optional.of(CURSOR_FIELD1), stateManager.getOriginalCursorField(NAME_NAMESPACE_PAIR1)); - assertEquals(Optional.of(CURSOR), stateManager.getOriginalCursor(NAME_NAMESPACE_PAIR1)); - assertEquals(Optional.of(CURSOR_FIELD1), stateManager.getCursorField(NAME_NAMESPACE_PAIR1)); - assertEquals(Optional.of(CURSOR), stateManager.getCursor(NAME_NAMESPACE_PAIR1)); - - assertEquals(Optional.empty(), stateManager.getOriginalCursorField(NAME_NAMESPACE_PAIR2)); - assertEquals(Optional.empty(), stateManager.getOriginalCursor(NAME_NAMESPACE_PAIR2)); - assertEquals(Optional.empty(), stateManager.getCursorField(NAME_NAMESPACE_PAIR2)); - assertEquals(Optional.empty(), stateManager.getCursor(NAME_NAMESPACE_PAIR2)); - } - - @Test - void testToState() { - final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() - .withStreams(List.of( - new ConfiguredAirbyteStream() - .withStream(new AirbyteStream().withName(STREAM_NAME1).withNamespace(NAMESPACE)) - .withCursorField(List.of(CURSOR_FIELD1)), - new ConfiguredAirbyteStream() - .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)) - .withCursorField(List.of(CURSOR_FIELD2)), - new ConfiguredAirbyteStream() - .withStream(new AirbyteStream().withName(STREAM_NAME3).withNamespace(NAMESPACE)))); - - final StateManager stateManager = new PerStreamStateManager(new AirbyteStateMessage(), catalog); - - final AirbyteStateMessage expectedFirstEmission = new AirbyteStateMessage() - .withStateType(AirbyteStateType.PER_STREAM) - .withData(Jsons.jsonNode(new 
DbState().withStreams(List.of( - new DbStreamState() - .withStreamName(STREAM_NAME1) - .withStreamNamespace(NAMESPACE) - .withCursorField(List.of(CURSOR_FIELD1)) - .withCursor("a"), - new DbStreamState() - .withStreamName(STREAM_NAME2) - .withStreamNamespace(NAMESPACE) - .withCursorField(List.of(CURSOR_FIELD2)), - new DbStreamState() - .withStreamName(STREAM_NAME3) - .withStreamNamespace(NAMESPACE) - ).stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())))) - .withStreams(List.of( - new AirbyteStreamState() - .withName(STREAM_NAME1) - .withNamespace(NAMESPACE) - .withState( - Jsons.jsonNode( - new DbStreamState() - .withStreamName(STREAM_NAME1) - .withStreamNamespace(NAMESPACE) - .withCursorField(List.of(CURSOR_FIELD1)) - .withCursor("a"))), - new AirbyteStreamState() - .withName(STREAM_NAME2) - .withNamespace(NAMESPACE) - .withState( - Jsons.jsonNode( - new DbStreamState() - .withStreamName(STREAM_NAME2) - .withStreamNamespace(NAMESPACE) - .withCursorField(List.of(CURSOR_FIELD2)))), - new AirbyteStreamState() - .withName(STREAM_NAME3) - .withNamespace(NAMESPACE) - .withState( - Jsons.jsonNode( - new DbStreamState() - .withStreamName(STREAM_NAME3) - .withStreamNamespace(NAMESPACE)))) - .stream().sorted(Comparator.comparing(AirbyteStreamState::getName)).collect(Collectors.toList())); - final AirbyteStateMessage actualFirstEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR1, "a"); - assertEquals(expectedFirstEmission, actualFirstEmission); - final AirbyteStateMessage expectedSecondEmission = new AirbyteStateMessage() - .withStateType(AirbyteStateType.PER_STREAM) - .withData(Jsons.jsonNode(new DbState().withStreams(List.of( - new DbStreamState() - .withStreamName(STREAM_NAME1) - .withStreamNamespace(NAMESPACE) - .withCursorField(List.of(CURSOR_FIELD1)) - .withCursor("a"), - new DbStreamState() - .withStreamName(STREAM_NAME2) - .withStreamNamespace(NAMESPACE) - .withCursorField(List.of(CURSOR_FIELD2)) - 
.withCursor("b"), - new DbStreamState() - .withStreamName(STREAM_NAME3) - .withStreamNamespace(NAMESPACE) - ).stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())))) - .withStreams(List.of( - new AirbyteStreamState() - .withName(STREAM_NAME1) - .withNamespace(NAMESPACE) - .withState( - Jsons.jsonNode( - new DbStreamState() - .withStreamName(STREAM_NAME1) - .withStreamNamespace(NAMESPACE) - .withCursorField(List.of(CURSOR_FIELD1)) - .withCursor("a"))), - new AirbyteStreamState() - .withName(STREAM_NAME2) - .withNamespace(NAMESPACE) - .withState( - Jsons.jsonNode( - new DbStreamState() - .withStreamName(STREAM_NAME2) - .withStreamNamespace(NAMESPACE) - .withCursorField(List.of(CURSOR_FIELD2)) - .withCursor("b"))), - new AirbyteStreamState() - .withName(STREAM_NAME3) - .withNamespace(NAMESPACE) - .withState( - Jsons.jsonNode( - new DbStreamState() - .withStreamName(STREAM_NAME3) - .withStreamNamespace(NAMESPACE)))) - .stream().sorted(Comparator.comparing(AirbyteStreamState::getName)).collect(Collectors.toList())); - - final AirbyteStateMessage actualSecondEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR2, "b"); - assertEquals(expectedSecondEmission, actualSecondEmission); - } - - @Test - void testToStateNullCursorField() { - final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() - .withStreams(List.of( - new ConfiguredAirbyteStream() - .withStream(new AirbyteStream().withName(STREAM_NAME1).withNamespace(NAMESPACE)) - .withCursorField(List.of(CURSOR_FIELD1)), - new ConfiguredAirbyteStream() - .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)))); - final StateManager stateManager = new PerStreamStateManager(new AirbyteStateMessage(), catalog); - - final AirbyteStateMessage expectedFirstEmission = new AirbyteStateMessage() - .withStateType(AirbyteStateType.PER_STREAM) - .withData(Jsons.jsonNode(new DbState().withStreams(List.of( - new DbStreamState() - 
.withStreamName(STREAM_NAME1) - .withStreamNamespace(NAMESPACE) - .withCursorField(List.of(CURSOR_FIELD1)) - .withCursor("a"), - new DbStreamState() - .withStreamName(STREAM_NAME2) - .withStreamNamespace(NAMESPACE) - ).stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())))) - .withStreams( - List.of( - new AirbyteStreamState() - .withName(STREAM_NAME1) - .withNamespace(NAMESPACE) - .withState( - Jsons.jsonNode( - new DbStreamState() - .withStreamName(STREAM_NAME1) - .withStreamNamespace(NAMESPACE) - .withCursorField(List.of(CURSOR_FIELD1)) - .withCursor("a"))), - new AirbyteStreamState() - .withName(STREAM_NAME2) - .withNamespace(NAMESPACE) - .withState( - Jsons.jsonNode( - new DbStreamState() - .withStreamName(STREAM_NAME2) - .withStreamNamespace(NAMESPACE)))) - .stream().sorted(Comparator.comparing(AirbyteStreamState::getName)).collect(Collectors.toList())); - - final AirbyteStateMessage actualFirstEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR1, "a"); - assertEquals(expectedFirstEmission, actualFirstEmission); - } - - @Test - void testCdcStateManager() { - final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); - final StateManager stateManager = new PerStreamStateManager(new AirbyteStateMessage(), catalog); - assertNotNull(stateManager.getCdcStateManager()); - } - - @Test - void testNullNameNamespacePairFiltered() { - final Map pairToCursorInfoMap = new HashMap<>(); - pairToCursorInfoMap.put(new AirbyteStreamNameNamespacePair(null, null), mock(CursorInfo.class)); - final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage() - .withStreams(List.of()); - final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); - final StateManager stateManager = spy(new PerStreamStateManager(airbyteStateMessage, catalog)); - when(stateManager.getPairToCursorInfoMap()).thenReturn(pairToCursorInfoMap); - - final AirbyteStateMessage result = stateManager.toState(); - 
assertNotNull(result); - assertEquals(0, result.getStreams().size()); - - pairToCursorInfoMap.clear(); - pairToCursorInfoMap.put(new AirbyteStreamNameNamespacePair("test", null), mock(CursorInfo.class)); - - final AirbyteStateMessage result2 = stateManager.toState(); - assertNotNull(result2); - assertEquals(0, result2.getStreams().size()); - - pairToCursorInfoMap.clear(); - pairToCursorInfoMap.put(new AirbyteStreamNameNamespacePair(null, "test"), mock(CursorInfo.class)); - - final AirbyteStateMessage result3 = stateManager.toState(); - assertNotNull(result3); - assertEquals(0, result3.getStreams().size()); - } - -} diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtilsTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtilsTest.java new file mode 100644 index 000000000000..7ef520137ffe --- /dev/null +++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtilsTest.java @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.relationaldb.state; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import io.airbyte.protocol.models.StreamDescriptor; +import org.junit.jupiter.api.Test; + +/** + * Test suite for the {@link StateGeneratorUtils} class. 
+ */ +public class StateGeneratorUtilsTest { + + @Test + void testValidStreamDescriptor() { + final StreamDescriptor streamDescriptor1 = null; + final StreamDescriptor streamDescriptor2 = new StreamDescriptor(); + final StreamDescriptor streamDescriptor3 = new StreamDescriptor().withName("name"); + final StreamDescriptor streamDescriptor4 = new StreamDescriptor().withNamespace("namespace"); + final StreamDescriptor streamDescriptor5 = new StreamDescriptor().withName("name").withNamespace("namespace"); + + assertFalse(StateGeneratorUtils.isValidStreamDescriptor(streamDescriptor1)); + assertFalse(StateGeneratorUtils.isValidStreamDescriptor(streamDescriptor2)); + assertFalse(StateGeneratorUtils.isValidStreamDescriptor(streamDescriptor3)); + assertFalse(StateGeneratorUtils.isValidStreamDescriptor(streamDescriptor4)); + assertTrue(StateGeneratorUtils.isValidStreamDescriptor(streamDescriptor5)); + } + +} diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java index 4df2964deb1e..d0d8f70bfd14 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java +++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java @@ -11,10 +11,16 @@ import com.fasterxml.jackson.databind.JsonNode; import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.relationaldb.models.CdcState; import io.airbyte.integrations.source.relationaldb.models.DbState; +import io.airbyte.integrations.source.relationaldb.models.DbStreamState; +import io.airbyte.protocol.models.AirbyteGlobalState; import io.airbyte.protocol.models.AirbyteStateMessage; import 
io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; +import io.airbyte.protocol.models.AirbyteStreamState; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.StreamDescriptor; +import java.util.List; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -23,16 +29,24 @@ */ public class StateManagerFactoryTest { + private static final String NAMESPACE = "namespace"; + private static final String NAME = "name"; + private static final String REPLICATION_SLOT = "replication_slot"; + private static final String PUBLICATION = "publication"; + private static final String REPLICATION_METHOD = "replication_method"; + @Test - void testLegacyStateManagerCreationFromDbState() { + void testNullOrEmptyState() { final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); - final DbState state = mock(DbState.class); final JsonNode config = mock(JsonNode.class); - final StateManager stateManager = StateManagerFactory.createStateManager(state, catalog, config); + Assertions.assertThrows(IllegalArgumentException.class, () -> { + StateManagerFactory.createStateManager(null, catalog, config); + }); - Assertions.assertNotNull(stateManager); - Assertions.assertEquals(LegacyStateManager.class, stateManager.getClass()); + Assertions.assertThrows(IllegalArgumentException.class, () -> { + StateManagerFactory.createStateManager(List.of(), catalog, config); + }); } @Test @@ -42,39 +56,30 @@ void testLegacyAdapterStateManagerCreationFromAirbyteStateMessage() { final JsonNode config = mock(JsonNode.class); when(airbyteStateMessage.getData()).thenReturn(Jsons.jsonNode(new DbState())); - final StateManager stateManager = StateManagerFactory.createStateManager(airbyteStateMessage, catalog, config); + final StateManager stateManager = StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, config); Assertions.assertNotNull(stateManager); - 
Assertions.assertEquals(LegacyAdapterStateManager.class, stateManager.getClass()); + Assertions.assertEquals(LegacyStateManager.class, stateManager.getClass()); } @Test - void testCdcStateManagerCreation() { + void testGlobalStateManagerCreation() { final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); - final AirbyteStateMessage airbyteStateMessage = mock(AirbyteStateMessage.class); + final AirbyteGlobalState globalState = + new AirbyteGlobalState().withSharedState(Jsons.jsonNode(new DbState().withCdcState(new CdcState().withState(Jsons.jsonNode(new DbState()))))) + .withStreamStates(List.of(new AirbyteStreamState().withStreamDescriptor(new StreamDescriptor().withNamespace(NAMESPACE).withName(NAME)) + .withStreamState(Jsons.jsonNode(new DbStreamState())))); + final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage().withStateType(AirbyteStateType.GLOBAL).withGlobal(globalState); final JsonNode config = mock(JsonNode.class); final JsonNode replicationConfig = mock(JsonNode.class); - when(replicationConfig.hasNonNull("replication_slot")).thenReturn(true); - when(replicationConfig.hasNonNull("publication")).thenReturn(true); + when(replicationConfig.hasNonNull(REPLICATION_SLOT)).thenReturn(true); + when(replicationConfig.hasNonNull(PUBLICATION)).thenReturn(true); - when(config.hasNonNull("replication_method")).thenReturn(true); - when(config.get("replication_method")).thenReturn(replicationConfig); - - final StateManager stateManager = StateManagerFactory.createStateManager(airbyteStateMessage, catalog, config); - - // TODO replace with non-null assertion and type assertion once the CDC state manager exists - Assertions.assertNull(stateManager); - } - - @Test - void testGlobalStateManagerCreation() { - final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); - final AirbyteStateMessage airbyteStateMessage = mock(AirbyteStateMessage.class); - final JsonNode config = mock(JsonNode.class); - 
when(airbyteStateMessage.getStateType()).thenReturn(AirbyteStateType.GLOBAL); + when(config.hasNonNull(REPLICATION_METHOD)).thenReturn(true); + when(config.get(REPLICATION_METHOD)).thenReturn(replicationConfig); - final StateManager stateManager = StateManagerFactory.createStateManager(airbyteStateMessage, catalog, config); + final StateManager stateManager = StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, config); Assertions.assertNotNull(stateManager); Assertions.assertEquals(GlobalStateManager.class, stateManager.getClass()); @@ -83,23 +88,15 @@ void testGlobalStateManagerCreation() { @Test void testPerStreamStateManagerCreation() { final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); - final AirbyteStateMessage airbyteStateMessage = mock(AirbyteStateMessage.class); + final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage().withStateType(AirbyteStateType.STREAM) + .withStream(new AirbyteStreamState().withStreamDescriptor(new StreamDescriptor().withName(NAME).withNamespace( + NAMESPACE)).withStreamState(Jsons.jsonNode(new DbStreamState()))); final JsonNode config = mock(JsonNode.class); - when(airbyteStateMessage.getData()).thenReturn(null); - when(airbyteStateMessage.getStateType()).thenReturn(AirbyteStateType.PER_STREAM); - final StateManager stateManager = StateManagerFactory.createStateManager(airbyteStateMessage, catalog, config); + final StateManager stateManager = StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, config); Assertions.assertNotNull(stateManager); - Assertions.assertEquals(PerStreamStateManager.class, stateManager.getClass()); - } - - @Test - void testStateManagerCreationForUnknownStateObject() { - final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); - final JsonNode config = mock(JsonNode.class); - - Assertions.assertThrows(IllegalArgumentException.class, () -> StateManagerFactory.createStateManager("Not Valid", 
catalog, config)); + Assertions.assertEquals(StreamStateManager.class, stateManager.getClass()); } @Test @@ -107,27 +104,27 @@ void testCdcDetectionLogic() { final JsonNode config = mock(JsonNode.class); final JsonNode replicationConfig = mock(JsonNode.class); - when(replicationConfig.hasNonNull("replication_slot")).thenReturn(true); - when(replicationConfig.hasNonNull("publication")).thenReturn(true); - when(config.hasNonNull("replication_method")).thenReturn(true); - when(config.get("replication_method")).thenReturn(replicationConfig); + when(replicationConfig.hasNonNull(REPLICATION_SLOT)).thenReturn(true); + when(replicationConfig.hasNonNull(PUBLICATION)).thenReturn(true); + when(config.hasNonNull(REPLICATION_METHOD)).thenReturn(true); + when(config.get(REPLICATION_METHOD)).thenReturn(replicationConfig); assertTrue(StateManagerFactory.isCdc(config)); - when(replicationConfig.hasNonNull("replication_slot")).thenReturn(false); + when(replicationConfig.hasNonNull(REPLICATION_SLOT)).thenReturn(false); assertFalse(StateManagerFactory.isCdc(config)); - when(replicationConfig.hasNonNull("replication_slot")).thenReturn(true); - when(replicationConfig.hasNonNull("publication")).thenReturn(false); + when(replicationConfig.hasNonNull(REPLICATION_SLOT)).thenReturn(true); + when(replicationConfig.hasNonNull(PUBLICATION)).thenReturn(false); assertFalse(StateManagerFactory.isCdc(config)); - when(replicationConfig.hasNonNull("replication_slot")).thenReturn(true); - when(replicationConfig.hasNonNull("publication")).thenReturn(true); - when(config.hasNonNull("replication_method")).thenReturn(false); + when(replicationConfig.hasNonNull(REPLICATION_SLOT)).thenReturn(true); + when(replicationConfig.hasNonNull(PUBLICATION)).thenReturn(true); + when(config.hasNonNull(REPLICATION_METHOD)).thenReturn(false); assertFalse(StateManagerFactory.isCdc(config)); - when(replicationConfig.hasNonNull("replication_slot")).thenReturn(false); - 
when(replicationConfig.hasNonNull("publication")).thenReturn(false); - when(config.hasNonNull("replication_method")).thenReturn(false); + when(replicationConfig.hasNonNull(REPLICATION_SLOT)).thenReturn(false); + when(replicationConfig.hasNonNull(PUBLICATION)).thenReturn(false); + when(config.hasNonNull(REPLICATION_METHOD)).thenReturn(false); assertFalse(StateManagerFactory.isCdc(config)); } diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StreamStateManagerTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StreamStateManagerTest.java new file mode 100644 index 000000000000..68d88cdd0405 --- /dev/null +++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StreamStateManagerTest.java @@ -0,0 +1,227 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.relationaldb.state; + +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.CURSOR; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.CURSOR_FIELD1; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.CURSOR_FIELD2; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.NAMESPACE; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.NAME_NAMESPACE_PAIR1; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.NAME_NAMESPACE_PAIR2; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME1; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME2; +import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME3; +import static 
org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.mockito.Mockito.mock; + +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; +import io.airbyte.integrations.source.relationaldb.models.DbState; +import io.airbyte.integrations.source.relationaldb.models.DbStreamState; +import io.airbyte.protocol.models.AirbyteStateMessage; +import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; +import io.airbyte.protocol.models.AirbyteStream; +import io.airbyte.protocol.models.AirbyteStreamState; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.ConfiguredAirbyteStream; +import io.airbyte.protocol.models.StreamDescriptor; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +/** + * Test suite for the {@link StreamStateManager} class. 
+ */ +public class StreamStateManagerTest { + + @Test + void testCreationFromInvalidState() { + final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage() + .withStateType(AirbyteStateType.STREAM) + .withStream(new AirbyteStreamState() + .withStreamDescriptor(new StreamDescriptor().withName(STREAM_NAME1).withNamespace(NAMESPACE)) + .withStreamState(Jsons.jsonNode("Not a state object"))); + final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + + Assertions.assertDoesNotThrow(() -> { + final StateManager stateManager = new StreamStateManager(List.of(airbyteStateMessage), catalog); + assertNotNull(stateManager); + }); + } + + @Test + void testGetters() { + final List state = new ArrayList<>(); + state.add(createStreamState(STREAM_NAME1, NAMESPACE, List.of(CURSOR_FIELD1), CURSOR)); + state.add(createStreamState(STREAM_NAME2, NAMESPACE, List.of(), null)); + + final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() + .withStreams(List.of( + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME1).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD1)), + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)))); + + final StateManager stateManager = new StreamStateManager(state, catalog); + + assertEquals(Optional.of(CURSOR_FIELD1), stateManager.getOriginalCursorField(NAME_NAMESPACE_PAIR1)); + assertEquals(Optional.of(CURSOR), stateManager.getOriginalCursor(NAME_NAMESPACE_PAIR1)); + assertEquals(Optional.of(CURSOR_FIELD1), stateManager.getCursorField(NAME_NAMESPACE_PAIR1)); + assertEquals(Optional.of(CURSOR), stateManager.getCursor(NAME_NAMESPACE_PAIR1)); + + assertEquals(Optional.empty(), stateManager.getOriginalCursorField(NAME_NAMESPACE_PAIR2)); + assertEquals(Optional.empty(), stateManager.getOriginalCursor(NAME_NAMESPACE_PAIR2)); + assertEquals(Optional.empty(), 
stateManager.getCursorField(NAME_NAMESPACE_PAIR2)); + assertEquals(Optional.empty(), stateManager.getCursor(NAME_NAMESPACE_PAIR2)); + } + + @Test + void testToState() { + final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() + .withStreams(List.of( + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME1).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD1)), + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD2)), + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME3).withNamespace(NAMESPACE)))); + + final StateManager stateManager = new StreamStateManager(createDefaultState(), catalog); + + final DbState expectedFirstDbState = new DbState().withStreams(List.of( + new DbStreamState() + .withStreamName(STREAM_NAME1) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD1)) + .withCursor("a"), + new DbStreamState() + .withStreamName(STREAM_NAME2) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD2)), + new DbStreamState() + .withStreamName(STREAM_NAME3) + .withStreamNamespace(NAMESPACE)) + .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())); + final AirbyteStateMessage expectedFirstEmission = + createStreamState(STREAM_NAME1, NAMESPACE, List.of(CURSOR_FIELD1), "a").withData(Jsons.jsonNode(expectedFirstDbState)); + + final AirbyteStateMessage actualFirstEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR1, "a"); + assertEquals(expectedFirstEmission, actualFirstEmission); + + final DbState expectedSecondDbState = new DbState().withStreams(List.of( + new DbStreamState() + .withStreamName(STREAM_NAME1) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD1)) + .withCursor("a"), + new DbStreamState() + .withStreamName(STREAM_NAME2) + 
.withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD2)) + .withCursor("b"), + new DbStreamState() + .withStreamName(STREAM_NAME3) + .withStreamNamespace(NAMESPACE)) + .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())); + final AirbyteStateMessage expectedSecondEmission = + createStreamState(STREAM_NAME2, NAMESPACE, List.of(CURSOR_FIELD2), "b").withData(Jsons.jsonNode(expectedSecondDbState)); + + final AirbyteStateMessage actualSecondEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR2, "b"); + assertEquals(expectedSecondEmission, actualSecondEmission); + } + + @Test + void testToStateWithoutCursorInfo() { + final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() + .withStreams(List.of( + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME1).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD1)), + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD2)), + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME3).withNamespace(NAMESPACE)))); + final AirbyteStreamNameNamespacePair airbyteStreamNameNamespacePair = new AirbyteStreamNameNamespacePair("other", "other"); + + final StateManager stateManager = new StreamStateManager(createDefaultState(), catalog); + final AirbyteStateMessage airbyteStateMessage = stateManager.toState(airbyteStreamNameNamespacePair); + assertNotNull(airbyteStateMessage); + assertEquals(AirbyteStateType.STREAM, airbyteStateMessage.getStateType()); + assertNotNull(airbyteStateMessage.getStream()); + } + + @Test + void testToStateNullCursorField() { + final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() + .withStreams(List.of( + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME1).withNamespace(NAMESPACE)) + 
.withCursorField(List.of(CURSOR_FIELD1)), + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)))); + final StateManager stateManager = new StreamStateManager(createDefaultState(), catalog); + + final DbState expectedFirstDbState = new DbState().withStreams(List.of( + new DbStreamState() + .withStreamName(STREAM_NAME1) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD1)) + .withCursor("a"), + new DbStreamState() + .withStreamName(STREAM_NAME2) + .withStreamNamespace(NAMESPACE)) + .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())); + + final AirbyteStateMessage expectedFirstEmission = + createStreamState(STREAM_NAME1, NAMESPACE, List.of(CURSOR_FIELD1), "a").withData(Jsons.jsonNode(expectedFirstDbState)); + final AirbyteStateMessage actualFirstEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR1, "a"); + assertEquals(expectedFirstEmission, actualFirstEmission); + } + + @Test + void testCdcStateManager() { + final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + final StateManager stateManager = new StreamStateManager( + List.of(new AirbyteStateMessage().withStateType(AirbyteStateType.STREAM).withStream(new AirbyteStreamState())), catalog); + assertNotNull(stateManager.getCdcStateManager()); + } + + private List createDefaultState() { + return List.of(new AirbyteStateMessage().withStateType(AirbyteStateType.STREAM).withStream(new AirbyteStreamState())); + } + + private AirbyteStateMessage createStreamState(final String name, + final String namespace, + final List cursorFields, + final String cursorValue) { + final DbStreamState dbStreamState = new DbStreamState() + .withStreamName(name) + .withStreamNamespace(namespace); + + if (cursorFields != null && !cursorFields.isEmpty()) { + dbStreamState.withCursorField(cursorFields); + } + + if (cursorValue != null) { + dbStreamState.withCursor(cursorValue); + } + + 
return new AirbyteStateMessage() + .withStateType(AirbyteStateType.STREAM) + .withStream(new AirbyteStreamState() + .withStreamDescriptor(new StreamDescriptor().withName(name).withNamespace(namespace)) + .withStreamState(Jsons.jsonNode(dbStreamState))); + } + +} From 9825df784b6177f2bc8041fceea71df427c87fa9 Mon Sep 17 00:00:00 2001 From: jdpgrailsdev Date: Mon, 13 Jun 2022 15:52:46 -0400 Subject: [PATCH 10/34] Fix test issues --- .../jdbc/test/JdbcSourceAcceptanceTest.java | 21 ++++++++++--------- .../postgres/PostgresCdcStateHandler.java | 6 +++++- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java index 3670d13c224a..b4dd8a59465f 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java @@ -45,6 +45,7 @@ import io.airbyte.protocol.models.DestinationSyncMode; import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaType; +import io.airbyte.protocol.models.StreamDescriptor; import io.airbyte.protocol.models.SyncMode; import java.math.BigDecimal; import java.sql.SQLException; @@ -1000,9 +1001,9 @@ protected boolean supportsPerStream() { protected JsonNode createEmptyState(final String streamName, final String streamNamespace) { if (supportsPerStream()) { final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage() - .withStateType(AirbyteStateType.PER_STREAM) - .withStreams(List.of(new AirbyteStreamState().withName(streamName).withNamespace(streamNamespace))); - return Jsons.jsonNode(airbyteStateMessage); + 
.withStateType(AirbyteStateType.STREAM) + .withStream(new AirbyteStreamState().withStreamDescriptor(new StreamDescriptor().withName(streamName).withNamespace(streamNamespace))); + return Jsons.jsonNode(List.of(airbyteStateMessage)); } else { final DbState dbState = new DbState() .withStreams(List.of(new DbStreamState().withStreamName(streamName).withStreamNamespace(streamNamespace))); @@ -1018,13 +1019,13 @@ protected JsonNode createEmptyState(final String streamName, final String stream */ protected JsonNode createState(final List streams) { if (supportsPerStream()) { - final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage() - .withStateType(AirbyteStateType.PER_STREAM) - .withStreams(streams.stream() - .map(s -> new AirbyteStreamState().withName(s.getStreamName()).withNamespace(s.getStreamNamespace()).withState(Jsons.jsonNode(s))) - .collect(Collectors.toList())); - - return Jsons.jsonNode(airbyteStateMessage); + final List messages = streams.stream() + .map(s -> new AirbyteStateMessage().withStateType(AirbyteStateType.STREAM) + .withStream(new AirbyteStreamState() + .withStreamDescriptor(new StreamDescriptor().withName(s.getStreamName()).withNamespace(s.getStreamNamespace())) + .withStreamState(Jsons.jsonNode(s)))) + .collect(Collectors.toList()); + return Jsons.jsonNode(messages); } else { final DbState dbState = new DbState() .withStreams(streams.stream().collect(Collectors.toList())); diff --git a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresCdcStateHandler.java b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresCdcStateHandler.java index ee5faa04f6d9..f3b72fc8fc8c 100644 --- a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresCdcStateHandler.java +++ 
b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresCdcStateHandler.java @@ -31,7 +31,11 @@ public AirbyteMessage saveState(final Map offset, final String d LOGGER.info("debezium state: {}", asJson); final CdcState cdcState = new CdcState().withState(asJson); stateManager.getCdcStateManager().setCdcState(cdcState); - final AirbyteStateMessage stateMessage = stateManager.emit(); + /* + * Namespace pair is ignored by global state manager, but is needed for satisfy the API contract. + * Therefore, it doesn't matter what we pass here, as it will be ignored. + */ + final AirbyteStateMessage stateMessage = stateManager.emit(null); return new AirbyteMessage().withType(Type.STATE).withState(stateMessage); } From 2d1b954728066f7dfdef07a08c8807314f9df42d Mon Sep 17 00:00:00 2001 From: jdpgrailsdev Date: Mon, 13 Jun 2022 16:11:01 -0400 Subject: [PATCH 11/34] Fix issue with updated method signature --- .../integrations/source/mssql/MssqlCdcStateHandler.java | 6 +++++- .../integrations/source/mysql/MySqlCdcStateHandler.java | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcStateHandler.java b/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcStateHandler.java index a054f5226740..21c16080d5d4 100644 --- a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcStateHandler.java +++ b/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcStateHandler.java @@ -41,7 +41,11 @@ public AirbyteMessage saveState(final Map offset, final String d final CdcState cdcState = new CdcState().withState(asJson); stateManager.getCdcStateManager().setCdcState(cdcState); - final AirbyteStateMessage stateMessage = stateManager.emit(); + /* + * Namespace pair is 
ignored by global state manager, but is needed for satisfy the API contract. + * Therefore, it doesn't matter what we pass here, as it will be ignored. + */ + final AirbyteStateMessage stateMessage = stateManager.emit(null); return new AirbyteMessage().withType(Type.STATE).withState(stateMessage); } diff --git a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlCdcStateHandler.java b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlCdcStateHandler.java index a5f950718c0e..c7f993191c0f 100644 --- a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlCdcStateHandler.java +++ b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlCdcStateHandler.java @@ -42,7 +42,11 @@ public AirbyteMessage saveState(final Map offset, final String d final CdcState cdcState = new CdcState().withState(asJson); stateManager.getCdcStateManager().setCdcState(cdcState); - final AirbyteStateMessage stateMessage = stateManager.emit(); + /* + * Namespace pair is ignored by global state manager, but is needed for satisfy the API contract. + * Therefore, it doesn't matter what we pass here, as it will be ignored. 
+ */ + final AirbyteStateMessage stateMessage = stateManager.emit(null); return new AirbyteMessage().withType(Type.STATE).withState(stateMessage); } From 15cd5d21b220329967a6082b33360b7c78fbcd46 Mon Sep 17 00:00:00 2001 From: jdpgrailsdev Date: Mon, 13 Jun 2022 16:54:10 -0400 Subject: [PATCH 12/34] Handle empty state case in global state manager --- .../relationaldb/state/GlobalStateManager.java | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/GlobalStateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/GlobalStateManager.java index 1e9f02af7c96..d5c58b6683b2 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/GlobalStateManager.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/GlobalStateManager.java @@ -20,6 +20,7 @@ import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import io.airbyte.protocol.models.StreamDescriptor; import java.util.Collection; +import java.util.List; import java.util.function.Supplier; import java.util.stream.Collectors; @@ -110,12 +111,19 @@ private static Supplier> getStreamsSupplier(final * we can look for streams in the "global" field of the message. Otherwise, the message is still * storing state in the legacy "data" field. */ - return () -> airbyteStateMessage.getStateType() == AirbyteStateType.GLOBAL ? 
airbyteStateMessage.getGlobal().getStreamStates() - : Jsons.object(airbyteStateMessage.getData(), DbState.class).getStreams().stream() + return () -> { + if (airbyteStateMessage.getStateType() == AirbyteStateType.GLOBAL) { + return airbyteStateMessage.getGlobal().getStreamStates(); + } else if (airbyteStateMessage.getData() != null) { + return Jsons.object(airbyteStateMessage.getData(), DbState.class).getStreams().stream() .map(s -> new AirbyteStreamState().withStreamState(Jsons.jsonNode(s)) .withStreamDescriptor(new StreamDescriptor().withNamespace(s.getStreamNamespace()).withName(s.getStreamName()))) .collect( Collectors.toList()); + } else { + return List.of(); + } + }; } } From 80a497a9a9a13a6215ec23acfd9dcedd541d60a2 Mon Sep 17 00:00:00 2001 From: jdpgrailsdev Date: Mon, 13 Jun 2022 16:54:17 -0400 Subject: [PATCH 13/34] Adjust to protocol changes --- .../jdbc/test/JdbcSourceAcceptanceTest.java | 67 +++++++++---------- .../PostgresJdbcSourceAcceptanceTest.java | 5 +- 2 files changed, 34 insertions(+), 38 deletions(-) diff --git a/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java index b4dd8a59465f..2e3a6578a9f5 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java @@ -354,12 +354,15 @@ void testDiscoverWithMultipleSchemas() throws Exception { final AirbyteCatalog actual = source.discover(config); final AirbyteCatalog expected = getCatalog(getDefaultNamespace()); - expected.getStreams().add(CatalogHelpers + final List catalogStreams = new ArrayList<>(); + catalogStreams.addAll(expected.getStreams()); + 
catalogStreams.add(CatalogHelpers .createAirbyteStream(TABLE_NAME, SCHEMA_NAME2, Field.of(COL_ID, JsonSchemaType.STRING), Field.of(COL_NAME, JsonSchemaType.STRING)) .withSupportedSyncModes(List.of(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))); + expected.setStreams(catalogStreams); // sort streams by name so that we are comparing lists with the same order. final Comparator schemaTableCompare = Comparator.comparing(stream -> stream.getNamespace() + "." + stream.getName()); expected.getStreams().sort(schemaTableCompare); @@ -661,9 +664,7 @@ protected List getExpectedAirbyteMessagesSecondSync(final String .withStreamNamespace(namespace) .withCursorField(List.of(COL_ID)) .withCursor("5"); - expectedMessages.add(new AirbyteMessage() - .withType(Type.STATE) - .withState(Jsons.object(createState(List.of(state)), AirbyteStateMessage.class))); + expectedMessages.addAll(createExpectedTestMessages(List.of(state))); return expectedMessages; } @@ -734,9 +735,9 @@ void testReadMultipleTablesIncrementally() throws Exception { .withCursor("3")); final List expectedMessagesFirstSync = new ArrayList<>(getTestMessages()); - expectedMessagesFirstSync.add(createExpectedTestMessage(expectedStateStreams1)); + expectedMessagesFirstSync.addAll(createExpectedTestMessages(expectedStateStreams1)); expectedMessagesFirstSync.addAll(secondStreamExpectedMessages); - expectedMessagesFirstSync.add(createExpectedTestMessage(expectedStateStreams2)); + expectedMessagesFirstSync.addAll(createExpectedTestMessages(expectedStateStreams2)); setEmittedAtToNull(actualMessagesFirstSync); @@ -803,7 +804,7 @@ private void incrementalCursorCheck( .withCursor(initialCursorValue); final List actualMessages = MoreIterators - .toList(source.read(config, configuredCatalog, createState(List.of(dbStreamState)))); + .toList(source.read(config, configuredCatalog, Jsons.jsonNode(createState(List.of(dbStreamState))))); setEmittedAtToNull(actualMessages); @@ -814,7 +815,7 @@ private void incrementalCursorCheck( 
.withCursorField(List.of(cursorField)) .withCursor(endCursorValue)); final List expectedMessages = new ArrayList<>(expectedRecordMessages); - expectedMessages.add(createExpectedTestMessage(expectedStreams)); + expectedMessages.addAll(createExpectedTestMessages(expectedStreams)); assertEquals(actualMessages.size(), expectedMessages.size()); assertEquals(actualMessages, expectedMessages); @@ -883,10 +884,30 @@ protected List getTestMessages() { COL_UPDATED_AT, "2006-10-19T00:00:00Z"))))); } - protected AirbyteMessage createExpectedTestMessage(final List states) { - return new AirbyteMessage() - .withType(Type.STATE) - .withState(Jsons.object(createState(states), AirbyteStateMessage.class)); + protected List createExpectedTestMessages(final List states) { + return supportsPerStream() + ? states.stream() + .map(s -> new AirbyteMessage().withType(Type.STATE) + .withState(new AirbyteStateMessage().withStateType(AirbyteStateType.STREAM) + .withStream(new AirbyteStreamState() + .withStreamDescriptor(new StreamDescriptor().withNamespace(s.getStreamNamespace()).withName(s.getStreamName())) + .withStreamState(Jsons.jsonNode(s))))) + .collect( + Collectors.toList()) + : List.of(new AirbyteMessage().withType(Type.STATE).withState(new AirbyteStateMessage().withStateType(AirbyteStateType.LEGACY) + .withData(Jsons.jsonNode(new DbState().withCdc(false).withStreams(states))))); + } + + protected List createState(final List states) { + return supportsPerStream() + ? 
states.stream() + .map(s -> new AirbyteStateMessage().withStateType(AirbyteStateType.STREAM) + .withStream(new AirbyteStreamState() + .withStreamDescriptor(new StreamDescriptor().withNamespace(s.getStreamNamespace()).withName(s.getStreamName())) + .withStreamState(Jsons.jsonNode(s)))) + .collect( + Collectors.toList()) + : List.of(new AirbyteStateMessage().withStateType(AirbyteStateType.LEGACY).withData(Jsons.jsonNode(new DbState().withStreams(states)))); } protected ConfiguredAirbyteStream createTableWithSpaces() throws SQLException { @@ -1011,28 +1032,6 @@ protected JsonNode createEmptyState(final String streamName, final String stream } } - /** - * Creates state with the provided stream(s). - * - * @param streams A list of streams. - * @return A {@link JsonNode} representation of the state with the provided stream state. - */ - protected JsonNode createState(final List streams) { - if (supportsPerStream()) { - final List messages = streams.stream() - .map(s -> new AirbyteStateMessage().withStateType(AirbyteStateType.STREAM) - .withStream(new AirbyteStreamState() - .withStreamDescriptor(new StreamDescriptor().withName(s.getStreamName()).withNamespace(s.getStreamNamespace())) - .withStreamState(Jsons.jsonNode(s)))) - .collect(Collectors.toList()); - return Jsons.jsonNode(messages); - } else { - final DbState dbState = new DbState() - .withStreams(streams.stream().collect(Collectors.toList())); - return Jsons.jsonNode(dbState); - } - } - /** * Extracts the state component from the provided {@link AirbyteMessage} based on the value returned * by {@link #supportsPerStream()}. 
diff --git a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresJdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresJdbcSourceAcceptanceTest.java index 81f18cca890b..1dba9e85dc3c 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresJdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresJdbcSourceAcceptanceTest.java @@ -26,7 +26,6 @@ import io.airbyte.protocol.models.AirbyteCatalog; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.AirbyteRecordMessage; -import io.airbyte.protocol.models.AirbyteStateMessage; import io.airbyte.protocol.models.CatalogHelpers; import io.airbyte.protocol.models.ConfiguredAirbyteStream; import io.airbyte.protocol.models.ConnectorSpecification; @@ -435,9 +434,7 @@ protected List getExpectedAirbyteMessagesSecondSync(final String .withStreamNamespace(namespace) .withCursorField(ImmutableList.of(COL_ID)) .withCursor("5"); - expectedMessages.add(new AirbyteMessage() - .withType(AirbyteMessage.Type.STATE) - .withState(Jsons.object(createState(List.of(state)), AirbyteStateMessage.class))); + expectedMessages.addAll(createExpectedTestMessages(List.of(state))); return expectedMessages; } From e8b853b984c2817bdec4d094a9dfa79833dfec48 Mon Sep 17 00:00:00 2001 From: jdpgrailsdev Date: Tue, 14 Jun 2022 12:37:32 -0400 Subject: [PATCH 14/34] Fix failing acceptance tests --- .../jdbc/test/JdbcSourceAcceptanceTest.java | 68 ++++++++++++------- .../source/postgres/PostgresSource.java | 18 +++-- .../PostgresJdbcSourceAcceptanceTest.java | 3 +- .../source/relationaldb/AbstractDbSource.java | 5 +- .../state/StateGeneratorUtils.java | 10 +-- .../relationaldb/state/StateManager.java | 5 ++ 
.../state/StateManagerFactory.java | 6 +- .../state/StreamStateManager.java | 1 + .../state/StateManagerFactoryTest.java | 43 +++++++++++- 9 files changed, 115 insertions(+), 44 deletions(-) diff --git a/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java index 2e3a6578a9f5..f2cc62046cfc 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java @@ -392,8 +392,7 @@ void testReadOneColumn() throws Exception { setEmittedAtToNull(actualMessages); final List expectedMessages = getAirbyteMessagesReadOneColumn(); - assertEquals(actualMessages.size(), expectedMessages.size()); - assertEquals(actualMessages, expectedMessages); + assertEquals(expectedMessages.size(), actualMessages.size()); assertEquals(expectedMessages, actualMessages); } @@ -414,7 +413,7 @@ protected List getAirbyteMessagesReadOneColumn() { void testReadMultipleTables() throws Exception { final ConfiguredAirbyteCatalog catalog = getConfiguredCatalogWithOneStream( getDefaultNamespace()); - final List expectedMessages = new ArrayList<>(getTestMessages()); + final List expectedMessages = getTestMessages(); for (int i = 2; i < 10; i++) { final int iFinal = i; @@ -449,8 +448,7 @@ void testReadMultipleTables() throws Exception { setEmittedAtToNull(actualMessages); - assertEquals(actualMessages.size(), expectedMessages.size()); - assertEquals(actualMessages, expectedMessages); + assertEquals(expectedMessages.size(), actualMessages.size()); assertEquals(expectedMessages, actualMessages); } @@ -483,11 +481,10 @@ void testTablesWithQuoting() throws Exception { 
setEmittedAtToNull(actualMessages); final List secondStreamExpectedMessages = getAirbyteMessagesForTablesWithQuoting(streamForTableWithSpaces); - final List expectedMessages = new ArrayList<>(getTestMessages()); + final List expectedMessages = getTestMessages(); expectedMessages.addAll(secondStreamExpectedMessages); - assertEquals(actualMessages.size(), expectedMessages.size()); - assertEquals(actualMessages, expectedMessages); + assertEquals(expectedMessages.size(), actualMessages.size()); assertEquals(expectedMessages, actualMessages); } @@ -629,8 +626,7 @@ void testReadOneTableIncrementallyTwice() throws Exception { setEmittedAtToNull(actualMessagesSecondSync); - assertEquals(actualMessagesSecondSync.size(), expectedMessages.size()); - assertEquals(actualMessagesSecondSync, expectedMessages); + assertEquals(expectedMessages.size(), actualMessagesSecondSync.size()); assertEquals(expectedMessages, actualMessagesSecondSync); } @@ -712,6 +708,8 @@ void testReadMultipleTablesIncrementally() throws Exception { // we know the second streams messages are the same as the first minus the updated at column. so we // cheat and generate the expected messages off of the first expected messages. 
final List secondStreamExpectedMessages = getAirbyteMessagesSecondStreamWithNamespace(streamName2); + + // Represents the state after the first stream has been updated final List expectedStateStreams1 = List.of( new DbStreamState() .withStreamName(streamName) @@ -723,11 +721,13 @@ void testReadMultipleTablesIncrementally() throws Exception { .withStreamNamespace(namespace) .withCursorField(List.of(COL_ID))); - final List expectedStateStreams2 = List.of(new DbStreamState() - .withStreamName(streamName) - .withStreamNamespace(namespace) - .withCursorField(List.of(COL_ID)) - .withCursor("3"), + // Represents the state after both streams have been updated + final List expectedStateStreams2 = List.of( + new DbStreamState() + .withStreamName(streamName) + .withStreamNamespace(namespace) + .withCursorField(List.of(COL_ID)) + .withCursor("3"), new DbStreamState() .withStreamName(streamName2) .withStreamNamespace(namespace) @@ -735,14 +735,13 @@ void testReadMultipleTablesIncrementally() throws Exception { .withCursor("3")); final List expectedMessagesFirstSync = new ArrayList<>(getTestMessages()); - expectedMessagesFirstSync.addAll(createExpectedTestMessages(expectedStateStreams1)); + expectedMessagesFirstSync.add(createStateMessage(expectedStateStreams1.get(0), expectedStateStreams1)); expectedMessagesFirstSync.addAll(secondStreamExpectedMessages); - expectedMessagesFirstSync.addAll(createExpectedTestMessages(expectedStateStreams2)); + expectedMessagesFirstSync.add(createStateMessage(expectedStateStreams2.get(1), expectedStateStreams2)); setEmittedAtToNull(actualMessagesFirstSync); - assertEquals(actualMessagesFirstSync.size(), expectedMessagesFirstSync.size()); - assertEquals(actualMessagesFirstSync, expectedMessagesFirstSync); + assertEquals(expectedMessagesFirstSync.size(), actualMessagesFirstSync.size()); assertEquals(expectedMessagesFirstSync, actualMessagesFirstSync); } @@ -817,8 +816,7 @@ private void incrementalCursorCheck( final List expectedMessages = new 
ArrayList<>(expectedRecordMessages); expectedMessages.addAll(createExpectedTestMessages(expectedStreams)); - assertEquals(actualMessages.size(), expectedMessages.size()); - assertEquals(actualMessages, expectedMessages); + assertEquals(expectedMessages.size(), actualMessages.size()); assertEquals(expectedMessages, actualMessages); } @@ -888,10 +886,12 @@ protected List createExpectedTestMessages(final List new AirbyteMessage().withType(Type.STATE) - .withState(new AirbyteStateMessage().withStateType(AirbyteStateType.STREAM) - .withStream(new AirbyteStreamState() - .withStreamDescriptor(new StreamDescriptor().withNamespace(s.getStreamNamespace()).withName(s.getStreamName())) - .withStreamState(Jsons.jsonNode(s))))) + .withState( + new AirbyteStateMessage().withStateType(AirbyteStateType.STREAM) + .withStream(new AirbyteStreamState() + .withStreamDescriptor(new StreamDescriptor().withNamespace(s.getStreamNamespace()).withName(s.getStreamName())) + .withStreamState(Jsons.jsonNode(s))) + .withData(Jsons.jsonNode(new DbState().withCdc(false).withStreams(states))))) .collect( Collectors.toList()) : List.of(new AirbyteMessage().withType(Type.STATE).withState(new AirbyteStateMessage().withStateType(AirbyteStateType.LEGACY) @@ -1041,10 +1041,26 @@ protected JsonNode createEmptyState(final String streamName, final String stream */ protected JsonNode extractState(final AirbyteMessage airbyteMessage) { if (supportsPerStream()) { - return Jsons.jsonNode(airbyteMessage.getState()); + return Jsons.jsonNode(List.of(airbyteMessage.getState())); } else { return airbyteMessage.getState().getData(); } } + protected AirbyteMessage createStateMessage(final DbStreamState dbStreamState, final List legacyStates) { + if (supportsPerStream()) { + return new AirbyteMessage().withType(Type.STATE) + .withState( + new AirbyteStateMessage().withStateType(AirbyteStateType.STREAM) + .withStream(new AirbyteStreamState() + .withStreamDescriptor(new 
StreamDescriptor().withNamespace(dbStreamState.getStreamNamespace()) + .withName(dbStreamState.getStreamName())) + .withStreamState(Jsons.jsonNode(dbStreamState))) + .withData(Jsons.jsonNode(new DbState().withCdc(false).withStreams(legacyStates)))); + } else { + return new AirbyteMessage().withType(Type.STATE).withState(new AirbyteStateMessage().withStateType(AirbyteStateType.LEGACY) + .withData(Jsons.jsonNode(new DbState().withCdc(false).withStreams(legacyStates)))); + } + } + } diff --git a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java index 016e0071c8da..645d1c2a797f 100644 --- a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java +++ b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java @@ -27,10 +27,12 @@ import io.airbyte.integrations.source.jdbc.AbstractJdbcSource; import io.airbyte.integrations.source.jdbc.dto.JdbcPrivilegeDto; import io.airbyte.integrations.source.relationaldb.TableInfo; +import io.airbyte.integrations.source.relationaldb.models.CdcState; import io.airbyte.integrations.source.relationaldb.state.AirbyteStateMessageListTypeReference; import io.airbyte.integrations.source.relationaldb.state.StateManager; import io.airbyte.protocol.models.AirbyteCatalog; import io.airbyte.protocol.models.AirbyteConnectionStatus; +import io.airbyte.protocol.models.AirbyteGlobalState; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.AirbyteStateMessage; import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; @@ -411,12 +413,18 @@ private static AirbyteStream addCdcMetadataColumns(final AirbyteStream stream) { // TODO This is a temporary override so that the Postgres source can 
take advantage of per-stream // state. @Override - protected List deserializeState(final JsonNode stateJson) { + protected List deserializeState(final JsonNode stateJson, final JsonNode config) { if (stateJson == null) { - // TODO What should the default/empty state be -- per stream or global? - return List.of(new AirbyteStateMessage() - .withStateType(AirbyteStateType.STREAM) - .withStream(new AirbyteStreamState())); + if (isCdc(config)) { + final AirbyteGlobalState globalState = new AirbyteGlobalState() + .withSharedState(Jsons.jsonNode(new CdcState())) + .withStreamStates(List.of()); + return List.of(new AirbyteStateMessage().withStateType(AirbyteStateType.GLOBAL).withGlobal(globalState)); + } else { + return List.of(new AirbyteStateMessage() + .withStateType(AirbyteStateType.STREAM) + .withStream(new AirbyteStreamState())); + } } else { try { return Jsons.object(stateJson, new AirbyteStateMessageListTypeReference()); diff --git a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresJdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresJdbcSourceAcceptanceTest.java index 1dba9e85dc3c..1695d4ed8543 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresJdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresJdbcSourceAcceptanceTest.java @@ -427,8 +427,7 @@ protected List getExpectedAirbyteMessagesSecondSync(final String COL_UPDATED_AT, "2006-10-19", COL_WAKEUP_AT, "12:12:12.123456-05:00", COL_LAST_VISITED_AT, "2006-10-19T17:23:54.123456Z", - COL_LAST_COMMENT_AT, "2006-01-01T17:23:54.123456", - COL_UPDATED_AT, "2006-10-19"))))); + COL_LAST_COMMENT_AT, "2006-01-01T17:23:54.123456"))))); final DbStreamState state = new DbStreamState() .withStreamName(streamName) 
.withStreamNamespace(namespace) diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java index 4dc28d1bcdd7..7921f1f9874e 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java @@ -108,7 +108,7 @@ public AutoCloseableIterator read(final JsonNode config, final ConfiguredAirbyteCatalog catalog, final JsonNode state) throws Exception { - final StateManager stateManager = StateManagerFactory.createStateManager(deserializeState(state), catalog, config); + final StateManager stateManager = StateManagerFactory.createStateManager(deserializeState(state, config), catalog, config); final Instant emittedAt = Instant.now(); final Database database = createDatabaseInternal(config); @@ -516,9 +516,10 @@ private Database createDatabaseInternal(final JsonNode sourceConfig) throws Exce * Deserializes the state represented as JSON into an object representation. * * @param stateJson The state as JSON. + * @param config The plugin configuration. * @return The deserialized object representation of the state. 
*/ - protected List deserializeState(final JsonNode stateJson) { + protected List deserializeState(final JsonNode stateJson, final JsonNode config) { if (stateJson == null) { // For backwards compatibility with existing connectors return List.of(new AirbyteStateMessage().withStateType(AirbyteStateType.LEGACY).withData(Jsons.jsonNode(new DbState()))); diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtils.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtils.java index a8ab92e2294e..ecfa8c412732 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtils.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtils.java @@ -100,10 +100,12 @@ public static List generateStreamStateList(final Map pairToCursorInfoMap) { - return new DbState().withStreams(pairToCursorInfoMap.entrySet().stream() - .sorted(Entry.comparingByKey()) // sort by stream name then namespace for sanity. - .map(e -> generateDbStreamState(e.getKey(), e.getValue())) - .collect(Collectors.toList())); + return new DbState() + .withCdc(false) + .withStreams(pairToCursorInfoMap.entrySet().stream() + .sorted(Entry.comparingByKey()) // sort by stream name then namespace for sanity. 
+ .map(e -> generateDbStreamState(e.getKey(), e.getValue())) + .collect(Collectors.toList())); } /** diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManager.java index 4b25139247c4..fdd0265eb584 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManager.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManager.java @@ -11,6 +11,8 @@ import io.airbyte.protocol.models.AirbyteStateMessage; import java.util.Map; import java.util.Optional; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Defines a manager that manages connector state. Connector state is used to keep track of the data @@ -21,6 +23,8 @@ */ public interface StateManager { + Logger LOGGER = LoggerFactory.getLogger(StateManager.class); + /** * Retrieves the {@link CdcStateManager} associated with the state manager. 
* @@ -136,6 +140,7 @@ default AirbyteStateMessage emit(final AirbyteStreamNameNamespacePair pair) { default AirbyteStateMessage updateAndEmit(final AirbyteStreamNameNamespacePair pair, final String cursor) { final Optional cursorInfo = getCursorInfo(pair); Preconditions.checkState(cursorInfo.isPresent(), "Could not find cursor information for stream: " + pair); + LOGGER.debug("Updating cursor value for {} to {}...", pair, cursor); cursorInfo.get().setCursor(cursor); return emit(pair); } diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java index 84e7dced88c6..8e776ec8b81c 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java @@ -44,11 +44,11 @@ public static StateManager createStateManager(final List st if (isCdc(config) || airbyteStateMessage.getStateType() == AirbyteStateType.GLOBAL) { LOGGER.info("Global state manager selected to manage state object with type {}.", airbyteStateMessage.getStateType()); return new GlobalStateManager(airbyteStateMessage, catalog); - } else if (airbyteStateMessage.getData() != null) { - LOGGER.info("Legacy adapter state manager selected to manage state object with type {}.", airbyteStateMessage.getStateType()); + } else if (airbyteStateMessage.getData() != null && airbyteStateMessage.getStream() == null) { + LOGGER.info("Legacy state manager selected to manage state object with type {}.", airbyteStateMessage.getStateType()); return new LegacyStateManager(Jsons.object(airbyteStateMessage.getData(), DbState.class), catalog); } else { - 
LOGGER.info("Per stream state manager selected to manage state object with type {}.", airbyteStateMessage.getStateType()); + LOGGER.info("Stream state manager selected to manage state object with type {}.", airbyteStateMessage.getStateType()); return new StreamStateManager(state, catalog); } } else { diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StreamStateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StreamStateManager.java index 92921ea15271..0e365238ac05 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StreamStateManager.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StreamStateManager.java @@ -62,6 +62,7 @@ public AirbyteStateMessage toState(final AirbyteStreamNameNamespacePair pair) { final Optional cursorInfo = Optional.ofNullable(pairToCursorInfoMap.get(pair)); if (cursorInfo.isPresent()) { + LOGGER.debug("Generating state message for {}...", pair); return new AirbyteStateMessage() .withStateType(AirbyteStateType.STREAM) // Temporarily include legacy state for backwards compatibility with the platform diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java index d0d8f70bfd14..5cbbebe60c7b 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java +++ 
b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java @@ -50,7 +50,7 @@ void testNullOrEmptyState() { } @Test - void testLegacyAdapterStateManagerCreationFromAirbyteStateMessage() { + void testLegacyStateManagerCreationFromAirbyteStateMessage() { final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); final AirbyteStateMessage airbyteStateMessage = mock(AirbyteStateMessage.class); final JsonNode config = mock(JsonNode.class); @@ -86,7 +86,31 @@ void testGlobalStateManagerCreation() { } @Test - void testPerStreamStateManagerCreation() { + void testGlobalStateManagerCreationWithLegacyDataPresent() { + final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + final AirbyteGlobalState globalState = + new AirbyteGlobalState().withSharedState(Jsons.jsonNode(new DbState().withCdcState(new CdcState().withState(Jsons.jsonNode(new DbState()))))) + .withStreamStates(List.of(new AirbyteStreamState().withStreamDescriptor(new StreamDescriptor().withNamespace(NAMESPACE).withName(NAME)) + .withStreamState(Jsons.jsonNode(new DbStreamState())))); + final AirbyteStateMessage airbyteStateMessage = + new AirbyteStateMessage().withStateType(AirbyteStateType.GLOBAL).withGlobal(globalState).withData(Jsons.jsonNode(new DbState())); + final JsonNode config = mock(JsonNode.class); + final JsonNode replicationConfig = mock(JsonNode.class); + + when(replicationConfig.hasNonNull(REPLICATION_SLOT)).thenReturn(true); + when(replicationConfig.hasNonNull(PUBLICATION)).thenReturn(true); + + when(config.hasNonNull(REPLICATION_METHOD)).thenReturn(true); + when(config.get(REPLICATION_METHOD)).thenReturn(replicationConfig); + + final StateManager stateManager = StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, config); + + Assertions.assertNotNull(stateManager); + Assertions.assertEquals(GlobalStateManager.class, stateManager.getClass()); 
+ } + + @Test + void testStreamStateManagerCreation() { final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage().withStateType(AirbyteStateType.STREAM) .withStream(new AirbyteStreamState().withStreamDescriptor(new StreamDescriptor().withName(NAME).withNamespace( @@ -99,6 +123,21 @@ void testPerStreamStateManagerCreation() { Assertions.assertEquals(StreamStateManager.class, stateManager.getClass()); } + @Test + void testStreamStateManagerCreationWithLegacyDataPresent() { + final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage().withStateType(AirbyteStateType.STREAM) + .withStream(new AirbyteStreamState().withStreamDescriptor(new StreamDescriptor().withName(NAME).withNamespace( + NAMESPACE)).withStreamState(Jsons.jsonNode(new DbStreamState()))) + .withData(Jsons.jsonNode(new DbState())); + final JsonNode config = mock(JsonNode.class); + + final StateManager stateManager = StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, config); + + Assertions.assertNotNull(stateManager); + Assertions.assertEquals(StreamStateManager.class, stateManager.getClass()); + } + @Test void testCdcDetectionLogic() { final JsonNode config = mock(JsonNode.class); From 9a940e4f11c0f3e01b64eae24be8d946f606f984 Mon Sep 17 00:00:00 2001 From: jdpgrailsdev Date: Tue, 14 Jun 2022 12:53:19 -0400 Subject: [PATCH 15/34] Fix failing test --- .../state/StreamStateManagerTest.java | 84 ++++++++++--------- 1 file changed, 45 insertions(+), 39 deletions(-) diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StreamStateManagerTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StreamStateManagerTest.java index 68d88cdd0405..8ca851a95488 
100644 --- a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StreamStateManagerTest.java +++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StreamStateManagerTest.java @@ -98,41 +98,45 @@ void testToState() { final StateManager stateManager = new StreamStateManager(createDefaultState(), catalog); - final DbState expectedFirstDbState = new DbState().withStreams(List.of( - new DbStreamState() - .withStreamName(STREAM_NAME1) - .withStreamNamespace(NAMESPACE) - .withCursorField(List.of(CURSOR_FIELD1)) - .withCursor("a"), - new DbStreamState() - .withStreamName(STREAM_NAME2) - .withStreamNamespace(NAMESPACE) - .withCursorField(List.of(CURSOR_FIELD2)), - new DbStreamState() - .withStreamName(STREAM_NAME3) - .withStreamNamespace(NAMESPACE)) - .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())); + final DbState expectedFirstDbState = new DbState() + .withCdc(false) + .withStreams(List.of( + new DbStreamState() + .withStreamName(STREAM_NAME1) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD1)) + .withCursor("a"), + new DbStreamState() + .withStreamName(STREAM_NAME2) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD2)), + new DbStreamState() + .withStreamName(STREAM_NAME3) + .withStreamNamespace(NAMESPACE)) + .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())); final AirbyteStateMessage expectedFirstEmission = createStreamState(STREAM_NAME1, NAMESPACE, List.of(CURSOR_FIELD1), "a").withData(Jsons.jsonNode(expectedFirstDbState)); final AirbyteStateMessage actualFirstEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR1, "a"); assertEquals(expectedFirstEmission, actualFirstEmission); - final DbState expectedSecondDbState = new DbState().withStreams(List.of( - new DbStreamState() - 
.withStreamName(STREAM_NAME1) - .withStreamNamespace(NAMESPACE) - .withCursorField(List.of(CURSOR_FIELD1)) - .withCursor("a"), - new DbStreamState() - .withStreamName(STREAM_NAME2) - .withStreamNamespace(NAMESPACE) - .withCursorField(List.of(CURSOR_FIELD2)) - .withCursor("b"), - new DbStreamState() - .withStreamName(STREAM_NAME3) - .withStreamNamespace(NAMESPACE)) - .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())); + final DbState expectedSecondDbState = new DbState() + .withCdc(false) + .withStreams(List.of( + new DbStreamState() + .withStreamName(STREAM_NAME1) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD1)) + .withCursor("a"), + new DbStreamState() + .withStreamName(STREAM_NAME2) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD2)) + .withCursor("b"), + new DbStreamState() + .withStreamName(STREAM_NAME3) + .withStreamNamespace(NAMESPACE)) + .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())); final AirbyteStateMessage expectedSecondEmission = createStreamState(STREAM_NAME2, NAMESPACE, List.of(CURSOR_FIELD2), "b").withData(Jsons.jsonNode(expectedSecondDbState)); @@ -172,16 +176,18 @@ void testToStateNullCursorField() { .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)))); final StateManager stateManager = new StreamStateManager(createDefaultState(), catalog); - final DbState expectedFirstDbState = new DbState().withStreams(List.of( - new DbStreamState() - .withStreamName(STREAM_NAME1) - .withStreamNamespace(NAMESPACE) - .withCursorField(List.of(CURSOR_FIELD1)) - .withCursor("a"), - new DbStreamState() - .withStreamName(STREAM_NAME2) - .withStreamNamespace(NAMESPACE)) - .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())); + final DbState expectedFirstDbState = new DbState() + .withCdc(false) + .withStreams(List.of( + new DbStreamState() + 
.withStreamName(STREAM_NAME1) + .withStreamNamespace(NAMESPACE) + .withCursorField(List.of(CURSOR_FIELD1)) + .withCursor("a"), + new DbStreamState() + .withStreamName(STREAM_NAME2) + .withStreamNamespace(NAMESPACE)) + .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())); final AirbyteStateMessage expectedFirstEmission = createStreamState(STREAM_NAME1, NAMESPACE, List.of(CURSOR_FIELD1), "a").withData(Jsons.jsonNode(expectedFirstDbState)); From be766c7b60338fdba85cc6ca1e7773eb74275a11 Mon Sep 17 00:00:00 2001 From: jdpgrailsdev Date: Tue, 14 Jun 2022 13:13:23 -0400 Subject: [PATCH 16/34] Fix unmodifiable list issue --- .../source/jdbc/test/JdbcSourceAcceptanceTest.java | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java index f2cc62046cfc..c38779b4b103 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java @@ -439,8 +439,7 @@ void testReadMultipleTables() throws Exception { Field.of(COL_ID, JsonSchemaType.NUMBER), Field.of(COL_NAME, JsonSchemaType.STRING))); - final List secondStreamExpectedMessages = getAirbyteMessagesSecondSync(streamName2); - expectedMessages.addAll(secondStreamExpectedMessages); + expectedMessages.addAll(getAirbyteMessagesSecondSync(streamName2)); } final List actualMessages = MoreIterators @@ -480,9 +479,8 @@ void testTablesWithQuoting() throws Exception { setEmittedAtToNull(actualMessages); - final List secondStreamExpectedMessages = 
getAirbyteMessagesForTablesWithQuoting(streamForTableWithSpaces); - final List expectedMessages = getTestMessages(); - expectedMessages.addAll(secondStreamExpectedMessages); + final List expectedMessages = new ArrayList<>(getTestMessages()); + expectedMessages.addAll(getAirbyteMessagesForTablesWithQuoting(streamForTableWithSpaces)); assertEquals(expectedMessages.size(), actualMessages.size()); assertEquals(expectedMessages, actualMessages); From e34efc11f6b6bb1280089843a4218cf86466505b Mon Sep 17 00:00:00 2001 From: jdpgrailsdev Date: Tue, 14 Jun 2022 13:31:25 -0400 Subject: [PATCH 17/34] Fix unmodifiable exception --- .../integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java index c38779b4b103..74d8d7add0af 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java @@ -413,7 +413,7 @@ protected List getAirbyteMessagesReadOneColumn() { void testReadMultipleTables() throws Exception { final ConfiguredAirbyteCatalog catalog = getConfiguredCatalogWithOneStream( getDefaultNamespace()); - final List expectedMessages = getTestMessages(); + final List expectedMessages = new ArrayList<>(getTestMessages()); for (int i = 2; i < 10; i++) { final int iFinal = i; From 9c9aefb774a2b1aa45e4cac3e529935b829b75a5 Mon Sep 17 00:00:00 2001 From: jdpgrailsdev Date: Tue, 14 Jun 2022 13:52:26 -0400 Subject: [PATCH 18/34] PR feedback --- .../source/mssql/MssqlCdcStateHandler.java | 5 ++-- 
.../source/mysql/MySqlCdcStateHandler.java | 5 ++-- .../postgres/PostgresCdcStateHandler.java | 5 ++-- .../state/AbstractStateManager.java | 3 +- .../state/GlobalStateManager.java | 3 +- .../state/LegacyStateManager.java | 4 +-- .../relationaldb/state/StateManager.java | 7 +++-- .../state/StreamStateManager.java | 30 +++++++++++-------- .../state/StreamStateManagerTest.java | 26 ++++++++++++++-- 9 files changed, 60 insertions(+), 28 deletions(-) diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcStateHandler.java b/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcStateHandler.java index 21c16080d5d4..ad275bda45c2 100644 --- a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcStateHandler.java +++ b/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcStateHandler.java @@ -17,6 +17,7 @@ import io.airbyte.protocol.models.AirbyteStateMessage; import java.util.HashMap; import java.util.Map; +import java.util.Optional; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -43,9 +44,9 @@ public AirbyteMessage saveState(final Map offset, final String d stateManager.getCdcStateManager().setCdcState(cdcState); /* * Namespace pair is ignored by global state manager, but is needed for satisfy the API contract. - * Therefore, it doesn't matter what we pass here, as it will be ignored. + * Therefore, provide an empty optional. 
*/ - final AirbyteStateMessage stateMessage = stateManager.emit(null); + final AirbyteStateMessage stateMessage = stateManager.emit(Optional.empty()); return new AirbyteMessage().withType(Type.STATE).withState(stateMessage); } diff --git a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlCdcStateHandler.java b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlCdcStateHandler.java index c7f993191c0f..e896f3082ce7 100644 --- a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlCdcStateHandler.java +++ b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlCdcStateHandler.java @@ -17,6 +17,7 @@ import io.airbyte.protocol.models.AirbyteStateMessage; import java.util.HashMap; import java.util.Map; +import java.util.Optional; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -44,9 +45,9 @@ public AirbyteMessage saveState(final Map offset, final String d stateManager.getCdcStateManager().setCdcState(cdcState); /* * Namespace pair is ignored by global state manager, but is needed for satisfy the API contract. - * Therefore, it doesn't matter what we pass here, as it will be ignored. + * Therefore, provide an empty optional. 
*/ - final AirbyteStateMessage stateMessage = stateManager.emit(null); + final AirbyteStateMessage stateMessage = stateManager.emit(Optional.empty()); return new AirbyteMessage().withType(Type.STATE).withState(stateMessage); } diff --git a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresCdcStateHandler.java b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresCdcStateHandler.java index f3b72fc8fc8c..6175f81c904f 100644 --- a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresCdcStateHandler.java +++ b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresCdcStateHandler.java @@ -13,6 +13,7 @@ import io.airbyte.protocol.models.AirbyteMessage.Type; import io.airbyte.protocol.models.AirbyteStateMessage; import java.util.Map; +import java.util.Optional; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -33,9 +34,9 @@ public AirbyteMessage saveState(final Map offset, final String d stateManager.getCdcStateManager().setCdcState(cdcState); /* * Namespace pair is ignored by global state manager, but is needed for satisfy the API contract. - * Therefore, it doesn't matter what we pass here, as it will be ignored. + * Therefore, provide an empty optional. 
*/ - final AirbyteStateMessage stateMessage = stateManager.emit(null); + final AirbyteStateMessage stateMessage = stateManager.emit(Optional.empty()); return new AirbyteMessage().withType(Type.STATE).withState(stateMessage); } diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/AbstractStateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/AbstractStateManager.java index 6c56a8a2c79a..dec78ec39fac 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/AbstractStateManager.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/AbstractStateManager.java @@ -11,6 +11,7 @@ import java.util.Collection; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.function.Function; import java.util.function.Supplier; @@ -57,6 +58,6 @@ public Map getPairToCursorInfoMap() } @Override - public abstract AirbyteStateMessage toState(final AirbyteStreamNameNamespacePair pair); + public abstract AirbyteStateMessage toState(final Optional pair); } diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/GlobalStateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/GlobalStateManager.java index d5c58b6683b2..93d159641641 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/GlobalStateManager.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/GlobalStateManager.java @@ -21,6 +21,7 @@ import io.airbyte.protocol.models.StreamDescriptor; import 
java.util.Collection; import java.util.List; +import java.util.Optional; import java.util.function.Supplier; import java.util.stream.Collectors; @@ -62,7 +63,7 @@ public CdcStateManager getCdcStateManager() { } @Override - public AirbyteStateMessage toState(final AirbyteStreamNameNamespacePair pair) { + public AirbyteStateMessage toState(final Optional pair) { // Populate global state final AirbyteGlobalState globalState = new AirbyteGlobalState(); globalState.setSharedState(Jsons.jsonNode(getCdcStateManager().getCdcState())); diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManager.java index ec3e666b5f0e..64dabe9e07e2 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManager.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManager.java @@ -88,7 +88,7 @@ public CdcStateManager getCdcStateManager() { } @Override - public AirbyteStateMessage toState(final AirbyteStreamNameNamespacePair pair) { + public AirbyteStateMessage toState(final Optional pair) { final DbState dbState = StateGeneratorUtils.generateDbState(getPairToCursorInfoMap()) .withCdc(isCdc) .withCdcState(getCdcStateManager().getCdcState()); @@ -106,7 +106,7 @@ public AirbyteStateMessage updateAndEmit(final AirbyteStreamNameNamespacePair pa cursorInfo.get().setCursor(cursor); } - return toState(pair); + return toState(Optional.ofNullable(pair)); } } diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManager.java 
b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManager.java index fdd0265eb584..3493418e0689 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManager.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManager.java @@ -29,6 +29,7 @@ public interface StateManager { * Retrieves the {@link CdcStateManager} associated with the state manager. * * @return The {@link CdcStateManager} + * @throws UnsupportedOperationException if the state manager does not support tracking change data capture (CDC) state. */ CdcStateManager getCdcStateManager(); @@ -50,7 +51,7 @@ public interface StateManager { * @return The {@link AirbyteStateMessage} that represents the current state contained in the state * manager. */ - AirbyteStateMessage toState(final AirbyteStreamNameNamespacePair pair); + AirbyteStateMessage toState(final Optional pair); /** * Retrieves an {@link Optional} possibly containing the cursor value tracked in the state @@ -121,7 +122,7 @@ default Optional getCursorInfo(final AirbyteStreamNameNamespacePair * @return An {@link AirbyteStateMessage} that represents the current state maintained by the state * manager. 
*/ - default AirbyteStateMessage emit(final AirbyteStreamNameNamespacePair pair) { + default AirbyteStateMessage emit(final Optional pair) { return toState(pair); } @@ -142,7 +143,7 @@ default AirbyteStateMessage updateAndEmit(final AirbyteStreamNameNamespacePair p Preconditions.checkState(cursorInfo.isPresent(), "Could not find cursor information for stream: " + pair); LOGGER.debug("Updating cursor value for {} to {}...", pair, cursor); cursorInfo.get().setCursor(cursor); - return emit(pair); + return emit(Optional.ofNullable(pair)); } } diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StreamStateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StreamStateManager.java index 0e365238ac05..009062827baf 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StreamStateManager.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StreamStateManager.java @@ -53,25 +53,29 @@ public StreamStateManager(final List airbyteStateMessages, @Override public CdcStateManager getCdcStateManager() { - return new CdcStateManager(new CdcState()); + throw new UnsupportedOperationException("CDC state management not supported by stream state manager."); } @Override - public AirbyteStateMessage toState(final AirbyteStreamNameNamespacePair pair) { - final Map pairToCursorInfoMap = getPairToCursorInfoMap(); - final Optional cursorInfo = Optional.ofNullable(pairToCursorInfoMap.get(pair)); + public AirbyteStateMessage toState(final Optional pair) { + if (pair.isPresent()) { + final Map pairToCursorInfoMap = getPairToCursorInfoMap(); + final Optional cursorInfo = Optional.ofNullable(pairToCursorInfoMap.get(pair.get())); - if (cursorInfo.isPresent()) { - LOGGER.debug("Generating state 
message for {}...", pair); - return new AirbyteStateMessage() - .withStateType(AirbyteStateType.STREAM) - // Temporarily include legacy state for backwards compatibility with the platform - .withData(Jsons.jsonNode(StateGeneratorUtils.generateDbState(pairToCursorInfoMap))) - .withStream(StateGeneratorUtils.generateStreamState(pair, cursorInfo.get())); + if (cursorInfo.isPresent()) { + LOGGER.debug("Generating state message for {}...", pair); + return new AirbyteStateMessage() + .withStateType(AirbyteStateType.STREAM) + // Temporarily include legacy state for backwards compatibility with the platform + .withData(Jsons.jsonNode(StateGeneratorUtils.generateDbState(pairToCursorInfoMap))) + .withStream(StateGeneratorUtils.generateStreamState(pair.get(), cursorInfo.get())); + } else { + LOGGER.warn("Cursor information could not be located in state for stream {}. Returning a new, empty state message...", pair); + return new AirbyteStateMessage().withStateType(AirbyteStateType.STREAM).withStream(new AirbyteStreamState()); + } } else { - LOGGER.warn("Cursor information could not be located in state for stream {}.", pair); + LOGGER.warn("Stream not provided. 
Returning a new, empty state message..."); return new AirbyteStateMessage().withStateType(AirbyteStateType.STREAM).withStream(new AirbyteStreamState()); } } - } diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StreamStateManagerTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StreamStateManagerTest.java index 8ca851a95488..704dc665cf0d 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StreamStateManagerTest.java +++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StreamStateManagerTest.java @@ -15,6 +15,7 @@ import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME3; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; import static org.mockito.Mockito.mock; import io.airbyte.commons.json.Jsons; @@ -159,12 +160,33 @@ void testToStateWithoutCursorInfo() { final AirbyteStreamNameNamespacePair airbyteStreamNameNamespacePair = new AirbyteStreamNameNamespacePair("other", "other"); final StateManager stateManager = new StreamStateManager(createDefaultState(), catalog); - final AirbyteStateMessage airbyteStateMessage = stateManager.toState(airbyteStreamNameNamespacePair); + final AirbyteStateMessage airbyteStateMessage = stateManager.toState(Optional.of(airbyteStreamNameNamespacePair)); assertNotNull(airbyteStateMessage); assertEquals(AirbyteStateType.STREAM, airbyteStateMessage.getStateType()); assertNotNull(airbyteStateMessage.getStream()); } + @Test + void testToStateWithoutStreamPair() { + final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() + .withStreams(List.of( + new 
ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME1).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD1)), + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)) + .withCursorField(List.of(CURSOR_FIELD2)), + new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(STREAM_NAME3).withNamespace(NAMESPACE)))); + + final StateManager stateManager = new StreamStateManager(createDefaultState(), catalog); + final AirbyteStateMessage airbyteStateMessage = stateManager.toState(Optional.empty()); + assertNotNull(airbyteStateMessage); + assertEquals(AirbyteStateType.STREAM, airbyteStateMessage.getStateType()); + assertNotNull(airbyteStateMessage.getStream()); + assertNull(airbyteStateMessage.getStream().getStreamState()); + } + @Test void testToStateNullCursorField() { final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() @@ -200,7 +222,7 @@ void testCdcStateManager() { final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); final StateManager stateManager = new StreamStateManager( List.of(new AirbyteStateMessage().withStateType(AirbyteStateType.STREAM).withStream(new AirbyteStreamState())), catalog); - assertNotNull(stateManager.getCdcStateManager()); + Assertions.assertThrows(UnsupportedOperationException.class, () -> stateManager.getCdcStateManager()); } private List createDefaultState() { From 9e8e1ae44ff58a0fdbbaf8ff61e03fe63def1821 Mon Sep 17 00:00:00 2001 From: jdpgrailsdev Date: Tue, 14 Jun 2022 14:16:05 -0400 Subject: [PATCH 19/34] Abstract global state manager selection --- .../source/relationaldb/AbstractDbSource.java | 16 ++++- .../relationaldb/state/StateManager.java | 3 +- .../state/StateManagerFactory.java | 23 ++---- .../state/StreamStateManager.java | 2 +- .../state/StateManagerFactoryTest.java | 71 ++++--------------- 5 files changed, 33 insertions(+), 82 deletions(-) diff --git 
a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java index 7921f1f9874e..ebffc59ba18a 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java @@ -52,6 +52,7 @@ import java.util.concurrent.atomic.AtomicLong; import java.util.function.Function; import java.util.function.Predicate; +import java.util.function.Supplier; import java.util.stream.Collectors; import java.util.stream.Stream; import org.slf4j.Logger; @@ -108,7 +109,7 @@ public AutoCloseableIterator read(final JsonNode config, final ConfiguredAirbyteCatalog catalog, final JsonNode state) throws Exception { - final StateManager stateManager = StateManagerFactory.createStateManager(deserializeState(state, config), catalog, config); + final StateManager stateManager = StateManagerFactory.createStateManager(deserializeState(state, config), catalog, supportsGlobalState(config)); final Instant emittedAt = Instant.now(); final Database database = createDatabaseInternal(config); @@ -516,7 +517,7 @@ private Database createDatabaseInternal(final JsonNode sourceConfig) throws Exce * Deserializes the state represented as JSON into an object representation. * * @param stateJson The state as JSON. - * @param config The plugin configuration. + * @param config The connector configuration. * @return The deserialized object representation of the state. 
*/ protected List deserializeState(final JsonNode stateJson, final JsonNode config) { @@ -533,4 +534,15 @@ protected List deserializeState(final JsonNode stateJson, f } } + /** + * Generates a {@link Supplier} that can be used to determine if the global state manager should be + * selected for use by this connector. + * + * @param config The connector configuration. + * @return A {@link Supplier}. + */ + protected Supplier supportsGlobalState(final JsonNode config) { + return () -> false; + } + } diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManager.java index 3493418e0689..a4234454b06f 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManager.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManager.java @@ -29,7 +29,8 @@ public interface StateManager { * Retrieves the {@link CdcStateManager} associated with the state manager. * * @return The {@link CdcStateManager} - * @throws UnsupportedOperationException if the state manager does not support tracking change data capture (CDC) state. + * @throws UnsupportedOperationException if the state manager does not support tracking change data + * capture (CDC) state. 
*/ CdcStateManager getCdcStateManager(); diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java index 8e776ec8b81c..7abe7639c272 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java @@ -4,14 +4,12 @@ package io.airbyte.integrations.source.relationaldb.state; -import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.annotations.VisibleForTesting; import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.source.relationaldb.models.DbState; import io.airbyte.protocol.models.AirbyteStateMessage; -import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import java.util.List; +import java.util.function.Supplier; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -33,15 +31,15 @@ private StateManagerFactory() {} * @param state The deserialized state. * @param catalog The {@link ConfiguredAirbyteCatalog} for the connector that will utilize the state * manager. - * @param config The connector configuration. + * @param usesGlobalState {@link Supplier} that determines if global state is used by the connector. * @return A newly created {@link StateManager} implementation based on the provided state. 
*/ public static StateManager createStateManager(final List state, final ConfiguredAirbyteCatalog catalog, - final JsonNode config) { + final Supplier usesGlobalState) { if (state != null && !state.isEmpty()) { final AirbyteStateMessage airbyteStateMessage = state.get(0); - if (isCdc(config) || airbyteStateMessage.getStateType() == AirbyteStateType.GLOBAL) { + if (usesGlobalState.get()) { LOGGER.info("Global state manager selected to manage state object with type {}.", airbyteStateMessage.getStateType()); return new GlobalStateManager(airbyteStateMessage, catalog); } else if (airbyteStateMessage.getData() != null && airbyteStateMessage.getStream() == null) { @@ -56,17 +54,4 @@ public static StateManager createStateManager(final List st } } - /** - * Test whether the connector is configured to use change data capture (CDC) for replication. - * - * @param config The connector configuration. - * @return {@code true} if the connector utilizes CDC or {@code false} otherwise. - */ - @VisibleForTesting - protected static boolean isCdc(final JsonNode config) { - return config.hasNonNull("replication_method") - && config.get("replication_method").hasNonNull("replication_slot") - && config.get("replication_method").hasNonNull("publication"); - } - } diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StreamStateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StreamStateManager.java index 009062827baf..9fee0a39ab6c 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StreamStateManager.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StreamStateManager.java @@ -12,7 +12,6 @@ import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; import 
io.airbyte.integrations.source.relationaldb.CdcStateManager; import io.airbyte.integrations.source.relationaldb.CursorInfo; -import io.airbyte.integrations.source.relationaldb.models.CdcState; import io.airbyte.protocol.models.AirbyteStateMessage; import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; import io.airbyte.protocol.models.AirbyteStreamState; @@ -78,4 +77,5 @@ public AirbyteStateMessage toState(final Optional GLOBAL_STATE = () -> true; + + private static final Supplier NO_GLOBAL_STATE = () -> false; + @Test void testNullOrEmptyState() { final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); - final JsonNode config = mock(JsonNode.class); Assertions.assertThrows(IllegalArgumentException.class, () -> { - StateManagerFactory.createStateManager(null, catalog, config); + StateManagerFactory.createStateManager(null, catalog, NO_GLOBAL_STATE); }); Assertions.assertThrows(IllegalArgumentException.class, () -> { - StateManagerFactory.createStateManager(List.of(), catalog, config); + StateManagerFactory.createStateManager(List.of(), catalog, NO_GLOBAL_STATE); }); } @@ -53,10 +54,9 @@ void testNullOrEmptyState() { void testLegacyStateManagerCreationFromAirbyteStateMessage() { final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); final AirbyteStateMessage airbyteStateMessage = mock(AirbyteStateMessage.class); - final JsonNode config = mock(JsonNode.class); when(airbyteStateMessage.getData()).thenReturn(Jsons.jsonNode(new DbState())); - final StateManager stateManager = StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, config); + final StateManager stateManager = StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, NO_GLOBAL_STATE); Assertions.assertNotNull(stateManager); Assertions.assertEquals(LegacyStateManager.class, stateManager.getClass()); @@ -70,16 +70,8 @@ void testGlobalStateManagerCreation() { .withStreamStates(List.of(new 
AirbyteStreamState().withStreamDescriptor(new StreamDescriptor().withNamespace(NAMESPACE).withName(NAME)) .withStreamState(Jsons.jsonNode(new DbStreamState())))); final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage().withStateType(AirbyteStateType.GLOBAL).withGlobal(globalState); - final JsonNode config = mock(JsonNode.class); - final JsonNode replicationConfig = mock(JsonNode.class); - - when(replicationConfig.hasNonNull(REPLICATION_SLOT)).thenReturn(true); - when(replicationConfig.hasNonNull(PUBLICATION)).thenReturn(true); - when(config.hasNonNull(REPLICATION_METHOD)).thenReturn(true); - when(config.get(REPLICATION_METHOD)).thenReturn(replicationConfig); - - final StateManager stateManager = StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, config); + final StateManager stateManager = StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, GLOBAL_STATE); Assertions.assertNotNull(stateManager); Assertions.assertEquals(GlobalStateManager.class, stateManager.getClass()); @@ -94,16 +86,8 @@ void testGlobalStateManagerCreationWithLegacyDataPresent() { .withStreamState(Jsons.jsonNode(new DbStreamState())))); final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage().withStateType(AirbyteStateType.GLOBAL).withGlobal(globalState).withData(Jsons.jsonNode(new DbState())); - final JsonNode config = mock(JsonNode.class); - final JsonNode replicationConfig = mock(JsonNode.class); - - when(replicationConfig.hasNonNull(REPLICATION_SLOT)).thenReturn(true); - when(replicationConfig.hasNonNull(PUBLICATION)).thenReturn(true); - when(config.hasNonNull(REPLICATION_METHOD)).thenReturn(true); - when(config.get(REPLICATION_METHOD)).thenReturn(replicationConfig); - - final StateManager stateManager = StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, config); + final StateManager stateManager = StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, 
GLOBAL_STATE); Assertions.assertNotNull(stateManager); Assertions.assertEquals(GlobalStateManager.class, stateManager.getClass()); @@ -115,9 +99,8 @@ void testStreamStateManagerCreation() { final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage().withStateType(AirbyteStateType.STREAM) .withStream(new AirbyteStreamState().withStreamDescriptor(new StreamDescriptor().withName(NAME).withNamespace( NAMESPACE)).withStreamState(Jsons.jsonNode(new DbStreamState()))); - final JsonNode config = mock(JsonNode.class); - final StateManager stateManager = StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, config); + final StateManager stateManager = StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, NO_GLOBAL_STATE); Assertions.assertNotNull(stateManager); Assertions.assertEquals(StreamStateManager.class, stateManager.getClass()); @@ -130,41 +113,11 @@ void testStreamStateManagerCreationWithLegacyDataPresent() { .withStream(new AirbyteStreamState().withStreamDescriptor(new StreamDescriptor().withName(NAME).withNamespace( NAMESPACE)).withStreamState(Jsons.jsonNode(new DbStreamState()))) .withData(Jsons.jsonNode(new DbState())); - final JsonNode config = mock(JsonNode.class); - final StateManager stateManager = StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, config); + final StateManager stateManager = StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, NO_GLOBAL_STATE); Assertions.assertNotNull(stateManager); Assertions.assertEquals(StreamStateManager.class, stateManager.getClass()); } - @Test - void testCdcDetectionLogic() { - final JsonNode config = mock(JsonNode.class); - final JsonNode replicationConfig = mock(JsonNode.class); - - when(replicationConfig.hasNonNull(REPLICATION_SLOT)).thenReturn(true); - when(replicationConfig.hasNonNull(PUBLICATION)).thenReturn(true); - when(config.hasNonNull(REPLICATION_METHOD)).thenReturn(true); - 
when(config.get(REPLICATION_METHOD)).thenReturn(replicationConfig); - assertTrue(StateManagerFactory.isCdc(config)); - - when(replicationConfig.hasNonNull(REPLICATION_SLOT)).thenReturn(false); - assertFalse(StateManagerFactory.isCdc(config)); - - when(replicationConfig.hasNonNull(REPLICATION_SLOT)).thenReturn(true); - when(replicationConfig.hasNonNull(PUBLICATION)).thenReturn(false); - assertFalse(StateManagerFactory.isCdc(config)); - - when(replicationConfig.hasNonNull(REPLICATION_SLOT)).thenReturn(true); - when(replicationConfig.hasNonNull(PUBLICATION)).thenReturn(true); - when(config.hasNonNull(REPLICATION_METHOD)).thenReturn(false); - assertFalse(StateManagerFactory.isCdc(config)); - - when(replicationConfig.hasNonNull(REPLICATION_SLOT)).thenReturn(false); - when(replicationConfig.hasNonNull(PUBLICATION)).thenReturn(false); - when(config.hasNonNull(REPLICATION_METHOD)).thenReturn(false); - assertFalse(StateManagerFactory.isCdc(config)); - } - } From 9a9c782555c4d74050b4e54b4839e671f3623e33 Mon Sep 17 00:00:00 2001 From: jdpgrailsdev Date: Tue, 14 Jun 2022 16:32:39 -0400 Subject: [PATCH 20/34] Handle conversion between different state types --- .../source/postgres/PostgresSource.java | 8 +- .../source/relationaldb/AbstractDbSource.java | 11 ++- .../state/StateGeneratorUtils.java | 53 ++++++++++ .../state/StateManagerFactory.java | 93 +++++++++++++++--- .../state/StateManagerFactoryTest.java | 97 ++++++++++++++++--- 5 files changed, 232 insertions(+), 30 deletions(-) diff --git a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java index 645d1c2a797f..8919f526e6bc 100644 --- a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java +++ 
b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java @@ -50,6 +50,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.function.Supplier; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -415,7 +416,7 @@ private static AirbyteStream addCdcMetadataColumns(final AirbyteStream stream) { @Override protected List deserializeState(final JsonNode stateJson, final JsonNode config) { if (stateJson == null) { - if (isCdc(config)) { + if (supportedStateTypeSupplier(config).get() == AirbyteStateType.GLOBAL) { final AirbyteGlobalState globalState = new AirbyteGlobalState() .withSharedState(Jsons.jsonNode(new CdcState())) .withStreamStates(List.of()); @@ -435,6 +436,11 @@ protected List deserializeState(final JsonNode stateJson, f } } + @Override + protected Supplier supportedStateTypeSupplier(final JsonNode config) { + return () -> isCdc(config) ? AirbyteStateType.GLOBAL : AirbyteStateType.STREAM; + } + public static void main(final String[] args) throws Exception { final Source source = PostgresSource.sshWrappedSource(); LOGGER.info("starting source: {}", PostgresSource.class); diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java index ebffc59ba18a..fca7d885aa95 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java @@ -109,7 +109,8 @@ public AutoCloseableIterator read(final JsonNode config, final ConfiguredAirbyteCatalog catalog, final JsonNode state) throws Exception { - final StateManager stateManager = 
StateManagerFactory.createStateManager(deserializeState(state, config), catalog, supportsGlobalState(config)); + final StateManager stateManager = + StateManagerFactory.createStateManager(deserializeState(state, config), catalog, supportedStateTypeSupplier(config)); final Instant emittedAt = Instant.now(); final Database database = createDatabaseInternal(config); @@ -535,14 +536,14 @@ protected List deserializeState(final JsonNode stateJson, f } /** - * Generates a {@link Supplier} that can be used to determine if the global state manager should be - * selected for use by this connector. + * Generates a {@link Supplier} that can be used to determine which state manager should be selected + * for use by this connector. * * @param config The connector configuration. * @return A {@link Supplier}. */ - protected Supplier supportsGlobalState(final JsonNode config) { - return () -> false; + protected Supplier supportedStateTypeSupplier(final JsonNode config) { + return () -> AirbyteStateType.LEGACY; } } diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtils.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtils.java index ecfa8c412732..479239983512 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtils.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtils.java @@ -10,6 +10,9 @@ import io.airbyte.integrations.source.relationaldb.CursorInfo; import io.airbyte.integrations.source.relationaldb.models.DbState; import io.airbyte.integrations.source.relationaldb.models.DbStreamState; +import io.airbyte.protocol.models.AirbyteGlobalState; +import io.airbyte.protocol.models.AirbyteStateMessage; +import 
io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; import io.airbyte.protocol.models.AirbyteStreamState; import io.airbyte.protocol.models.StreamDescriptor; import java.util.Collections; @@ -157,4 +160,54 @@ public static boolean isValidStreamDescriptor(final StreamDescriptor streamDescr } } + /** + * Converts a {@link AirbyteStateType#LEGACY} state message into a {@link AirbyteStateType#GLOBAL} + * message. + * + * @param airbyteStateMessage A {@link AirbyteStateType#LEGACY} state message. + * @return A {@link AirbyteStateType#GLOBAL} state message. + */ + public static AirbyteStateMessage convertLegacyStateToGlobalState(final AirbyteStateMessage airbyteStateMessage) { + final DbState dbState = Jsons.object(airbyteStateMessage.getData(), DbState.class); + final AirbyteGlobalState globalState = new AirbyteGlobalState() + .withSharedState(Jsons.jsonNode(dbState.getCdcState())) + .withStreamStates(dbState.getStreams().stream() + .map(s -> new AirbyteStreamState() + .withStreamDescriptor(new StreamDescriptor().withName(s.getStreamName()).withNamespace(s.getStreamNamespace())) + .withStreamState(Jsons.jsonNode(s))) + .collect( + Collectors.toList())); + return new AirbyteStateMessage().withStateType(AirbyteStateType.GLOBAL).withGlobal(globalState); + } + + /** + * Converts a {@link AirbyteStateType#GLOBAL} state message into a list of + * {@link AirbyteStateType#STREAM} messages. + * + * @param airbyteStateMessage A {@link AirbyteStateType#GLOBAL} state message. + * @return A list {@link AirbyteStateType#STREAM} state messages. 
+ */ + public static List convertGlobalStateToStreamState(final AirbyteStateMessage airbyteStateMessage) { + return airbyteStateMessage.getGlobal().getStreamStates().stream() + .map(s -> new AirbyteStateMessage().withStateType(AirbyteStateType.STREAM) + .withStream(new AirbyteStreamState().withStreamDescriptor(s.getStreamDescriptor()).withStreamState(s.getStreamState()))) + .collect(Collectors.toList()); + } + + /** + * Converts a {@link AirbyteStateType#LEGACY} state message into a list of + * {@link AirbyteStateType#STREAM} messages. + * + * @param airbyteStateMessage A {@link AirbyteStateType#LEGACY} state message. + * @return A list {@link AirbyteStateType#STREAM} state messages. + */ + public static List convertLegacyStateToStreamState(final AirbyteStateMessage airbyteStateMessage) { + return Jsons.object(airbyteStateMessage.getData(), DbState.class).getStreams().stream() + .map(s -> new AirbyteStateMessage().withStateType(AirbyteStateType.STREAM) + .withStream(new AirbyteStreamState() + .withStreamDescriptor(new StreamDescriptor().withNamespace(s.getStreamNamespace()).withName(s.getStreamName())) + .withStreamState(Jsons.jsonNode(s)))) + .collect(Collectors.toList()); + } + } diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java index 7abe7639c272..eda12425fc7b 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java @@ -7,7 +7,9 @@ import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.source.relationaldb.models.DbState; import 
io.airbyte.protocol.models.AirbyteStateMessage; +import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import java.util.ArrayList; import java.util.List; import java.util.function.Supplier; import org.slf4j.Logger; @@ -26,32 +28,99 @@ public class StateManagerFactory { private StateManagerFactory() {} /** - * Creates a {@link StateManager} based on the provided state object and catalog. + * Creates a {@link StateManager} based on the provided state object and catalog. This method will handle the + * conversion of the provided state to match the requested state manager based on the provided {@link AirbyteStateType}. * * @param state The deserialized state. * @param catalog The {@link ConfiguredAirbyteCatalog} for the connector that will utilize the state * manager. - * @param usesGlobalState {@link Supplier} that determines if global state is used by the connector. + * @param stateTypeSupplier {@link Supplier} that provides the {@link AirbyteStateType} that will be + * used to select the correct state manager. * @return A newly created {@link StateManager} implementation based on the provided state. 
*/ public static StateManager createStateManager(final List state, final ConfiguredAirbyteCatalog catalog, - final Supplier usesGlobalState) { + final Supplier stateTypeSupplier) { if (state != null && !state.isEmpty()) { final AirbyteStateMessage airbyteStateMessage = state.get(0); - if (usesGlobalState.get()) { - LOGGER.info("Global state manager selected to manage state object with type {}.", airbyteStateMessage.getStateType()); - return new GlobalStateManager(airbyteStateMessage, catalog); - } else if (airbyteStateMessage.getData() != null && airbyteStateMessage.getStream() == null) { - LOGGER.info("Legacy state manager selected to manage state object with type {}.", airbyteStateMessage.getStateType()); - return new LegacyStateManager(Jsons.object(airbyteStateMessage.getData(), DbState.class), catalog); - } else { - LOGGER.info("Stream state manager selected to manage state object with type {}.", airbyteStateMessage.getStateType()); - return new StreamStateManager(state, catalog); + switch (stateTypeSupplier.get()) { + case LEGACY: + LOGGER.info("Legacy state manager selected to manage state object with type {}.", airbyteStateMessage.getStateType()); + return new LegacyStateManager(Jsons.object(airbyteStateMessage.getData(), DbState.class), catalog); + case GLOBAL: + LOGGER.info("Global state manager selected to manage state object with type {}.", airbyteStateMessage.getStateType()); + return new GlobalStateManager(generateGlobalState(airbyteStateMessage), catalog); + case STREAM: + default: + LOGGER.info("Stream state manager selected to manage state object with type {}.", airbyteStateMessage.getStateType()); + return new StreamStateManager(generateStreamState(state), catalog); } } else { throw new IllegalArgumentException("Failed to create state manager due to empty state list."); } } + /** + * Handles the conversion between a different state type and the global state. This method handles + * the following transitions: + *
    + *
  • Stream -> Global (not supported, results in {@link IllegalArgumentException})
  • + *
  • Legacy -> Global (supported)
  • + *
  • Global -> Global (supported/no conversion required)
  • + *
+ * + * @param airbyteStateMessage The current state that is to be converted to global state. + * @return The converted state message. + * @throws IllegalArgumentException if unable to convert between the given state type and global. + */ + private static AirbyteStateMessage generateGlobalState(final AirbyteStateMessage airbyteStateMessage) { + AirbyteStateMessage globalStateMessage = airbyteStateMessage; + + switch (airbyteStateMessage.getStateType()) { + case STREAM: + throw new IllegalArgumentException("Unable to convert connector state from per-stream to global. Please reset the connection to continue."); + case LEGACY: + globalStateMessage = StateGeneratorUtils.convertLegacyStateToGlobalState(airbyteStateMessage); + LOGGER.info("Legacy state converted to global state.", airbyteStateMessage.getStateType()); + break; + case GLOBAL: + default: + break; + } + + return globalStateMessage; + } + + /** + * Handles the conversion between a different state type and the stream state. This method handles + * the following transitions: + *
    + *
  • Global -> Stream (supported/shared state discarded)
  • + *
  • Legacy -> Stream (supported)
  • + *
  • Stream -> Stream (supported/no conversion required)
  • + *
+ * + * @param states The list of current states. + * @return The converted state messages. + */ + private static List generateStreamState(final List states) { + final AirbyteStateMessage airbyteStateMessage = states.get(0); + final List streamStates = new ArrayList<>(); + switch (airbyteStateMessage.getStateType()) { + case GLOBAL: + streamStates.addAll(StateGeneratorUtils.convertGlobalStateToStreamState(airbyteStateMessage)); + LOGGER.info("Global state converted to stream state.", airbyteStateMessage.getStateType()); + break; + case LEGACY: + streamStates.addAll(StateGeneratorUtils.convertLegacyStateToStreamState(airbyteStateMessage)); + break; + case STREAM: + default: + streamStates.addAll(states); + break; + } + + return streamStates; + } + } diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java index abb9c23db3e5..ff9274f48d7a 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java +++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java @@ -29,24 +29,39 @@ public class StateManagerFactoryTest { private static final String NAMESPACE = "namespace"; private static final String NAME = "name"; - private static final String REPLICATION_SLOT = "replication_slot"; - private static final String PUBLICATION = "publication"; - private static final String REPLICATION_METHOD = "replication_method"; - private static final Supplier GLOBAL_STATE = () -> true; + private static final Supplier GLOBAL_STATE_TYPE = () -> AirbyteStateType.GLOBAL; - private static final Supplier NO_GLOBAL_STATE = () -> false; + private static final 
Supplier LEGACY_STATE_TYPE = () -> AirbyteStateType.LEGACY; + + private static final Supplier STREAM_STATE_TYPE = () -> AirbyteStateType.STREAM; @Test void testNullOrEmptyState() { final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); Assertions.assertThrows(IllegalArgumentException.class, () -> { - StateManagerFactory.createStateManager(null, catalog, NO_GLOBAL_STATE); + StateManagerFactory.createStateManager(null, catalog, GLOBAL_STATE_TYPE); + }); + + Assertions.assertThrows(IllegalArgumentException.class, () -> { + StateManagerFactory.createStateManager(List.of(), catalog, GLOBAL_STATE_TYPE); + }); + + Assertions.assertThrows(IllegalArgumentException.class, () -> { + StateManagerFactory.createStateManager(null, catalog, LEGACY_STATE_TYPE); + }); + + Assertions.assertThrows(IllegalArgumentException.class, () -> { + StateManagerFactory.createStateManager(List.of(), catalog, LEGACY_STATE_TYPE); + }); + + Assertions.assertThrows(IllegalArgumentException.class, () -> { + StateManagerFactory.createStateManager(null, catalog, STREAM_STATE_TYPE); }); Assertions.assertThrows(IllegalArgumentException.class, () -> { - StateManagerFactory.createStateManager(List.of(), catalog, NO_GLOBAL_STATE); + StateManagerFactory.createStateManager(List.of(), catalog, STREAM_STATE_TYPE); }); } @@ -56,7 +71,7 @@ void testLegacyStateManagerCreationFromAirbyteStateMessage() { final AirbyteStateMessage airbyteStateMessage = mock(AirbyteStateMessage.class); when(airbyteStateMessage.getData()).thenReturn(Jsons.jsonNode(new DbState())); - final StateManager stateManager = StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, NO_GLOBAL_STATE); + final StateManager stateManager = StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, LEGACY_STATE_TYPE); Assertions.assertNotNull(stateManager); Assertions.assertEquals(LegacyStateManager.class, stateManager.getClass()); @@ -71,12 +86,39 @@ void testGlobalStateManagerCreation() 
{ .withStreamState(Jsons.jsonNode(new DbStreamState())))); final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage().withStateType(AirbyteStateType.GLOBAL).withGlobal(globalState); - final StateManager stateManager = StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, GLOBAL_STATE); + final StateManager stateManager = StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, GLOBAL_STATE_TYPE); Assertions.assertNotNull(stateManager); Assertions.assertEquals(GlobalStateManager.class, stateManager.getClass()); } + @Test + void testGlobalStateManagerCreationFromLegacyState() { + final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + final CdcState cdcState = new CdcState(); + final DbState dbState = new DbState() + .withCdcState(cdcState) + .withStreams(List.of(new DbStreamState().withStreamName(NAME).withStreamNamespace(NAMESPACE))); + final AirbyteStateMessage airbyteStateMessage = + new AirbyteStateMessage().withStateType(AirbyteStateType.LEGACY).withData(Jsons.jsonNode(dbState)); + + final StateManager stateManager = StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, GLOBAL_STATE_TYPE); + + Assertions.assertNotNull(stateManager); + Assertions.assertEquals(GlobalStateManager.class, stateManager.getClass()); + } + + @Test + void testGlobalStateManagerCreationFromStreamState() { + final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage().withStateType(AirbyteStateType.STREAM) + .withStream(new AirbyteStreamState().withStreamDescriptor(new StreamDescriptor().withName(NAME).withNamespace( + NAMESPACE)).withStreamState(Jsons.jsonNode(new DbStreamState()))); + + Assertions.assertThrows(IllegalArgumentException.class, + () -> StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, GLOBAL_STATE_TYPE)); + } + @Test void 
testGlobalStateManagerCreationWithLegacyDataPresent() { final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); @@ -87,7 +129,7 @@ void testGlobalStateManagerCreationWithLegacyDataPresent() { final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage().withStateType(AirbyteStateType.GLOBAL).withGlobal(globalState).withData(Jsons.jsonNode(new DbState())); - final StateManager stateManager = StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, GLOBAL_STATE); + final StateManager stateManager = StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, GLOBAL_STATE_TYPE); Assertions.assertNotNull(stateManager); Assertions.assertEquals(GlobalStateManager.class, stateManager.getClass()); @@ -100,7 +142,38 @@ void testStreamStateManagerCreation() { .withStream(new AirbyteStreamState().withStreamDescriptor(new StreamDescriptor().withName(NAME).withNamespace( NAMESPACE)).withStreamState(Jsons.jsonNode(new DbStreamState()))); - final StateManager stateManager = StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, NO_GLOBAL_STATE); + final StateManager stateManager = StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, STREAM_STATE_TYPE); + + Assertions.assertNotNull(stateManager); + Assertions.assertEquals(StreamStateManager.class, stateManager.getClass()); + } + + @Test + void testStreamStateManagerCreationFromLegacy() { + final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + final CdcState cdcState = new CdcState(); + final DbState dbState = new DbState() + .withCdcState(cdcState) + .withStreams(List.of(new DbStreamState().withStreamName(NAME).withStreamNamespace(NAMESPACE))); + final AirbyteStateMessage airbyteStateMessage = + new AirbyteStateMessage().withStateType(AirbyteStateType.LEGACY).withData(Jsons.jsonNode(dbState)); + + final StateManager stateManager = 
StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, STREAM_STATE_TYPE); + + Assertions.assertNotNull(stateManager); + Assertions.assertEquals(StreamStateManager.class, stateManager.getClass()); + } + + @Test + void testStreamStateManagerCreationFromGlobal() { + final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + final AirbyteGlobalState globalState = + new AirbyteGlobalState().withSharedState(Jsons.jsonNode(new DbState().withCdcState(new CdcState().withState(Jsons.jsonNode(new DbState()))))) + .withStreamStates(List.of(new AirbyteStreamState().withStreamDescriptor(new StreamDescriptor().withNamespace(NAMESPACE).withName(NAME)) + .withStreamState(Jsons.jsonNode(new DbStreamState())))); + final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage().withStateType(AirbyteStateType.GLOBAL).withGlobal(globalState); + + final StateManager stateManager = StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, STREAM_STATE_TYPE); Assertions.assertNotNull(stateManager); Assertions.assertEquals(StreamStateManager.class, stateManager.getClass()); @@ -114,7 +187,7 @@ void testStreamStateManagerCreationWithLegacyDataPresent() { NAMESPACE)).withStreamState(Jsons.jsonNode(new DbStreamState()))) .withData(Jsons.jsonNode(new DbState())); - final StateManager stateManager = StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, NO_GLOBAL_STATE); + final StateManager stateManager = StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, STREAM_STATE_TYPE); Assertions.assertNotNull(stateManager); Assertions.assertEquals(StreamStateManager.class, stateManager.getClass()); From 5c83969c7c03df583f7062c7b0ef0095dcfe08cb Mon Sep 17 00:00:00 2001 From: jdpgrailsdev Date: Tue, 14 Jun 2022 16:44:23 -0400 Subject: [PATCH 21/34] Handle invalid conversion --- .../source/relationaldb/state/StateManagerFactory.java | 7 +++---- 
.../source/relationaldb/state/StateManagerFactoryTest.java | 5 +---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java index eda12425fc7b..8ed39cc44606 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java @@ -78,7 +78,7 @@ private static AirbyteStateMessage generateGlobalState(final AirbyteStateMessage switch (airbyteStateMessage.getStateType()) { case STREAM: - throw new IllegalArgumentException("Unable to convert connector state from per-stream to global. Please reset the connection to continue."); + throw new IllegalArgumentException("Unable to convert connector state from stream to global. Please reset the connection to continue."); case LEGACY: globalStateMessage = StateGeneratorUtils.convertLegacyStateToGlobalState(airbyteStateMessage); LOGGER.info("Legacy state converted to global state.", airbyteStateMessage.getStateType()); @@ -102,15 +102,14 @@ private static AirbyteStateMessage generateGlobalState(final AirbyteStateMessage * * @param states The list of current states. * @return The converted state messages. + * @throws IllegalArgumentException if unable to convert between the given state type and stream. 
*/ private static List generateStreamState(final List states) { final AirbyteStateMessage airbyteStateMessage = states.get(0); final List streamStates = new ArrayList<>(); switch (airbyteStateMessage.getStateType()) { case GLOBAL: - streamStates.addAll(StateGeneratorUtils.convertGlobalStateToStreamState(airbyteStateMessage)); - LOGGER.info("Global state converted to stream state.", airbyteStateMessage.getStateType()); - break; + throw new IllegalArgumentException("Unable to convert connector state from global to stream. Please reset the connection to continue."); case LEGACY: streamStates.addAll(StateGeneratorUtils.convertLegacyStateToStreamState(airbyteStateMessage)); break; diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java index ff9274f48d7a..eab6e8fe56dd 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java +++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java @@ -173,10 +173,7 @@ void testStreamStateManagerCreationFromGlobal() { .withStreamState(Jsons.jsonNode(new DbStreamState())))); final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage().withStateType(AirbyteStateType.GLOBAL).withGlobal(globalState); - final StateManager stateManager = StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, STREAM_STATE_TYPE); - - Assertions.assertNotNull(stateManager); - Assertions.assertEquals(StreamStateManager.class, stateManager.getClass()); + Assertions.assertThrows(IllegalArgumentException.class, () -> StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, 
STREAM_STATE_TYPE)); } @Test From fe076d75feb2a6a34cba4e8eac2dfbe3e83e6d8f Mon Sep 17 00:00:00 2001 From: jdpgrailsdev Date: Tue, 14 Jun 2022 16:45:29 -0400 Subject: [PATCH 22/34] Rename parameter --- .../source/relationaldb/state/StateManagerFactory.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java index 8ed39cc44606..ac2b26e317ee 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java @@ -31,18 +31,18 @@ private StateManagerFactory() {} * Creates a {@link StateManager} based on the provided state object and catalog. This method will handle the * conversion of the provided state to match the requested state manager based on the provided {@link AirbyteStateType}. * - * @param state The deserialized state. + * @param initialState The deserialized initial state that will be provided to the selected {@link StateManager}. * @param catalog The {@link ConfiguredAirbyteCatalog} for the connector that will utilize the state * manager. * @param stateTypeSupplier {@link Supplier} that provides the {@link AirbyteStateType} that will be * used to select the correct state manager. * @return A newly created {@link StateManager} implementation based on the provided state. 
*/ - public static StateManager createStateManager(final List state, + public static StateManager createStateManager(final List initialState, final ConfiguredAirbyteCatalog catalog, final Supplier stateTypeSupplier) { - if (state != null && !state.isEmpty()) { - final AirbyteStateMessage airbyteStateMessage = state.get(0); + if (initialState != null && !initialState.isEmpty()) { + final AirbyteStateMessage airbyteStateMessage = initialState.get(0); switch (stateTypeSupplier.get()) { case LEGACY: LOGGER.info("Legacy state manager selected to manage state object with type {}.", airbyteStateMessage.getStateType()); @@ -53,7 +53,7 @@ public static StateManager createStateManager(final List st case STREAM: default: LOGGER.info("Stream state manager selected to manage state object with type {}.", airbyteStateMessage.getStateType()); - return new StreamStateManager(generateStreamState(state), catalog); + return new StreamStateManager(generateStreamState(initialState), catalog); } } else { throw new IllegalArgumentException("Failed to create state manager due to empty state list."); From c4824a317a29c3457bd2f3091aa25407d9a32840 Mon Sep 17 00:00:00 2001 From: jdpgrailsdev Date: Wed, 15 Jun 2022 09:31:20 -0400 Subject: [PATCH 23/34] Refactor state manager creation --- .../source/postgres/PostgresSource.java | 34 ++++++----------- .../source/relationaldb/AbstractDbSource.java | 35 +++++++++++------- .../state/StateManagerFactory.java | 12 +++--- .../state/StateManagerFactoryTest.java | 37 ++++++++----------- 4 files changed, 53 insertions(+), 65 deletions(-) diff --git a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java index 8919f526e6bc..04c708fe7a8f 100644 --- 
a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java +++ b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java @@ -411,34 +411,24 @@ private static AirbyteStream addCdcMetadataColumns(final AirbyteStream stream) { return stream; } - // TODO This is a temporary override so that the Postgres source can take advantage of per-stream - // state. + // TODO This is a temporary override so that the Postgres source can take advantage of per-stream state @Override - protected List deserializeState(final JsonNode stateJson, final JsonNode config) { - if (stateJson == null) { - if (supportedStateTypeSupplier(config).get() == AirbyteStateType.GLOBAL) { - final AirbyteGlobalState globalState = new AirbyteGlobalState() - .withSharedState(Jsons.jsonNode(new CdcState())) - .withStreamStates(List.of()); - return List.of(new AirbyteStateMessage().withStateType(AirbyteStateType.GLOBAL).withGlobal(globalState)); - } else { - return List.of(new AirbyteStateMessage() - .withStateType(AirbyteStateType.STREAM) - .withStream(new AirbyteStreamState())); - } + protected List generateEmptyInitialState(final JsonNode config) { + if (getSupportedStateType(config) == AirbyteStateType.GLOBAL) { + final AirbyteGlobalState globalState = new AirbyteGlobalState() + .withSharedState(Jsons.jsonNode(new CdcState())) + .withStreamStates(List.of()); + return List.of(new AirbyteStateMessage().withStateType(AirbyteStateType.GLOBAL).withGlobal(globalState)); } else { - try { - return Jsons.object(stateJson, new AirbyteStateMessageListTypeReference()); - } catch (final IllegalArgumentException e) { - LOGGER.warn("Defaulting to legacy state object..."); - return List.of(new AirbyteStateMessage().withStateType(AirbyteStateType.LEGACY).withData(stateJson)); - } + return List.of(new AirbyteStateMessage() + .withStateType(AirbyteStateType.STREAM) + .withStream(new 
AirbyteStreamState())); } } @Override - protected Supplier supportedStateTypeSupplier(final JsonNode config) { - return () -> isCdc(config) ? AirbyteStateType.GLOBAL : AirbyteStateType.STREAM; + protected AirbyteStateType getSupportedStateType(final JsonNode config) { + return isCdc(config) ? AirbyteStateType.GLOBAL : AirbyteStateType.STREAM; } public static void main(final String[] args) throws Exception { diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java index fca7d885aa95..015bbfffb916 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java @@ -52,7 +52,6 @@ import java.util.concurrent.atomic.AtomicLong; import java.util.function.Function; import java.util.function.Predicate; -import java.util.function.Supplier; import java.util.stream.Collectors; import java.util.stream.Stream; import org.slf4j.Logger; @@ -110,7 +109,7 @@ public AutoCloseableIterator read(final JsonNode config, final JsonNode state) throws Exception { final StateManager stateManager = - StateManagerFactory.createStateManager(deserializeState(state, config), catalog, supportedStateTypeSupplier(config)); + StateManagerFactory.createStateManager(getSupportedStateType(config), deserializeInitialState(state, config), catalog); final Instant emittedAt = Instant.now(); final Database database = createDatabaseInternal(config); @@ -517,33 +516,41 @@ private Database createDatabaseInternal(final JsonNode sourceConfig) throws Exce /** * Deserializes the state represented as JSON into an object representation. * - * @param stateJson The state as JSON. 
+ * @param initialStateJson The state as JSON. * @param config The connector configuration. * @return The deserialized object representation of the state. */ - protected List deserializeState(final JsonNode stateJson, final JsonNode config) { - if (stateJson == null) { - // For backwards compatibility with existing connectors - return List.of(new AirbyteStateMessage().withStateType(AirbyteStateType.LEGACY).withData(Jsons.jsonNode(new DbState()))); + protected List deserializeInitialState(final JsonNode initialStateJson, final JsonNode config) { + if (initialStateJson == null) { + return generateEmptyInitialState(config); } else { try { - return Jsons.object(stateJson, new AirbyteStateMessageListTypeReference()); + return Jsons.object(initialStateJson, new AirbyteStateMessageListTypeReference()); } catch (final IllegalArgumentException e) { LOGGER.warn("Defaulting to legacy state object..."); - return List.of(new AirbyteStateMessage().withStateType(AirbyteStateType.LEGACY).withData(stateJson)); + return List.of(new AirbyteStateMessage().withStateType(AirbyteStateType.LEGACY).withData(initialStateJson)); } } } /** - * Generates a {@link Supplier} that can be used to determine which state manager should be selected - * for use by this connector. + * Generates an empty, initial state for use by the connector. + * @param config The connector configuration. + * @return The empty, initial state. + */ + protected List generateEmptyInitialState(final JsonNode config) { + // For backwards compatibility with existing connectors + return List.of(new AirbyteStateMessage().withStateType(AirbyteStateType.LEGACY).withData(Jsons.jsonNode(new DbState()))); + } + + /** + * Returns the {@link AirbyteStateType} supported by this connector. * * @param config The connector configuration. - * @return A {@link Supplier}. + * @return A {@link AirbyteStateType} representing the state supported by this connector. 
*/ - protected Supplier supportedStateTypeSupplier(final JsonNode config) { - return () -> AirbyteStateType.LEGACY; + protected AirbyteStateType getSupportedStateType(final JsonNode config) { + return AirbyteStateType.LEGACY; } } diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java index ac2b26e317ee..a1e60a30bb7e 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java @@ -11,7 +11,6 @@ import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import java.util.ArrayList; import java.util.List; -import java.util.function.Supplier; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -31,19 +30,18 @@ private StateManagerFactory() {} * Creates a {@link StateManager} based on the provided state object and catalog. This method will handle the * conversion of the provided state to match the requested state manager based on the provided {@link AirbyteStateType}. * + * @param supportedStateType The type of state supported by the connector. * @param initialState The deserialized initial state that will be provided to the selected {@link StateManager}. * @param catalog The {@link ConfiguredAirbyteCatalog} for the connector that will utilize the state * manager. - * @param stateTypeSupplier {@link Supplier} that provides the {@link AirbyteStateType} that will be - * used to select the correct state manager. * @return A newly created {@link StateManager} implementation based on the provided state. 
*/ - public static StateManager createStateManager(final List initialState, - final ConfiguredAirbyteCatalog catalog, - final Supplier stateTypeSupplier) { + public static StateManager createStateManager(final AirbyteStateType supportedStateType, + final List initialState, + final ConfiguredAirbyteCatalog catalog) { if (initialState != null && !initialState.isEmpty()) { final AirbyteStateMessage airbyteStateMessage = initialState.get(0); - switch (stateTypeSupplier.get()) { + switch (supportedStateType) { case LEGACY: LOGGER.info("Legacy state manager selected to manage state object with type {}.", airbyteStateMessage.getStateType()); return new LegacyStateManager(Jsons.object(airbyteStateMessage.getData(), DbState.class), catalog); diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java index eab6e8fe56dd..2a68f218e59b 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java +++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java @@ -18,7 +18,6 @@ import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import io.airbyte.protocol.models.StreamDescriptor; import java.util.List; -import java.util.function.Supplier; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -30,38 +29,32 @@ public class StateManagerFactoryTest { private static final String NAMESPACE = "namespace"; private static final String NAME = "name"; - private static final Supplier GLOBAL_STATE_TYPE = () -> AirbyteStateType.GLOBAL; - - private static final Supplier LEGACY_STATE_TYPE = () -> AirbyteStateType.LEGACY; - - private 
static final Supplier STREAM_STATE_TYPE = () -> AirbyteStateType.STREAM; - @Test void testNullOrEmptyState() { final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); Assertions.assertThrows(IllegalArgumentException.class, () -> { - StateManagerFactory.createStateManager(null, catalog, GLOBAL_STATE_TYPE); + StateManagerFactory.createStateManager(AirbyteStateType.GLOBAL, null, catalog); }); Assertions.assertThrows(IllegalArgumentException.class, () -> { - StateManagerFactory.createStateManager(List.of(), catalog, GLOBAL_STATE_TYPE); + StateManagerFactory.createStateManager(AirbyteStateType.GLOBAL, List.of(), catalog); }); Assertions.assertThrows(IllegalArgumentException.class, () -> { - StateManagerFactory.createStateManager(null, catalog, LEGACY_STATE_TYPE); + StateManagerFactory.createStateManager(AirbyteStateType.LEGACY,null, catalog); }); Assertions.assertThrows(IllegalArgumentException.class, () -> { - StateManagerFactory.createStateManager(List.of(), catalog, LEGACY_STATE_TYPE); + StateManagerFactory.createStateManager(AirbyteStateType.LEGACY, List.of(), catalog); }); Assertions.assertThrows(IllegalArgumentException.class, () -> { - StateManagerFactory.createStateManager(null, catalog, STREAM_STATE_TYPE); + StateManagerFactory.createStateManager(AirbyteStateType.STREAM, null, catalog); }); Assertions.assertThrows(IllegalArgumentException.class, () -> { - StateManagerFactory.createStateManager(List.of(), catalog, STREAM_STATE_TYPE); + StateManagerFactory.createStateManager(AirbyteStateType.STREAM, List.of(), catalog); }); } @@ -71,7 +64,7 @@ void testLegacyStateManagerCreationFromAirbyteStateMessage() { final AirbyteStateMessage airbyteStateMessage = mock(AirbyteStateMessage.class); when(airbyteStateMessage.getData()).thenReturn(Jsons.jsonNode(new DbState())); - final StateManager stateManager = StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, LEGACY_STATE_TYPE); + final StateManager stateManager = 
StateManagerFactory.createStateManager(AirbyteStateType.LEGACY, List.of(airbyteStateMessage), catalog); Assertions.assertNotNull(stateManager); Assertions.assertEquals(LegacyStateManager.class, stateManager.getClass()); @@ -86,7 +79,7 @@ void testGlobalStateManagerCreation() { .withStreamState(Jsons.jsonNode(new DbStreamState())))); final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage().withStateType(AirbyteStateType.GLOBAL).withGlobal(globalState); - final StateManager stateManager = StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, GLOBAL_STATE_TYPE); + final StateManager stateManager = StateManagerFactory.createStateManager(AirbyteStateType.GLOBAL, List.of(airbyteStateMessage), catalog); Assertions.assertNotNull(stateManager); Assertions.assertEquals(GlobalStateManager.class, stateManager.getClass()); @@ -102,7 +95,7 @@ void testGlobalStateManagerCreationFromLegacyState() { final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage().withStateType(AirbyteStateType.LEGACY).withData(Jsons.jsonNode(dbState)); - final StateManager stateManager = StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, GLOBAL_STATE_TYPE); + final StateManager stateManager = StateManagerFactory.createStateManager(AirbyteStateType.GLOBAL, List.of(airbyteStateMessage), catalog); Assertions.assertNotNull(stateManager); Assertions.assertEquals(GlobalStateManager.class, stateManager.getClass()); @@ -116,7 +109,7 @@ void testGlobalStateManagerCreationFromStreamState() { NAMESPACE)).withStreamState(Jsons.jsonNode(new DbStreamState()))); Assertions.assertThrows(IllegalArgumentException.class, - () -> StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, GLOBAL_STATE_TYPE)); + () -> StateManagerFactory.createStateManager(AirbyteStateType.GLOBAL, List.of(airbyteStateMessage), catalog)); } @Test @@ -129,7 +122,7 @@ void testGlobalStateManagerCreationWithLegacyDataPresent() { final 
AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage().withStateType(AirbyteStateType.GLOBAL).withGlobal(globalState).withData(Jsons.jsonNode(new DbState())); - final StateManager stateManager = StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, GLOBAL_STATE_TYPE); + final StateManager stateManager = StateManagerFactory.createStateManager(AirbyteStateType.GLOBAL, List.of(airbyteStateMessage), catalog); Assertions.assertNotNull(stateManager); Assertions.assertEquals(GlobalStateManager.class, stateManager.getClass()); @@ -142,7 +135,7 @@ void testStreamStateManagerCreation() { .withStream(new AirbyteStreamState().withStreamDescriptor(new StreamDescriptor().withName(NAME).withNamespace( NAMESPACE)).withStreamState(Jsons.jsonNode(new DbStreamState()))); - final StateManager stateManager = StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, STREAM_STATE_TYPE); + final StateManager stateManager = StateManagerFactory.createStateManager(AirbyteStateType.STREAM, List.of(airbyteStateMessage), catalog); Assertions.assertNotNull(stateManager); Assertions.assertEquals(StreamStateManager.class, stateManager.getClass()); @@ -158,7 +151,7 @@ void testStreamStateManagerCreationFromLegacy() { final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage().withStateType(AirbyteStateType.LEGACY).withData(Jsons.jsonNode(dbState)); - final StateManager stateManager = StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, STREAM_STATE_TYPE); + final StateManager stateManager = StateManagerFactory.createStateManager(AirbyteStateType.STREAM, List.of(airbyteStateMessage), catalog); Assertions.assertNotNull(stateManager); Assertions.assertEquals(StreamStateManager.class, stateManager.getClass()); @@ -173,7 +166,7 @@ void testStreamStateManagerCreationFromGlobal() { .withStreamState(Jsons.jsonNode(new DbStreamState())))); final AirbyteStateMessage airbyteStateMessage = new 
AirbyteStateMessage().withStateType(AirbyteStateType.GLOBAL).withGlobal(globalState); - Assertions.assertThrows(IllegalArgumentException.class, () -> StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, STREAM_STATE_TYPE)); + Assertions.assertThrows(IllegalArgumentException.class, () -> StateManagerFactory.createStateManager(AirbyteStateType.STREAM, List.of(airbyteStateMessage), catalog)); } @Test @@ -184,7 +177,7 @@ void testStreamStateManagerCreationWithLegacyDataPresent() { NAMESPACE)).withStreamState(Jsons.jsonNode(new DbStreamState()))) .withData(Jsons.jsonNode(new DbState())); - final StateManager stateManager = StateManagerFactory.createStateManager(List.of(airbyteStateMessage), catalog, STREAM_STATE_TYPE); + final StateManager stateManager = StateManagerFactory.createStateManager(AirbyteStateType.STREAM, List.of(airbyteStateMessage), catalog); Assertions.assertNotNull(stateManager); Assertions.assertEquals(StreamStateManager.class, stateManager.getClass()); From 1a500d8e9c86f65b125cd00da5c665bc6ba62feb Mon Sep 17 00:00:00 2001 From: jdpgrailsdev Date: Wed, 15 Jun 2022 10:31:13 -0400 Subject: [PATCH 24/34] Fix failing tests --- .../AbstractJdbcSourceAcceptanceTest.java | 31 +++++++++++++++++++ .../state/StateManagerFactory.java | 4 +-- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSourceAcceptanceTest.java index 909194580404..b8c0f28b17bf 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSourceAcceptanceTest.java @@ -15,8 +15,14 @@ import 
io.airbyte.integrations.base.IntegrationRunner; import io.airbyte.integrations.base.Source; import io.airbyte.integrations.source.jdbc.test.JdbcSourceAcceptanceTest; +import io.airbyte.integrations.source.relationaldb.models.CdcState; +import io.airbyte.protocol.models.AirbyteGlobalState; +import io.airbyte.protocol.models.AirbyteStateMessage; +import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; +import io.airbyte.protocol.models.AirbyteStreamState; import io.airbyte.test.utils.PostgreSQLContainerHelper; import java.sql.JDBCType; +import java.util.List; import java.util.Set; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; @@ -82,6 +88,11 @@ public String getDriverClass() { return PostgresTestSource.DRIVER_CLASS; } + @Override + protected boolean supportsPerStream() { + return true; + } + @AfterAll static void cleanUp() { PSQL_DB.close(); @@ -118,6 +129,26 @@ public Set getExcludedInternalNameSpaces() { return Set.of("information_schema", "pg_catalog", "pg_internal", "catalog_history"); } + // TODO This is a temporary override so that the Postgres source can take advantage of per-stream state + @Override + protected List generateEmptyInitialState(final JsonNode config) { + if (getSupportedStateType(config) == AirbyteStateType.GLOBAL) { + final AirbyteGlobalState globalState = new AirbyteGlobalState() + .withSharedState(Jsons.jsonNode(new CdcState())) + .withStreamStates(List.of()); + return List.of(new AirbyteStateMessage().withStateType(AirbyteStateType.GLOBAL).withGlobal(globalState)); + } else { + return List.of(new AirbyteStateMessage() + .withStateType(AirbyteStateType.STREAM) + .withStream(new AirbyteStreamState())); + } + } + + @Override + protected AirbyteStateType getSupportedStateType(final JsonNode config) { + return AirbyteStateType.STREAM; + } + public static void main(final String[] args) throws Exception { final Source source = new PostgresTestSource(); LOGGER.info("starting source: {}", 
PostgresTestSource.class); diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java index a1e60a30bb7e..b2d86c222464 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java @@ -64,7 +64,7 @@ public static StateManager createStateManager(final AirbyteStateType supportedSt *
    *
  • Stream -> Global (not supported, results in {@link IllegalArgumentException}
  • *
  • Legacy -> Global (supported)
  • - *
  • Global -> Glboal (supported/no conversion required)
  • + *
  • Global -> Global (supported/no conversion required)
  • *
* * @param airbyteStateMessage The current state that is to be converted to global state. @@ -93,7 +93,7 @@ private static AirbyteStateMessage generateGlobalState(final AirbyteStateMessage * Handles the conversion between a different state type and the stream state. This method handles * the following transitions: *
    - *
  • Global -> Stream (supported/shared state discarded)
  • + *
  • Global -> Stream (not supported, results in {@link IllegalArgumentException})
  • *
  • Legacy -> Stream (supported)
  • *
  • Stream -> Stream (supported/no conversion required)
  • *
From d21a6a0f368d979d4813ffa904a226c5409d7151 Mon Sep 17 00:00:00 2001 From: jdpgrailsdev Date: Wed, 15 Jun 2022 14:29:49 -0400 Subject: [PATCH 25/34] Fix failing integration tests --- .../airbyte/integrations/debezium/CdcSourceTest.java | 10 +++++----- .../airbyte/integrations/debezium/CdcSourceTest.java | 10 +++++----- .../standardtest/source/SourceAcceptanceTest.java | 2 +- .../source/mysql/CdcMySqlSourceAcceptanceTest.java | 2 +- .../source/postgres/CdcPostgresSourceTest.java | 2 +- .../source/relationaldb/state/GlobalStateManager.java | 2 +- .../relationaldb/state/GlobalStateManagerTest.java | 4 +++- 7 files changed, 17 insertions(+), 15 deletions(-) diff --git a/airbyte-integrations/bases/debezium-v1-4-2/src/testFixtures/java/io/airbyte/integrations/debezium/CdcSourceTest.java b/airbyte-integrations/bases/debezium-v1-4-2/src/testFixtures/java/io/airbyte/integrations/debezium/CdcSourceTest.java index a1049f0b7450..04cd2bfc20b8 100644 --- a/airbyte-integrations/bases/debezium-v1-4-2/src/testFixtures/java/io/airbyte/integrations/debezium/CdcSourceTest.java +++ b/airbyte-integrations/bases/debezium-v1-4-2/src/testFixtures/java/io/airbyte/integrations/debezium/CdcSourceTest.java @@ -316,7 +316,7 @@ void testDelete() throws Exception { .format("DELETE FROM %s.%s WHERE %s = %s", MODELS_SCHEMA, MODELS_STREAM_NAME, COL_ID, 11)); - final JsonNode state = stateMessages1.get(0).getData(); + final JsonNode state = Jsons.jsonNode(stateMessages1); final AutoCloseableIterator read2 = getSource() .read(getConfig(), CONFIGURED_CATALOG, state); final List actualRecords2 = AutoCloseableIterators.toListAndClose(read2); @@ -347,7 +347,7 @@ void testUpdate() throws Exception { .format("UPDATE %s.%s SET %s = '%s' WHERE %s = %s", MODELS_SCHEMA, MODELS_STREAM_NAME, COL_MODEL, updatedModel, COL_ID, 11)); - final JsonNode state = stateMessages1.get(0).getData(); + final JsonNode state = Jsons.jsonNode(stateMessages1); final AutoCloseableIterator read2 = getSource() .read(getConfig(), 
CONFIGURED_CATALOG, state); final List actualRecords2 = AutoCloseableIterators.toListAndClose(read2); @@ -403,7 +403,7 @@ void testRecordsProducedDuringAndAfterSync() throws Exception { recordsCreated[0]++; } - final JsonNode state = stateAfterFirstBatch.get(0).getData(); + final JsonNode state = Jsons.jsonNode(stateAfterFirstBatch); final AutoCloseableIterator secondBatchIterator = getSource() .read(getConfig(), CONFIGURED_CATALOG, state); final List dataFromSecondBatch = AutoCloseableIterators @@ -492,7 +492,7 @@ void testCdcAndFullRefreshInSameSync() throws Exception { .jsonNode(ImmutableMap.of(COL_ID, 100, COL_MAKE_ID, 3, COL_MODEL, "Punto")); writeModelRecord(puntoRecord); - final JsonNode state = extractStateMessages(actualRecords1).get(0).getData(); + final JsonNode state = Jsons.jsonNode(extractStateMessages(actualRecords1)); final AutoCloseableIterator read2 = getSource() .read(getConfig(), configuredCatalog, state); final List actualRecords2 = AutoCloseableIterators.toListAndClose(read2); @@ -535,7 +535,7 @@ void testNoDataOnSecondSync() throws Exception { final AutoCloseableIterator read1 = getSource() .read(getConfig(), CONFIGURED_CATALOG, null); final List actualRecords1 = AutoCloseableIterators.toListAndClose(read1); - final JsonNode state = extractStateMessages(actualRecords1).get(0).getData(); + final JsonNode state = Jsons.jsonNode(extractStateMessages(actualRecords1)); final AutoCloseableIterator read2 = getSource() .read(getConfig(), CONFIGURED_CATALOG, state); diff --git a/airbyte-integrations/bases/debezium-v1-9-2/src/testFixtures/java/io/airbyte/integrations/debezium/CdcSourceTest.java b/airbyte-integrations/bases/debezium-v1-9-2/src/testFixtures/java/io/airbyte/integrations/debezium/CdcSourceTest.java index 79d6dbbd5b31..441de6ff481e 100644 --- a/airbyte-integrations/bases/debezium-v1-9-2/src/testFixtures/java/io/airbyte/integrations/debezium/CdcSourceTest.java +++ 
b/airbyte-integrations/bases/debezium-v1-9-2/src/testFixtures/java/io/airbyte/integrations/debezium/CdcSourceTest.java @@ -316,7 +316,7 @@ void testDelete() throws Exception { .format("DELETE FROM %s.%s WHERE %s = %s", MODELS_SCHEMA, MODELS_STREAM_NAME, COL_ID, 11)); - final JsonNode state = stateMessages1.get(0).getData(); + final JsonNode state = Jsons.jsonNode(stateMessages1); final AutoCloseableIterator read2 = getSource() .read(getConfig(), CONFIGURED_CATALOG, state); final List actualRecords2 = AutoCloseableIterators.toListAndClose(read2); @@ -347,7 +347,7 @@ void testUpdate() throws Exception { .format("UPDATE %s.%s SET %s = '%s' WHERE %s = %s", MODELS_SCHEMA, MODELS_STREAM_NAME, COL_MODEL, updatedModel, COL_ID, 11)); - final JsonNode state = stateMessages1.get(0).getData(); + final JsonNode state = Jsons.jsonNode(stateMessages1); final AutoCloseableIterator read2 = getSource() .read(getConfig(), CONFIGURED_CATALOG, state); final List actualRecords2 = AutoCloseableIterators.toListAndClose(read2); @@ -399,7 +399,7 @@ protected void testRecordsProducedDuringAndAfterSync() throws Exception { writeModelRecord(record); } - final JsonNode state = stateAfterFirstBatch.get(0).getData(); + final JsonNode state = Jsons.jsonNode(stateAfterFirstBatch); final AutoCloseableIterator secondBatchIterator = getSource() .read(getConfig(), CONFIGURED_CATALOG, state); final List dataFromSecondBatch = AutoCloseableIterators @@ -488,7 +488,7 @@ void testCdcAndFullRefreshInSameSync() throws Exception { .jsonNode(ImmutableMap.of(COL_ID, 100, COL_MAKE_ID, 3, COL_MODEL, "Punto")); writeModelRecord(puntoRecord); - final JsonNode state = extractStateMessages(actualRecords1).get(0).getData(); + final JsonNode state = Jsons.jsonNode(extractStateMessages(actualRecords1)); final AutoCloseableIterator read2 = getSource() .read(getConfig(), configuredCatalog, state); final List actualRecords2 = AutoCloseableIterators.toListAndClose(read2); @@ -531,7 +531,7 @@ void testNoDataOnSecondSync() 
throws Exception { final AutoCloseableIterator read1 = getSource() .read(getConfig(), CONFIGURED_CATALOG, null); final List actualRecords1 = AutoCloseableIterators.toListAndClose(read1); - final JsonNode state = extractStateMessages(actualRecords1).get(0).getData(); + final JsonNode state = Jsons.jsonNode(extractStateMessages(actualRecords1)); final AutoCloseableIterator read2 = getSource() .read(getConfig(), CONFIGURED_CATALOG, state); diff --git a/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/SourceAcceptanceTest.java b/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/SourceAcceptanceTest.java index 186d0b3c14ad..723ac31ba11a 100644 --- a/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/SourceAcceptanceTest.java +++ b/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/SourceAcceptanceTest.java @@ -236,7 +236,7 @@ public void testIncrementalSyncWithState() throws Exception { // when we run incremental sync again there should be no new records. Run a sync with the latest // state message and assert no records were emitted. 
- final JsonNode latestState = stateMessages.get(stateMessages.size() - 1).getData(); + final JsonNode latestState = Jsons.jsonNode(stateMessages); final List secondSyncRecords = filterRecords(runRead(configuredCatalog, latestState)); assertTrue( secondSyncRecords.isEmpty(), diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/source/mysql/CdcMySqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/source/mysql/CdcMySqlSourceAcceptanceTest.java index f1008f08b40c..d74972818ab2 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/source/mysql/CdcMySqlSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/source/mysql/CdcMySqlSourceAcceptanceTest.java @@ -174,7 +174,7 @@ public void testIncrementalSyncFailedIfBinlogIsDeleted() throws Exception { // when we run incremental sync again there should be no new records. Run a sync with the latest // state message and assert no records were emitted. 
- final JsonNode latestState = stateMessages.get(stateMessages.size() - 1).getData(); + final JsonNode latestState = Jsons.jsonNode(stateMessages); // RESET MASTER removes all binary log files that are listed in the index file, // leaving only a single, empty binary log file with a numeric suffix of .000001 executeQuery("RESET MASTER;"); diff --git a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/CdcPostgresSourceTest.java b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/CdcPostgresSourceTest.java index 6d2caa067420..2aa5e03ebfda 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/CdcPostgresSourceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/CdcPostgresSourceTest.java @@ -287,7 +287,7 @@ public void testRecordsProducedDuringAndAfterSync() throws Exception { writeModelRecord(record); } - final JsonNode state = stateAfterFirstBatch.get(0).getData(); + final JsonNode state = Jsons.jsonNode(stateAfterFirstBatch); final AutoCloseableIterator secondBatchIterator = getSource() .read(getConfig(), CONFIGURED_CATALOG, state); final List dataFromSecondBatch = AutoCloseableIterators diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/GlobalStateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/GlobalStateManager.java index 93d159641641..ca8b516c7cb3 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/GlobalStateManager.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/GlobalStateManager.java @@ -92,7 +92,7 @@ public 
AirbyteStateMessage toState(final Optional Date: Thu, 16 Jun 2022 12:15:48 -0400 Subject: [PATCH 26/34] Add CDC test --- .../state/LegacyStateManagerTest.java | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManagerTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManagerTest.java index c3b796f3d270..f3ddbfd1f180 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManagerTest.java +++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManagerTest.java @@ -14,17 +14,24 @@ import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME2; import static io.airbyte.integrations.source.relationaldb.state.StateTestConstants.STREAM_NAME3; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.mockito.Mockito.mock; import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.relationaldb.models.CdcState; import io.airbyte.integrations.source.relationaldb.models.DbState; import io.airbyte.integrations.source.relationaldb.models.DbStreamState; +import io.airbyte.protocol.models.AirbyteGlobalState; import io.airbyte.protocol.models.AirbyteStateMessage; import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; import io.airbyte.protocol.models.AirbyteStream; +import io.airbyte.protocol.models.AirbyteStreamState; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import io.airbyte.protocol.models.ConfiguredAirbyteStream; +import io.airbyte.protocol.models.StreamDescriptor; import java.util.Comparator; import java.util.List; 
+import java.util.Map; import java.util.Optional; import java.util.stream.Collectors; import org.junit.jupiter.api.Test; @@ -163,4 +170,15 @@ void testCursorNotUpdatedForCdc() { assertEquals(expectedSecondEmission, actualSecondEmission); } + @Test + void testCdcStateManager() { + final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + final CdcState cdcState = new CdcState().withState(Jsons.jsonNode(Map.of("foo", "bar", "baz", 5))); + final DbState dbState = new DbState().withCdcState(cdcState).withStreams(List.of( + new DbStreamState().withStreamNamespace(NAMESPACE).withStreamName(STREAM_NAME1))); + final StateManager stateManager = new LegacyStateManager(dbState, catalog); + assertNotNull(stateManager.getCdcStateManager()); + assertEquals(cdcState, stateManager.getCdcStateManager().getCdcState()); + } + } From b478d572ed7927abaac5adb322e8b21224c26a01 Mon Sep 17 00:00:00 2001 From: jdpgrailsdev Date: Thu, 16 Jun 2022 12:24:54 -0400 Subject: [PATCH 27/34] Fix failing integration test --- .../integrations/standardtest/source/SourceAcceptanceTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/SourceAcceptanceTest.java b/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/SourceAcceptanceTest.java index 723ac31ba11a..44ca43dd0010 100644 --- a/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/SourceAcceptanceTest.java +++ b/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/SourceAcceptanceTest.java @@ -13,6 +13,7 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; +import com.google.common.collect.Iterables; import com.google.common.collect.Sets; import io.airbyte.commons.json.Jsons; import 
io.airbyte.config.StandardCheckConnectionOutput.Status; @@ -236,7 +237,7 @@ public void testIncrementalSyncWithState() throws Exception { // when we run incremental sync again there should be no new records. Run a sync with the latest // state message and assert no records were emitted. - final JsonNode latestState = Jsons.jsonNode(stateMessages); + final JsonNode latestState = Jsons.jsonNode(List.of(Iterables.getLast(stateMessages))); final List secondSyncRecords = filterRecords(runRead(configuredCatalog, latestState)); assertTrue( secondSyncRecords.isEmpty(), From ae867bf8c23d6a865bea9199526b48e3d67141a7 Mon Sep 17 00:00:00 2001 From: jdpgrailsdev Date: Thu, 16 Jun 2022 12:27:05 -0400 Subject: [PATCH 28/34] Revert change --- .../source/mysql/CdcMySqlSourceAcceptanceTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/source/mysql/CdcMySqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/source/mysql/CdcMySqlSourceAcceptanceTest.java index d74972818ab2..4ec58c6634c8 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/source/mysql/CdcMySqlSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/source/mysql/CdcMySqlSourceAcceptanceTest.java @@ -10,6 +10,7 @@ import com.fasterxml.jackson.databind.JsonNode; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import io.airbyte.commons.json.Jsons; import io.airbyte.db.Database; @@ -174,7 +175,7 @@ public void testIncrementalSyncFailedIfBinlogIsDeleted() throws Exception { // when we run incremental sync again there should be no new records. Run a sync with the latest // state message and assert no records were emitted. 
- final JsonNode latestState = Jsons.jsonNode(stateMessages); + final JsonNode latestState = Jsons.jsonNode(List.of(Iterables.getLast(stateMessages))); // RESET MASTER removes all binary log files that are listed in the index file, // leaving only a single, empty binary log file with a numeric suffix of .000001 executeQuery("RESET MASTER;"); From b753b1425780e8b2fb3b498060318e6af22441df Mon Sep 17 00:00:00 2001 From: jdpgrailsdev Date: Thu, 16 Jun 2022 14:16:43 -0400 Subject: [PATCH 29/34] Fix failing integration test --- .../standardtest/source/SourceAcceptanceTest.java | 14 +++++++++++++- .../source/mysql/CdcMySqlSourceAcceptanceTest.java | 2 +- .../sources/PostgresSourceAcceptanceTest.java | 4 ++++ .../relationaldb/state/StateGeneratorUtils.java | 4 ++-- .../state/StateGeneratorUtilsTest.java | 4 ++-- 5 files changed, 22 insertions(+), 6 deletions(-) diff --git a/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/SourceAcceptanceTest.java b/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/SourceAcceptanceTest.java index 44ca43dd0010..a6e2d50c85aa 100644 --- a/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/SourceAcceptanceTest.java +++ b/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/SourceAcceptanceTest.java @@ -107,6 +107,18 @@ public abstract class SourceAcceptanceTest extends AbstractSourceConnectorTest { */ protected abstract JsonNode getState() throws Exception; + /** + * Tests whether the connector under test supports the per-stream state format or should use the + * legacy format for data generated by this test. + * + * @return {@code true} if the connector supports the per-stream state format or {@code false} if it + * does not support the per-stream state format (e.g. legacy format supported). 
Default + * value is {@code false}. + */ + protected boolean supportsPerStream() { + return false; + } + /** * Verify that a spec operation issued to the connector returns a valid spec. */ @@ -237,7 +249,7 @@ public void testIncrementalSyncWithState() throws Exception { // when we run incremental sync again there should be no new records. Run a sync with the latest // state message and assert no records were emitted. - final JsonNode latestState = Jsons.jsonNode(List.of(Iterables.getLast(stateMessages))); + final JsonNode latestState = Jsons.jsonNode(supportsPerStream() ? stateMessages : List.of(Iterables.getLast(stateMessages))); final List secondSyncRecords = filterRecords(runRead(configuredCatalog, latestState)); assertTrue( secondSyncRecords.isEmpty(), diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/source/mysql/CdcMySqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/source/mysql/CdcMySqlSourceAcceptanceTest.java index 4ec58c6634c8..b23b8953fc82 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/source/mysql/CdcMySqlSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/source/mysql/CdcMySqlSourceAcceptanceTest.java @@ -175,7 +175,7 @@ public void testIncrementalSyncFailedIfBinlogIsDeleted() throws Exception { // when we run incremental sync again there should be no new records. Run a sync with the latest // state message and assert no records were emitted. - final JsonNode latestState = Jsons.jsonNode(List.of(Iterables.getLast(stateMessages))); + final JsonNode latestState = Jsons.jsonNode(supportsPerStream() ? 
stateMessages : List.of(Iterables.getLast(stateMessages))); // RESET MASTER removes all binary log files that are listed in the index file, // leaving only a single, empty binary log file with a numeric suffix of .000001 executeQuery("RESET MASTER;"); diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceAcceptanceTest.java index acd1da14241f..d9001c0bd6b5 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceAcceptanceTest.java @@ -134,4 +134,8 @@ protected JsonNode getState() { return Jsons.jsonNode(new HashMap<>()); } + @Override + protected boolean supportsPerStream() { + return true; + } } diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtils.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtils.java index 479239983512..a26a8aa48ede 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtils.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtils.java @@ -146,7 +146,7 @@ public static Optional extractState(final AirbyteStreamState stat /** * Tests whether the provided {@link StreamDescriptor} is valid. 
A valid descriptor is defined as - * one that has both a non-{@code null} name and non-{@code null} namespace. + * one that has a non-{@code null} name OR non-{@code null} namespace. * * @param streamDescriptor A {@link StreamDescriptor} to be validated. * @return {@code true} if the provided {@link StreamDescriptor} is valid or {@code false} if it is @@ -154,7 +154,7 @@ public static Optional extractState(final AirbyteStreamState stat */ public static boolean isValidStreamDescriptor(final StreamDescriptor streamDescriptor) { if (streamDescriptor != null) { - return streamDescriptor.getName() != null && streamDescriptor.getNamespace() != null; + return streamDescriptor.getName() != null || streamDescriptor.getNamespace() != null; } else { return false; } diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtilsTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtilsTest.java index 7ef520137ffe..612b37c0528f 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtilsTest.java +++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtilsTest.java @@ -25,8 +25,8 @@ void testValidStreamDescriptor() { assertFalse(StateGeneratorUtils.isValidStreamDescriptor(streamDescriptor1)); assertFalse(StateGeneratorUtils.isValidStreamDescriptor(streamDescriptor2)); - assertFalse(StateGeneratorUtils.isValidStreamDescriptor(streamDescriptor3)); - assertFalse(StateGeneratorUtils.isValidStreamDescriptor(streamDescriptor4)); + assertTrue(StateGeneratorUtils.isValidStreamDescriptor(streamDescriptor3)); + assertTrue(StateGeneratorUtils.isValidStreamDescriptor(streamDescriptor4)); 
assertTrue(StateGeneratorUtils.isValidStreamDescriptor(streamDescriptor5)); } From 969de1466d7535af2ff000df13acdbe4ddc1a3a2 Mon Sep 17 00:00:00 2001 From: jdpgrailsdev Date: Thu, 16 Jun 2022 16:06:24 -0400 Subject: [PATCH 30/34] Use per-stream for postgres tests --- .../sources/AbstractSshPostgresSourceAcceptanceTest.java | 4 ++++ .../sources/PostgresSourceStrictEncryptAcceptanceTest.java | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractSshPostgresSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractSshPostgresSourceAcceptanceTest.java index 633e9715f59c..d8e083a5b465 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractSshPostgresSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractSshPostgresSourceAcceptanceTest.java @@ -135,4 +135,8 @@ protected JsonNode getState() { return Jsons.jsonNode(new HashMap<>()); } + @Override + protected boolean supportsPerStream() { + return true; + } } diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceStrictEncryptAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceStrictEncryptAcceptanceTest.java index 569d84d6e6cb..b16cf1073a61 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceStrictEncryptAcceptanceTest.java 
+++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceStrictEncryptAcceptanceTest.java @@ -130,4 +130,8 @@ protected JsonNode getState() { return Jsons.jsonNode(new HashMap<>()); } + @Override + protected boolean supportsPerStream() { + return true; + } } From 5c0ba555d32d06f422a80120fb363c0bebcdbe91 Mon Sep 17 00:00:00 2001 From: jdpgrailsdev Date: Fri, 17 Jun 2022 10:13:01 -0400 Subject: [PATCH 31/34] Formatting --- .../source/jdbc/AbstractJdbcSourceAcceptanceTest.java | 3 ++- .../integrations/source/postgres/PostgresSource.java | 5 ++--- .../sources/AbstractSshPostgresSourceAcceptanceTest.java | 1 + .../sources/PostgresSourceAcceptanceTest.java | 1 + .../PostgresSourceStrictEncryptAcceptanceTest.java | 1 + .../source/relationaldb/AbstractDbSource.java | 1 + .../source/relationaldb/state/StateManagerFactory.java | 8 +++++--- .../source/relationaldb/state/LegacyStateManagerTest.java | 3 --- .../relationaldb/state/StateManagerFactoryTest.java | 5 +++-- 9 files changed, 16 insertions(+), 12 deletions(-) diff --git a/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSourceAcceptanceTest.java index b8c0f28b17bf..01e1837b7992 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSourceAcceptanceTest.java @@ -129,7 +129,8 @@ public Set getExcludedInternalNameSpaces() { return Set.of("information_schema", "pg_catalog", "pg_internal", "catalog_history"); } - // TODO This is a temporary override so that the Postgres source can take advantage of per-stream state + // TODO This is a 
temporary override so that the Postgres source can take advantage of per-stream + // state @Override protected List generateEmptyInitialState(final JsonNode config) { if (getSupportedStateType(config) == AirbyteStateType.GLOBAL) { diff --git a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java index 04c708fe7a8f..76aaa2c88d11 100644 --- a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java +++ b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java @@ -28,7 +28,6 @@ import io.airbyte.integrations.source.jdbc.dto.JdbcPrivilegeDto; import io.airbyte.integrations.source.relationaldb.TableInfo; import io.airbyte.integrations.source.relationaldb.models.CdcState; -import io.airbyte.integrations.source.relationaldb.state.AirbyteStateMessageListTypeReference; import io.airbyte.integrations.source.relationaldb.state.StateManager; import io.airbyte.protocol.models.AirbyteCatalog; import io.airbyte.protocol.models.AirbyteConnectionStatus; @@ -50,7 +49,6 @@ import java.util.List; import java.util.Map; import java.util.Set; -import java.util.function.Supplier; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -411,7 +409,8 @@ private static AirbyteStream addCdcMetadataColumns(final AirbyteStream stream) { return stream; } - // TODO This is a temporary override so that the Postgres source can take advantage of per-stream state + // TODO This is a temporary override so that the Postgres source can take advantage of per-stream + // state @Override protected List generateEmptyInitialState(final JsonNode config) { if (getSupportedStateType(config) == AirbyteStateType.GLOBAL) { diff --git 
a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractSshPostgresSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractSshPostgresSourceAcceptanceTest.java index d8e083a5b465..911a24f02f21 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractSshPostgresSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractSshPostgresSourceAcceptanceTest.java @@ -139,4 +139,5 @@ protected JsonNode getState() { protected boolean supportsPerStream() { return true; } + } diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceAcceptanceTest.java index d9001c0bd6b5..623d2ef11e80 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceAcceptanceTest.java @@ -138,4 +138,5 @@ protected JsonNode getState() { protected boolean supportsPerStream() { return true; } + } diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceStrictEncryptAcceptanceTest.java 
b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceStrictEncryptAcceptanceTest.java index b16cf1073a61..6752036e504e 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceStrictEncryptAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceStrictEncryptAcceptanceTest.java @@ -134,4 +134,5 @@ protected JsonNode getState() { protected boolean supportsPerStream() { return true; } + } diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java index 015bbfffb916..389d7e555432 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractDbSource.java @@ -535,6 +535,7 @@ protected List deserializeInitialState(final JsonNode initi /** * Generates an empty, initial state for use by the connector. + * * @param config The connector configuration. * @return The empty, initial state. 
*/ diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java index b2d86c222464..a5dddedc9ebe 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactory.java @@ -27,11 +27,13 @@ public class StateManagerFactory { private StateManagerFactory() {} /** - * Creates a {@link StateManager} based on the provided state object and catalog. This method will handle the - * conversion of the provided state to match the requested state manager based on the provided {@link AirbyteStateType}. + * Creates a {@link StateManager} based on the provided state object and catalog. This method will + * handle the conversion of the provided state to match the requested state manager based on the + * provided {@link AirbyteStateType}. * * @param supportedStateType The type of state supported by the connector. - * @param initialState The deserialized initial state that will be provided to the selected {@link StateManager}. + * @param initialState The deserialized initial state that will be provided to the selected + * {@link StateManager}. * @param catalog The {@link ConfiguredAirbyteCatalog} for the connector that will utilize the state * manager. * @return A newly created {@link StateManager} implementation based on the provided state. 
diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManagerTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManagerTest.java index f3ddbfd1f180..cbf41a7415e4 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManagerTest.java +++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/LegacyStateManagerTest.java @@ -21,14 +21,11 @@ import io.airbyte.integrations.source.relationaldb.models.CdcState; import io.airbyte.integrations.source.relationaldb.models.DbState; import io.airbyte.integrations.source.relationaldb.models.DbStreamState; -import io.airbyte.protocol.models.AirbyteGlobalState; import io.airbyte.protocol.models.AirbyteStateMessage; import io.airbyte.protocol.models.AirbyteStateMessage.AirbyteStateType; import io.airbyte.protocol.models.AirbyteStream; -import io.airbyte.protocol.models.AirbyteStreamState; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import io.airbyte.protocol.models.ConfiguredAirbyteStream; -import io.airbyte.protocol.models.StreamDescriptor; import java.util.Comparator; import java.util.List; import java.util.Map; diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java index 2a68f218e59b..0127b068915a 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java +++ 
b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateManagerFactoryTest.java @@ -42,7 +42,7 @@ void testNullOrEmptyState() { }); Assertions.assertThrows(IllegalArgumentException.class, () -> { - StateManagerFactory.createStateManager(AirbyteStateType.LEGACY,null, catalog); + StateManagerFactory.createStateManager(AirbyteStateType.LEGACY, null, catalog); }); Assertions.assertThrows(IllegalArgumentException.class, () -> { @@ -166,7 +166,8 @@ void testStreamStateManagerCreationFromGlobal() { .withStreamState(Jsons.jsonNode(new DbStreamState())))); final AirbyteStateMessage airbyteStateMessage = new AirbyteStateMessage().withStateType(AirbyteStateType.GLOBAL).withGlobal(globalState); - Assertions.assertThrows(IllegalArgumentException.class, () -> StateManagerFactory.createStateManager(AirbyteStateType.STREAM, List.of(airbyteStateMessage), catalog)); + Assertions.assertThrows(IllegalArgumentException.class, + () -> StateManagerFactory.createStateManager(AirbyteStateType.STREAM, List.of(airbyteStateMessage), catalog)); } @Test From cf079bed27511ed98f66aa95eab55759d907f004 Mon Sep 17 00:00:00 2001 From: jdpgrailsdev Date: Fri, 17 Jun 2022 11:34:36 -0400 Subject: [PATCH 32/34] Correct stream descriptor validation --- .../source/relationaldb/state/StateGeneratorUtils.java | 7 +++++-- .../relationaldb/state/StateGeneratorUtilsTest.java | 8 +++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtils.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtils.java index a26a8aa48ede..36da75bd2f0e 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtils.java +++ 
b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtils.java @@ -146,7 +146,10 @@ public static Optional extractState(final AirbyteStreamState stat /** * Tests whether the provided {@link StreamDescriptor} is valid. A valid descriptor is defined as - * one that has a non-{@code null} name OR non-{@code null} namespace. + * one that has a non-{@code null} name. + * + * See https://github.com/airbytehq/airbyte/blob/22b727c0ea213376b7164ffd8cdbbfa7fd74c26c/docs/understanding-airbyte/airbyte-protocol.md + * for more details * * @param streamDescriptor A {@link StreamDescriptor} to be validated. * @return {@code true} if the provided {@link StreamDescriptor} is valid or {@code false} if it is @@ -154,7 +157,7 @@ public static Optional extractState(final AirbyteStreamState stat */ public static boolean isValidStreamDescriptor(final StreamDescriptor streamDescriptor) { if (streamDescriptor != null) { - return streamDescriptor.getName() != null || streamDescriptor.getNamespace() != null; + return streamDescriptor.getName() != null; } else { return false; } diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtilsTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtilsTest.java index 612b37c0528f..9ac94775c928 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtilsTest.java +++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtilsTest.java @@ -22,12 +22,18 @@ void testValidStreamDescriptor() { final StreamDescriptor streamDescriptor3 = new StreamDescriptor().withName("name"); final StreamDescriptor streamDescriptor4 = new 
StreamDescriptor().withNamespace("namespace"); final StreamDescriptor streamDescriptor5 = new StreamDescriptor().withName("name").withNamespace("namespace"); + final StreamDescriptor streamDescriptor6 = new StreamDescriptor().withName("name").withNamespace(""); + final StreamDescriptor streamDescriptor7 = new StreamDescriptor().withName("").withNamespace("namespace"); + final StreamDescriptor streamDescriptor8 = new StreamDescriptor().withName("").withNamespace(""); assertFalse(StateGeneratorUtils.isValidStreamDescriptor(streamDescriptor1)); assertFalse(StateGeneratorUtils.isValidStreamDescriptor(streamDescriptor2)); assertTrue(StateGeneratorUtils.isValidStreamDescriptor(streamDescriptor3)); - assertTrue(StateGeneratorUtils.isValidStreamDescriptor(streamDescriptor4)); + assertFalse(StateGeneratorUtils.isValidStreamDescriptor(streamDescriptor4)); assertTrue(StateGeneratorUtils.isValidStreamDescriptor(streamDescriptor5)); + assertTrue(StateGeneratorUtils.isValidStreamDescriptor(streamDescriptor6)); + assertTrue(StateGeneratorUtils.isValidStreamDescriptor(streamDescriptor7)); + assertTrue(StateGeneratorUtils.isValidStreamDescriptor(streamDescriptor8)); } } From 12dd14c154f776689b661bed634c30ee1ca0f5fc Mon Sep 17 00:00:00 2001 From: jdpgrailsdev Date: Fri, 17 Jun 2022 11:35:45 -0400 Subject: [PATCH 33/34] Correct permalink --- .../source/relationaldb/state/StateGeneratorUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtils.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtils.java index 36da75bd2f0e..493defb95e9f 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtils.java +++ 
b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/StateGeneratorUtils.java @@ -148,7 +148,7 @@ public static Optional extractState(final AirbyteStreamState stat * Tests whether the provided {@link StreamDescriptor} is valid. A valid descriptor is defined as * one that has a non-{@code null} name. * - * See https://github.com/airbytehq/airbyte/blob/22b727c0ea213376b7164ffd8cdbbfa7fd74c26c/docs/understanding-airbyte/airbyte-protocol.md + * See https://github.com/airbytehq/airbyte/blob/e63458fabb067978beb5eaa74d2bc130919b419f/docs/understanding-airbyte/airbyte-protocol.md * for more details * * @param streamDescriptor A {@link StreamDescriptor} to be validated. From ef81d69814a21ae50787a250969dd3e0ed63f75a Mon Sep 17 00:00:00 2001 From: jdpgrailsdev Date: Fri, 17 Jun 2022 14:50:55 -0400 Subject: [PATCH 34/34] PR feedback --- .../integrations/source/relationaldb/state/CursorManager.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/CursorManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/CursorManager.java index 86038797e55b..207b51ad5bad 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/CursorManager.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/state/CursorManager.java @@ -92,9 +92,9 @@ protected Map createCursorInfoMap( final Map localMap = new HashMap<>(); final Map pairToState = streamSupplier.get() .stream() - .collect(Collectors.toMap(namespacePairFunction, a -> a)); + .collect(Collectors.toMap(namespacePairFunction,Function.identity())); final Map pairToConfiguredAirbyteStream = catalog.getStreams().stream() - 
.collect(Collectors.toMap(AirbyteStreamNameNamespacePair::fromConfiguredAirbyteSteam, s -> s)); + .collect(Collectors.toMap(AirbyteStreamNameNamespacePair::fromConfiguredAirbyteSteam, Function.identity())); for (final AirbyteStreamNameNamespacePair pair : allStreamNames) { final Optional stateOptional = Optional.ofNullable(pairToState.get(pair));