-
Notifications
You must be signed in to change notification settings - Fork 4.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Remove withRefreshedCatalog param from updateConnection endpoint #14477
Changes from all commits
bdb84a8
8eacbb5
90e3bf0
92ca94c
6cf2198
fc50bc3
7836a64
692c15c
cea8b24
bacb55a
ee83315
3c49ef1
4269715
b24ae1c
cabee58
2e7c474
38cdeed
bec8f7c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -35,6 +35,8 @@ | |||||||||||||||||||||||||||||||||
import io.airbyte.api.model.generated.SourceDiscoverSchemaRequestBody; | ||||||||||||||||||||||||||||||||||
import io.airbyte.api.model.generated.SourceIdRequestBody; | ||||||||||||||||||||||||||||||||||
import io.airbyte.api.model.generated.SourceRead; | ||||||||||||||||||||||||||||||||||
import io.airbyte.api.model.generated.StreamDescriptor; | ||||||||||||||||||||||||||||||||||
import io.airbyte.api.model.generated.StreamTransform; | ||||||||||||||||||||||||||||||||||
import io.airbyte.api.model.generated.WebBackendConnectionCreate; | ||||||||||||||||||||||||||||||||||
import io.airbyte.api.model.generated.WebBackendConnectionRead; | ||||||||||||||||||||||||||||||||||
import io.airbyte.api.model.generated.WebBackendConnectionReadList; | ||||||||||||||||||||||||||||||||||
|
@@ -50,7 +52,11 @@ | |||||||||||||||||||||||||||||||||
import io.airbyte.commons.lang.MoreBooleans; | ||||||||||||||||||||||||||||||||||
import io.airbyte.config.persistence.ConfigNotFoundException; | ||||||||||||||||||||||||||||||||||
import io.airbyte.config.persistence.ConfigRepository; | ||||||||||||||||||||||||||||||||||
import io.airbyte.protocol.models.CatalogHelpers; | ||||||||||||||||||||||||||||||||||
import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; | ||||||||||||||||||||||||||||||||||
import io.airbyte.scheduler.client.EventRunner; | ||||||||||||||||||||||||||||||||||
import io.airbyte.server.converters.ProtocolConverters; | ||||||||||||||||||||||||||||||||||
import io.airbyte.server.handlers.helpers.CatalogConverter; | ||||||||||||||||||||||||||||||||||
import io.airbyte.validation.json.JsonValidationException; | ||||||||||||||||||||||||||||||||||
import io.airbyte.workers.temporal.TemporalClient.ManualOperationResult; | ||||||||||||||||||||||||||||||||||
import java.io.IOException; | ||||||||||||||||||||||||||||||||||
|
@@ -252,7 +258,7 @@ public WebBackendConnectionRead webBackendGetConnection(final WebBackendConnecti | |||||||||||||||||||||||||||||||||
* but was present at time of configuration will appear in the diff as an added stream which is | ||||||||||||||||||||||||||||||||||
* confusing. We need to figure out why source_catalog_id is not always populated in the db. | ||||||||||||||||||||||||||||||||||
*/ | ||||||||||||||||||||||||||||||||||
diff = ConnectionsHandler.getDiff(catalogUsedToMakeConfiguredCatalog.orElse(configuredCatalog), refreshedCatalog.get().getCatalog()); | ||||||||||||||||||||||||||||||||||
diff = connectionsHandler.getDiff(catalogUsedToMakeConfiguredCatalog.orElse(configuredCatalog), refreshedCatalog.get().getCatalog()); | ||||||||||||||||||||||||||||||||||
} else if (catalogUsedToMakeConfiguredCatalog.isPresent()) { | ||||||||||||||||||||||||||||||||||
// reconstructs a full picture of the full schema at the time the catalog was configured. | ||||||||||||||||||||||||||||||||||
syncCatalog = updateSchemaWithDiscovery(configuredCatalog, catalogUsedToMakeConfiguredCatalog.get()); | ||||||||||||||||||||||||||||||||||
|
@@ -371,6 +377,45 @@ public WebBackendConnectionRead webBackendUpdateConnection(final WebBackendConne | |||||||||||||||||||||||||||||||||
verifyManualOperationResult(manualOperationResult); | ||||||||||||||||||||||||||||||||||
connectionRead = connectionsHandler.getConnection(connectionUpdate.getConnectionId()); | ||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||
return buildWebBackendConnectionRead(connectionRead); | ||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
public WebBackendConnectionRead webBackendUpdateConnectionNew(final WebBackendConnectionUpdate webBackendConnectionUpdate) | ||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Currently, the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure we want to fail since extra parameters don't really hurt anything. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is probably fine for now; once we consolidate the endpoints, we should clean it up or at least mark it as obsolete. It will save someone some headaches trying to understand why we have some useless options. |
||||||||||||||||||||||||||||||||||
throws ConfigNotFoundException, IOException, JsonValidationException { | ||||||||||||||||||||||||||||||||||
final List<UUID> operationIds = updateOperations(webBackendConnectionUpdate); | ||||||||||||||||||||||||||||||||||
final ConnectionUpdate connectionUpdate = toConnectionUpdate(webBackendConnectionUpdate, operationIds); | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
final UUID connectionId = webBackendConnectionUpdate.getConnectionId(); | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
final ConfiguredAirbyteCatalog existingConfiguredCatalog = | ||||||||||||||||||||||||||||||||||
configRepository.getConfiguredCatalogForConnection(connectionId); | ||||||||||||||||||||||||||||||||||
final io.airbyte.protocol.models.AirbyteCatalog existingCatalog = CatalogHelpers.configuredCatalogToCatalog(existingConfiguredCatalog); | ||||||||||||||||||||||||||||||||||
final AirbyteCatalog apiExistingCatalog = CatalogConverter.toApi(existingCatalog); | ||||||||||||||||||||||||||||||||||
final AirbyteCatalog newAirbyteCatalog = webBackendConnectionUpdate.getSyncCatalog(); | ||||||||||||||||||||||||||||||||||
final CatalogDiff catalogDiff = connectionsHandler.getDiff(apiExistingCatalog, newAirbyteCatalog); | ||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was just reading through the I think this is a problem, because it means that if, for example, a user just changes the cursor field of their stream, we will not perform a reset of that stream because that configuration is lost when we convert from configured catalog to catalog. This could lead to strange issues like the source trying to use the cursor value of one column as the cursor of another, and could be very tough to fix. We probably want to keep the current diff logic for returning the diff to the frontend for showing the changes when a user refreshes source schema, but I think the above issue means we need to have a second diff calculation method that compares the configured fields of the streams as well, so that we also perform a reset in cases where only the configuration of a stream is changed. FYI @benmoriceau and @cgardens as the original implementer of the diff logic, does the above sound correct to you? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The current behavior is to propose to perform a reset if the cursor changes (probably also true i). Should we keep it as optional as well? When a sync mode is changed (e.g.: from full refresh to incremental) the cursor will also change but we might not want to perform a reset in this case. We can calculate the diff here but I am not sure about the expected behavior regarding the reset. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is addressed in #14626 |
||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
final List<StreamDescriptor> apiStreamsToReset = getStreamsToReset(catalogDiff); | ||||||||||||||||||||||||||||||||||
List<io.airbyte.protocol.models.StreamDescriptor> streamsToReset = | ||||||||||||||||||||||||||||||||||
apiStreamsToReset.stream().map(ProtocolConverters::streamDescriptorToProtocol).toList(); | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
ConnectionRead connectionRead; | ||||||||||||||||||||||||||||||||||
connectionRead = connectionsHandler.updateConnection(connectionUpdate); | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
if (!streamsToReset.isEmpty()) { | ||||||||||||||||||||||||||||||||||
final ConnectionIdRequestBody connectionIdRequestBody = new ConnectionIdRequestBody().connectionId(connectionId); | ||||||||||||||||||||||||||||||||||
final ConnectionStateType stateType = getStateType(connectionIdRequestBody); | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
if (stateType == ConnectionStateType.LEGACY || stateType == ConnectionStateType.NOT_SET || stateType == ConnectionStateType.GLOBAL) { | ||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The plan was to change this to remove the I am concerned this could lead to some issues, such as the following scenario:
In this case, since the postgres source doesn't have that new logic, it could lead to bad behavior, e.g. the source just continuing to sync all streams from the current cdc cursor, or not syncing the streams that were reset at all. @subodh1810 @benmoriceau @gosusnp FYI, I'm not super sure how we should address this issue There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
@lmossman can we force them to migrate to the right postgres connector with the seed file? If they don't downgrade it should ensure that we will use the right version. There might still be an issue with companies that are using the public connectors as a custom one. I think that datadog is doing that. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah that's a good point, I had thought that we had logic to prevent updating a connector if it was actively being used in a deployment, but it looks like we actually do update connectors even if they are in use Lines 1812 to 1817 in 900c212
For the custom connector case, we may just have to try to communicate that those will have to be rebuilt on the latest version There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, one more thing I think we discussed is that we probably want to allow the diff result The state type will be @benmoriceau or @andyjih do you agree with the above? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Wouldn't this be handled correctly by the empty airbyte source reset whether we pass in streamsToReset or all streams in a connection? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The EmptyAirbyteSource will act the same way in either case - it will output no state, since none exists; the difference is in this logic where we decide whether to set the destination sync mode of a stream in the catalog to Lines 102 to 111 in 4e80a67
What I'm proposing here is that when the state is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I wonder how many FULL_REFRESH APPEND are present. This will be tricky because the sync mode is per stream and not per connection... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @benmoriceau I don't think the current sync mode configuration of the connection matters when we are performing a reset, since from the code I linked above you can see that we are choosing which sync mode to use for each stream in the catalog when performing a reset. So what I was saying was that in the case that we are updating a connection whose state is currently And all of the above can be accomplished by just setting There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @alovew since we are now adding this as a separate endpoint, I think it probably makes sense to just make the change now to remove |
||||||||||||||||||||||||||||||||||
streamsToReset = configRepository.getAllStreamsForConnection(connectionId); | ||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||
ManualOperationResult manualOperationResult = eventRunner.synchronousResetConnection( | ||||||||||||||||||||||||||||||||||
webBackendConnectionUpdate.getConnectionId(), | ||||||||||||||||||||||||||||||||||
streamsToReset); | ||||||||||||||||||||||||||||||||||
verifyManualOperationResult(manualOperationResult); | ||||||||||||||||||||||||||||||||||
manualOperationResult = eventRunner.startNewManualSync(webBackendConnectionUpdate.getConnectionId()); | ||||||||||||||||||||||||||||||||||
verifyManualOperationResult(manualOperationResult); | ||||||||||||||||||||||||||||||||||
connectionRead = connectionsHandler.getConnection(connectionUpdate.getConnectionId()); | ||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
return buildWebBackendConnectionRead(connectionRead); | ||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||
|
@@ -488,6 +533,11 @@ protected static ConnectionSearch toConnectionSearch(final WebBackendConnectionS | |||||||||||||||||||||||||||||||||
.status(webBackendConnectionSearch.getStatus()); | ||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
@VisibleForTesting | ||||||||||||||||||||||||||||||||||
static List<StreamDescriptor> getStreamsToReset(final CatalogDiff catalogDiff) { | ||||||||||||||||||||||||||||||||||
return catalogDiff.getTransforms().stream().map(StreamTransform::getStreamDescriptor).toList(); | ||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
/** | ||||||||||||||||||||||||||||||||||
* Equivalent to {@see io.airbyte.integrations.base.AirbyteStreamNameNamespacePair}. Intentionally | ||||||||||||||||||||||||||||||||||
* not using that class because it doesn't make sense for airbyte-server to depend on | ||||||||||||||||||||||||||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think this should be commented out