-
Notifications
You must be signed in to change notification settings - Fork 4.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
4776: Python CDK: Validate input config.py against spec #5457
Changes from 1 commit
d8a7437
db9609c
278e0af
a57965d
035f665
c019bce
e3d5279
c97588b
2896816
aa93cea
86e7457
e29552e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,11 @@ | ||
# Changelog | ||
|
||
## 0.1.10 | ||
Add checking of the specified config against the spec for read, write, check and discover commands | ||
|
||
## 0.1.9 | ||
remove this line after rebase | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. rudiment? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, it's because Artur has already published his MR as version 0.1.9, so mine is 0.1.10: https://pypi.org/project/airbyte-cdk/ There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm waiting until he is ready; then I'll rebase my changes onto his and update this file |
||
|
||
## 0.1.8 | ||
Allow to fetch primary key info from singer catalog | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -30,37 +30,39 @@ | |
|
||
from airbyte_cdk import AirbyteLogger | ||
from airbyte_cdk.connector import Connector | ||
from airbyte_cdk.models import AirbyteMessage, ConfiguredAirbyteCatalog, Type | ||
from airbyte_cdk.models import AirbyteMessage, ConfiguredAirbyteCatalog, ConnectorSpecification, Type | ||
from airbyte_cdk.sources.utils.schema_helpers import check_config_against_spec | ||
from pydantic import ValidationError | ||
|
||
|
||
class Destination(Connector, ABC): | ||
logger = AirbyteLogger() | ||
VALID_CMDS = {"spec", "check", "write"} | ||
|
||
@abstractmethod | ||
def write( | ||
self, config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog, input_messages: Iterable[AirbyteMessage] | ||
) -> Iterable[AirbyteMessage]: | ||
"""Implement to define how the connector writes data to the destination""" | ||
|
||
def _run_spec(self) -> AirbyteMessage: | ||
return AirbyteMessage(type=Type.SPEC, spec=self.spec(self.logger)) | ||
def _run_spec(self, spec: ConnectorSpecification) -> AirbyteMessage: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we should delete this method, calling it There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed |
||
return AirbyteMessage(type=Type.SPEC, spec=spec) | ||
|
||
def _run_check(self, config_path: str) -> AirbyteMessage: | ||
config = self.read_config(config_path=config_path) | ||
def _run_check(self, config: Mapping[str, Any]) -> AirbyteMessage: | ||
check_result = self.check(self.logger, config) | ||
return AirbyteMessage(type=Type.CONNECTION_STATUS, connectionStatus=check_result) | ||
|
||
def _parse_input_stream(self, input_stream: io.TextIOWrapper) -> Iterable[AirbyteMessage]: | ||
""" Reads from stdin, converting to Airbyte messages""" | ||
"""Reads from stdin, converting to Airbyte messages""" | ||
for line in input_stream: | ||
try: | ||
yield AirbyteMessage.parse_raw(line) | ||
except ValidationError: | ||
self.logger.info(f"ignoring input which can't be deserialized as Airbyte Message: {line}") | ||
|
||
def _run_write(self, config_path: str, configured_catalog_path: str, input_stream: io.TextIOWrapper) -> Iterable[AirbyteMessage]: | ||
config = self.read_config(config_path=config_path) | ||
def _run_write( | ||
self, config: Mapping[str, Any], configured_catalog_path: str, input_stream: io.TextIOWrapper | ||
) -> Iterable[AirbyteMessage]: | ||
catalog = ConfiguredAirbyteCatalog.parse_file(configured_catalog_path) | ||
input_messages = self._parse_input_stream(input_stream) | ||
self.logger.info("Begin writing to the destination...") | ||
|
@@ -104,18 +106,24 @@ def parse_args(self, args: List[str]) -> argparse.Namespace: | |
|
||
def run_cmd(self, parsed_args: argparse.Namespace) -> Iterable[AirbyteMessage]: | ||
cmd = parsed_args.command | ||
if cmd not in self.VALID_CMDS: | ||
raise Exception(f"Unrecognized command: {cmd}") | ||
|
||
spec = self.spec(self.logger) | ||
if cmd == "spec": | ||
yield self._run_spec() | ||
elif cmd == "check": | ||
yield self._run_check(config_path=parsed_args.config) | ||
yield self._run_spec(spec) | ||
return | ||
config = self.read_config(config_path=parsed_args.config) | ||
check_error_msg = check_config_against_spec(config, spec) | ||
if check_error_msg: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. an invalid config should cause the exit code to be >0, it's invalid input. An exit code of 0 means the connector succeeded There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated |
||
self.logger.error(check_error_msg) | ||
return | ||
if cmd == "check": | ||
yield self._run_check(config=config) | ||
elif cmd == "write": | ||
# Wrap in UTF-8 to override any other input encodings | ||
wrapped_stdin = io.TextIOWrapper(sys.stdin.buffer, encoding="utf-8") | ||
yield from self._run_write( | ||
config_path=parsed_args.config, configured_catalog_path=parsed_args.catalog, input_stream=wrapped_stdin | ||
) | ||
else: | ||
raise Exception(f"Unrecognized command: {cmd}") | ||
yield from self._run_write(config=config, configured_catalog_path=parsed_args.catalog, input_stream=wrapped_stdin) | ||
|
||
def run(self, args: List[str]): | ||
parsed_args = self.parse_args(args) | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -26,10 +26,13 @@ | |||||
import json | ||||||
import os | ||||||
import pkgutil | ||||||
from typing import Dict | ||||||
from typing import Any, Dict, Mapping, Union | ||||||
|
||||||
import pkg_resources | ||||||
from jsonschema import RefResolver | ||||||
|
||||||
from airbyte_cdk.models import ConnectorSpecification | ||||||
from jsonschema import RefResolver, validate | ||||||
from jsonschema.exceptions import ValidationError | ||||||
|
||||||
|
||||||
class JsonSchemaResolver: | ||||||
|
@@ -124,3 +127,18 @@ def get_schema(self, name: str) -> dict: | |||||
if os.path.exists(shared_schemas_folder): | ||||||
return JsonSchemaResolver(shared_schemas_folder).resolve(raw_schema) | ||||||
return raw_schema | ||||||
|
||||||
|
||||||
def check_config_against_spec(config: Mapping[str, Any], spec: ConnectorSpecification) -> Union[str, None]: | ||||||
""" | ||||||
Check config object against spec. | ||||||
:param config - config loaded from file specified over command line | ||||||
:param spec - spec object generated by connector | ||||||
:return Error message in case validation failed, None otherwise | ||||||
""" | ||||||
spec_schema = spec.connectionSpecification | ||||||
try: | ||||||
validate(instance=config, schema=spec_schema) | ||||||
return None | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated |
||||||
except ValidationError as validation_error: | ||||||
return "Config validation error: " + validation_error.message |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -35,7 +35,7 @@ | |
|
||
setup( | ||
name="airbyte-cdk", | ||
version="0.1.8", | ||
version="0.1.10", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you'll need to either merge this with arthur's changes or release separately There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've merged Arthur's branch into mine, so this PR will look dirty until his branch is merged into master. |
||
description="A framework for writing Airbyte Connectors.", | ||
long_description=README, | ||
long_description_content_type="text/markdown", | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -180,6 +180,7 @@ def test_run_check(self, mocker, destination: Destination, tmp_path): | |
parsed_args = argparse.Namespace(**args) | ||
destination.run_cmd(parsed_args) | ||
|
||
mocker.patch.object(destination, "spec", return_value=ConnectorSpecification(connectionSpecification={})) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we should add a test case here too to verify the config is validated. It may help to look at test coverage by running There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is already a detailed test case that covers the validation function here: https://github.com/airbytehq/airbyte/pull/5457/files#diff-0d9087e19f5bdabe137529bb861aaaa261b39d5344e79041602a2a94c7530301R150 So I've just added checks that this validation function is called with the correct args for the destination. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks, done |
||
expected_check_result = AirbyteConnectionStatus(status=Status.SUCCEEDED) | ||
mocker.patch.object(destination, "check", return_value=expected_check_result, autospec=True) | ||
|
||
|
@@ -216,6 +217,7 @@ def test_run_write(self, mocker, destination: Destination, tmp_path, monkeypatch | |
mocker.patch.object( | ||
destination, "write", return_value=iter(expected_write_result), autospec=True # convert to iterator to mimic real usage | ||
) | ||
mocker.patch.object(destination, "spec", return_value=ConnectorSpecification(connectionSpecification={})) | ||
# mock input is a record followed by some state messages | ||
mocked_input: List[AirbyteMessage] = [_wrapped(_record("s1", {"k1": "v1"})), *expected_write_result] | ||
mocked_stdin_string = "\n".join([record.json(exclude_unset=True) for record in mocked_input]) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated