diff --git a/airbyte-config/init/src/main/resources/icons/dockerhub.svg b/airbyte-config/init/src/main/resources/icons/dockerhub.svg new file mode 100644 index 000000000000..a8728893131d --- /dev/null +++ b/airbyte-config/init/src/main/resources/icons/dockerhub.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 7bdc4054c69f..0d4e758d320d 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -213,6 +213,14 @@ icon: dixa.svg sourceType: api releaseStage: alpha +- name: Dockerhub + sourceDefinitionId: 72d405a3-56d8-499f-a571-667c03406e43 + dockerRepository: airbyte/source-dockerhub + dockerImageTag: 0.1.0 + documentationUrl: https://docs.airbyte.io/integrations/sources/dockerhub + icon: dockerhub.svg + sourceType: api + releaseStage: alpha - name: Drift sourceDefinitionId: 445831eb-78db-4b1f-8f1f-0d96ad8739e2 dockerRepository: airbyte/source-drift diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 0dd75580c048..9a219813db7b 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -1608,6 +1608,27 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-dockerhub:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/dockerhub" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Dockerhub Spec" + type: "object" + required: + - "docker_username" + additionalProperties: false + properties: + docker_username: + type: "string" + description: "Username of DockerHub person or organization (for https://hub.docker.com/v2/repositories/USERNAME/\ + \ API call)" + pattern: "^[a-z0-9_\\-]+$" + examples: + - "airbyte" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] - dockerImage: "airbyte/source-drift:0.2.5" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/drift" diff --git a/airbyte-integrations/builds.md b/airbyte-integrations/builds.md index 1c35f3dbf12b..a6c4def12227 100644 --- a/airbyte-integrations/builds.md +++ b/airbyte-integrations/builds.md @@ -26,6 +26,7 @@ | Close.com | [![source-close-com](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-close-com%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-close-com/) | | Delighted | [![source-delighted](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-delighted%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-delighted) | | Dixa | [![source-dixa](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-dixa%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-dixa) | +| Dockerhub | [![source-dockerhub](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-dockerhub%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-dockerhub) | | Drift | 
[![source-drift](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-drift%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-drift) | | End-to-End Testing | [![source-e2e-test](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-e2e-test%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-e2e-test) | | Exchange Rates API | [![source-exchange-rates](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-exchange-rates%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-exchange-rates) | diff --git a/airbyte-integrations/connectors/source-dockerhub/.dockerignore b/airbyte-integrations/connectors/source-dockerhub/.dockerignore new file mode 100644 index 000000000000..e311a05884c4 --- /dev/null +++ b/airbyte-integrations/connectors/source-dockerhub/.dockerignore @@ -0,0 +1,6 @@ +* +!Dockerfile +!main.py +!source_dockerhub +!setup.py +!secrets diff --git a/airbyte-integrations/connectors/source-dockerhub/Dockerfile b/airbyte-integrations/connectors/source-dockerhub/Dockerfile new file mode 100644 index 000000000000..058503c031b4 --- /dev/null +++ b/airbyte-integrations/connectors/source-dockerhub/Dockerfile @@ -0,0 +1,38 @@ +FROM python:3.9.11-alpine3.15 as base + +# build and load all requirements +FROM base as builder +WORKDIR /airbyte/integration_code + +# upgrade pip to the latest version +RUN apk --no-cache upgrade \ + && pip install --upgrade pip \ + && apk --no-cache add tzdata build-base + + +COPY setup.py ./ +# install necessary packages to a temporary folder +RUN pip install --prefix=/install . + +# build a clean environment +FROM base +WORKDIR /airbyte/integration_code + +# copy all loaded and built libraries to a pure basic image +COPY --from=builder /install /usr/local +# add default timezone settings +COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime +RUN echo "Etc/UTC" > /etc/timezone + +# bash is installed for more convenient debugging. +RUN apk --no-cache add bash + +# copy payload code only +COPY main.py ./ +COPY source_dockerhub ./source_dockerhub + +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] + +LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.name=airbyte/source-dockerhub diff --git a/airbyte-integrations/connectors/source-dockerhub/README.md b/airbyte-integrations/connectors/source-dockerhub/README.md new file mode 100644 index 000000000000..043b7f07bd9f --- /dev/null +++ b/airbyte-integrations/connectors/source-dockerhub/README.md @@ -0,0 +1,145 @@ +# Dockerhub Source + +This is the repository for the Dockerhub source connector, written in Python. +For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.io/integrations/sources/dockerhub) (not active yet). + +## Local development + +### Prerequisites +**To iterate on this connector, make sure to complete this prerequisites section.** + +#### Minimum Python version required `= 3.7.0` + +#### Build & Activate Virtual Environment and install dependencies +From this connector directory, create a virtual environment: +``` +python -m venv .venv +``` + +This will generate a virtualenv for this module in `.venv/`. Make sure this venv is active in your +development environment of choice. 
To activate it from the terminal, run: +``` +source .venv/bin/activate +pip install -r requirements.txt +pip install '.[tests]' +``` +If you are in an IDE, follow your IDE's instructions to activate the virtualenv. + +Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. `requirements.txt` is +used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`. +If this is mumbo jumbo to you, don't worry about it: just put your deps in `setup.py`, install using `pip install -r requirements.txt`, and everything +should work as you expect. + +#### Building via Gradle +You can also build the connector in Gradle. This is typically used in CI and not needed for your development workflow. + +To build using Gradle, from the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:source-dockerhub:build +``` + + + +### Locally running the connector +``` +python main.py spec +python main.py check --config sample_files/config.json +python main.py discover --config sample_files/config.json +python main.py read --config sample_files/config.json --catalog integration_tests/configured_catalog.json +``` + +### Locally running the connector docker image + +#### Build +First, make sure you build the latest Docker image: +``` +docker build . -t airbyte/source-dockerhub:dev +``` + +You can also build the connector image via Gradle: +``` +./gradlew :airbyte-integrations:connectors:source-dockerhub:airbyteDocker +``` +When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in +the Dockerfile. + +#### Run +Then run any of the connector commands as follows (the sample config lives in `sample_files/`, so that directory is the one mounted into the container): +``` +docker run --rm airbyte/source-dockerhub:dev spec +docker run --rm -v $(pwd)/sample_files:/sample_files airbyte/source-dockerhub:dev check --config /sample_files/config.json +docker run --rm -v $(pwd)/sample_files:/sample_files airbyte/source-dockerhub:dev discover --config /sample_files/config.json +docker run --rm -v $(pwd)/sample_files:/sample_files -v $(pwd)/integration_tests:/integration_tests airbyte/source-dockerhub:dev read --config /sample_files/config.json --catalog /integration_tests/configured_catalog.json +``` +## Testing +Make sure to familiarize yourself with [pytest test discovery](https://docs.pytest.org/en/latest/goodpractices.html#test-discovery) to know how your test files and methods should be named. +First install test dependencies into your virtual environment: +``` +pip install .[tests] +``` +### Unit Tests +To run unit tests locally, from the connector directory run: +``` +python -m pytest unit_tests +``` + +### Integration Tests +There are two types of integration tests: Acceptance Tests (Airbyte's test suite for all source connectors) and custom integration tests (which are specific to this connector). +#### Custom Integration tests +Place custom tests inside the `integration_tests/` folder, then, from the connector root, run +``` +python -m pytest integration_tests +``` +#### Acceptance Tests +Customize the `acceptance-test-config.yml` file to configure tests. See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) for more information. +If your connector requires creating or destroying resources for use during acceptance tests, create fixtures for them and place them inside integration_tests/acceptance.py.
+To run your integration tests with acceptance tests, from the connector root, run +``` +python -m pytest integration_tests -p integration_tests.acceptance +``` +To run your integration tests inside Docker, run `./acceptance-test-docker.sh` from the connector root (see the script added below). + +### Using Gradle to run tests +All commands should be run from the Airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:source-dockerhub:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:source-dockerhub:integrationTest +``` + +## Dependency Management +All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development. +We split dependencies between two groups: +* dependencies required for your connector to work go in the `MAIN_REQUIREMENTS` list. +* dependencies required for testing go in the `TEST_REQUIREMENTS` list. + +### Publishing a new version of the connector +You've checked out the repo, implemented a million-dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing unit and integration tests. +1. Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)). +1. Create a Pull Request. +1. Pat yourself on the back for being an awesome contributor. +1. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. diff --git a/airbyte-integrations/connectors/source-dockerhub/acceptance-test-config.yml b/airbyte-integrations/connectors/source-dockerhub/acceptance-test-config.yml new file mode 100644 index 000000000000..353c0fd0ae64 --- /dev/null +++ b/airbyte-integrations/connectors/source-dockerhub/acceptance-test-config.yml @@ -0,0 +1,24 @@ +# See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) +# for more information about how to configure these tests +connector_image: airbyte/source-dockerhub:dev +tests: + spec: + - spec_path: "source_dockerhub/spec.yaml" + connection: + - config_path: "sample_files/config.json" + status: "succeed" + # even with an incorrect username the api still returns 200 so just ignoring the invalid config check for now + # - config_path: "integration_tests/invalid_config.json" + # status: "failed" + discovery: + - config_path: "sample_files/config.json" + basic_read: + - config_path: "sample_files/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + empty_streams: [] + full_refresh: + - config_path: "sample_files/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + # testing sequentially for same results can fail because of pull counts increasing for an image between runs + ignored_fields: + "docker_hub": ["pull_count", "last_updated"] diff --git a/airbyte-integrations/connectors/source-dockerhub/acceptance-test-docker.sh b/airbyte-integrations/connectors/source-dockerhub/acceptance-test-docker.sh new file mode 100644 index 000000000000..c51577d10690 --- /dev/null +++ b/airbyte-integrations/connectors/source-dockerhub/acceptance-test-docker.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env sh + +# Build latest connector image +docker build .
-t $(cat acceptance-test-config.yml | grep "connector_image" | head -n 1 | cut -d: -f2-) + +# Pull latest acctest image +docker pull airbyte/source-acceptance-test:latest + +# Run +docker run --rm -it \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -v /tmp:/tmp \ + -v $(pwd):/test_input \ + airbyte/source-acceptance-test \ + --acceptance-test-config /test_input + diff --git a/airbyte-integrations/connectors/source-dockerhub/bootstrap.md b/airbyte-integrations/connectors/source-dockerhub/bootstrap.md new file mode 100644 index 000000000000..0c0f4fdec9b0 --- /dev/null +++ b/airbyte-integrations/connectors/source-dockerhub/bootstrap.md @@ -0,0 +1,14 @@ +# Dockerhub Source API + +- Origin issue/discussion: https://github.com/airbytehq/airbyte/issues/12773 +- API docs: https://docs.docker.com/registry/spec/api/ +- Helpful StackOverflow answer on DockerHub API auth call: https://stackoverflow.com/questions/56193110/how-can-i-use-docker-registry-http-api-v2-to-obtain-a-list-of-all-repositories-i#answer-68654659 + +All API calls need to be authenticated, but for public info, you can just obtain a short lived token from [this endpoint](https://auth.docker.io/token?service=registry.docker.io&scope=repository:library/alpine:pull) without any username/password, so this is what we have done for simplicity. + +If you are reading this in the future and need to expand this source connector to include private data, do take note that you'll need to add the `/secrets/config.json` files and change the auth strategy (we think it takes either HTTP basic auth or Oauth2 to the same endpoint, with the right scope): + +- Original notes: https://github.com/airbytehq/airbyte/issues/12773#issuecomment-1126785570 +- Auth docs: https://docs.docker.com/registry/spec/auth/jwt/ +- Might also want to use OAuth2: https://docs.docker.com/registry/spec/auth/oauth/ +- Scope docs: https://docs.docker.com/registry/spec/auth/scope/ \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-dockerhub/build.gradle b/airbyte-integrations/connectors/source-dockerhub/build.gradle new file mode 100644 index 000000000000..4cc7b7adfa87 --- /dev/null +++ b/airbyte-integrations/connectors/source-dockerhub/build.gradle @@ -0,0 +1,9 @@ +plugins { + id 'airbyte-python' + id 'airbyte-docker' + id 'airbyte-source-acceptance-test' +} + +airbytePython { + moduleDirectory 'source_dockerhub' +} diff --git a/airbyte-integrations/connectors/source-dockerhub/integration_tests/__init__.py b/airbyte-integrations/connectors/source-dockerhub/integration_tests/__init__.py new file mode 100644 index 000000000000..46b7376756ec --- /dev/null +++ b/airbyte-integrations/connectors/source-dockerhub/integration_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# diff --git a/airbyte-integrations/connectors/source-dockerhub/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-dockerhub/integration_tests/abnormal_state.json new file mode 100644 index 000000000000..52b0f2c2118f --- /dev/null +++ b/airbyte-integrations/connectors/source-dockerhub/integration_tests/abnormal_state.json @@ -0,0 +1,5 @@ +{ + "todo-stream-name": { + "todo-field-name": "todo-abnormal-value" + } +} diff --git a/airbyte-integrations/connectors/source-dockerhub/integration_tests/acceptance.py b/airbyte-integrations/connectors/source-dockerhub/integration_tests/acceptance.py new file mode 100644 index 000000000000..1a6f55e7224b --- /dev/null +++ b/airbyte-integrations/connectors/source-dockerhub/integration_tests/acceptance.py @@ -0,0 +1,20 @@ +# +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. +# + + +import os +import pathlib +import shutil + +import pytest + +pytest_plugins = ("source_acceptance_test.plugin",) + + +@pytest.fixture(scope="session", autouse=True) +def connector_setup(): + """This source doesn't have any secrets, so this copies the sample_files config into secrets/ for acceptance tests""" + src_folder = pathlib.Path(__file__).parent.parent.resolve() + os.makedirs(f"{src_folder}/secrets", exist_ok=True) + shutil.copy(f"{src_folder}/sample_files/config.json", f"{src_folder}/secrets/") diff --git a/airbyte-integrations/connectors/source-dockerhub/integration_tests/catalog.json b/airbyte-integrations/connectors/source-dockerhub/integration_tests/catalog.json new file mode 100644 index 000000000000..9627353a77b8 --- /dev/null +++ b/airbyte-integrations/connectors/source-dockerhub/integration_tests/catalog.json @@ -0,0 +1,62 @@ +{ + "streams": [ + { + "name": "docker_hub", + "json_schema": { + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "properties": { + "user": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "namespace": { + "type": ["null", "string"] + }, + "repository_type": { + "type": ["null", "string"] + }, + "status": { + "type": ["null", "integer"] + }, + "description": { + "type": ["null", "string"] + }, + "is_private": { + "type": ["null", "boolean"] + }, + "is_automated": { + "type": ["null", "boolean"] + }, + "can_edit": { + "type": ["null", "boolean"] + }, + "star_count": { + "type": ["null", "integer"] + }, + "pull_count": { + "type": ["null", "integer"] + }, + "last_updated": { + "type": ["null", "string"] + }, + "is_migrated": { + "type": ["null", "boolean"] + }, + "collaborator_count": { + "type": ["null", "integer"] + }, + "affiliation": { + "type": ["null", "string"] + }, + "hub_user": { + "type": ["null", "string"] + } + } + }, + "supported_sync_modes": ["full_refresh"] + } + ] +} diff --git a/airbyte-integrations/connectors/source-dockerhub/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-dockerhub/integration_tests/configured_catalog.json new file mode 100644 index 000000000000..6f8198f3af96 --- /dev/null +++ b/airbyte-integrations/connectors/source-dockerhub/integration_tests/configured_catalog.json @@ -0,0 +1,67 @@ +{ + "streams": [ + { + "stream": { + "name": "docker_hub", + "json_schema": { + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "properties": { + "user": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "namespace": { + "type": ["null", "string"] + }, + "repository_type": { + "type": ["null", "string"] + }, + "status": { + "type": 
["null", "integer"] + }, + "description": { + "type": ["null", "string"] + }, + "is_private": { + "type": ["null", "boolean"] + }, + "is_automated": { + "type": ["null", "boolean"] + }, + "can_edit": { + "type": ["null", "boolean"] + }, + "star_count": { + "type": ["null", "integer"] + }, + "pull_count": { + "type": ["null", "integer"] + }, + "last_updated": { + "type": ["null", "string"] + }, + "is_migrated": { + "type": ["null", "boolean"] + }, + "collaborator_count": { + "type": ["null", "integer"] + }, + "affiliation": { + "type": ["null", "string"] + }, + "hub_user": { + "type": ["null", "string"] + } + } + }, + "supported_sync_modes": ["full_refresh"] + }, + "source_defined_cursor": false, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/source-dockerhub/integration_tests/invalid_config.json b/airbyte-integrations/connectors/source-dockerhub/integration_tests/invalid_config.json new file mode 100644 index 000000000000..dc1c9833fc58 --- /dev/null +++ b/airbyte-integrations/connectors/source-dockerhub/integration_tests/invalid_config.json @@ -0,0 +1,3 @@ +{ + "docker_username": "8cf32219-675f-41c3-a879-adc79f6e670e-475f57f0-8037-4ff0-93df-a913fb8fb055" +} diff --git a/airbyte-integrations/connectors/source-dockerhub/integration_tests/sample_state.json b/airbyte-integrations/connectors/source-dockerhub/integration_tests/sample_state.json new file mode 100644 index 000000000000..3587e579822d --- /dev/null +++ b/airbyte-integrations/connectors/source-dockerhub/integration_tests/sample_state.json @@ -0,0 +1,5 @@ +{ + "todo-stream-name": { + "todo-field-name": "value" + } +} diff --git a/airbyte-integrations/connectors/source-dockerhub/main.py b/airbyte-integrations/connectors/source-dockerhub/main.py new file mode 100644 index 000000000000..a22bd2c1febc --- /dev/null +++ b/airbyte-integrations/connectors/source-dockerhub/main.py @@ -0,0 +1,13 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +import sys + +from airbyte_cdk.entrypoint import launch +from source_dockerhub import SourceDockerhub + +if __name__ == "__main__": + source = SourceDockerhub() + launch(source, sys.argv[1:]) diff --git a/airbyte-integrations/connectors/source-dockerhub/requirements.txt b/airbyte-integrations/connectors/source-dockerhub/requirements.txt new file mode 100644 index 000000000000..0411042aa091 --- /dev/null +++ b/airbyte-integrations/connectors/source-dockerhub/requirements.txt @@ -0,0 +1,2 @@ +-e ../../bases/source-acceptance-test +-e . diff --git a/airbyte-integrations/connectors/source-dockerhub/sample_files/config.json b/airbyte-integrations/connectors/source-dockerhub/sample_files/config.json new file mode 100644 index 000000000000..e9c198a5974f --- /dev/null +++ b/airbyte-integrations/connectors/source-dockerhub/sample_files/config.json @@ -0,0 +1,3 @@ +{ + "docker_username": "airbyte" +} diff --git a/airbyte-integrations/connectors/source-dockerhub/setup.py b/airbyte-integrations/connectors/source-dockerhub/setup.py new file mode 100644 index 000000000000..f382fbc56177 --- /dev/null +++ b/airbyte-integrations/connectors/source-dockerhub/setup.py @@ -0,0 +1,27 @@ +# +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = ["airbyte-cdk~=0.1", "requests~=2.28.0"] + +TEST_REQUIREMENTS = [ + "pytest~=6.1", + "pytest-mock~=3.6.1", + "source-acceptance-test", +] + +setup( + name="source_dockerhub", + description="Source implementation for Dockerhub.", + author="Airbyte", + author_email="shawn@airbyte.io", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json", "*.yaml", "schemas/*.json", "schemas/shared/*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, +) diff --git a/airbyte-integrations/connectors/source-dockerhub/source_dockerhub/__init__.py b/airbyte-integrations/connectors/source-dockerhub/source_dockerhub/__init__.py new file mode 100644 index 000000000000..4961990cca6c --- /dev/null +++ b/airbyte-integrations/connectors/source-dockerhub/source_dockerhub/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +from .source import SourceDockerhub + +__all__ = ["SourceDockerhub"] diff --git a/airbyte-integrations/connectors/source-dockerhub/source_dockerhub/schemas/docker_hub.json b/airbyte-integrations/connectors/source-dockerhub/source_dockerhub/schemas/docker_hub.json new file mode 100644 index 000000000000..f72e7df20c30 --- /dev/null +++ b/airbyte-integrations/connectors/source-dockerhub/source_dockerhub/schemas/docker_hub.json @@ -0,0 +1,54 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "user": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "namespace": { + "type": ["null", "string"] + }, + "repository_type": { + "type": ["null", "string"] + }, + "status": { + "type": ["null", "integer"] + }, + "description": { + "type": ["null", "string"] + }, + "is_private": { + "type": ["null", "boolean"] + }, + "is_automated": { + "type": ["null", "boolean"] + }, + "can_edit": { + "type": ["null", "boolean"] + }, + "star_count": { + "type": ["null", "integer"] + }, + "pull_count": { + "type": ["null", "integer"] + }, + "last_updated": { + "type": ["null", "string"] + }, + "is_migrated": { + "type": ["null", "boolean"] + }, + "collaborator_count": { + "type": ["null", "integer"] + }, + "affiliation": { + "type": ["null", "string"] + }, + "hub_user": { + "type": ["null", "string"] + } + } +} diff --git a/airbyte-integrations/connectors/source-dockerhub/source_dockerhub/source.py b/airbyte-integrations/connectors/source-dockerhub/source_dockerhub/source.py new file mode 100644 index 000000000000..00f1800efcd2 --- /dev/null +++ b/airbyte-integrations/connectors/source-dockerhub/source_dockerhub/source.py @@ -0,0 +1,89 @@ +# +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+# +import logging +from typing import Any, Iterable, List, Mapping, Optional, Tuple +from urllib.parse import urlparse + +import requests +from airbyte_cdk.sources import AbstractSource +from airbyte_cdk.sources.streams import Stream +from airbyte_cdk.sources.streams.http import HttpStream + +logger = logging.getLogger("airbyte") + + +class SourceDockerhub(AbstractSource): + jwt = None + + def check_connection(self, logger, config) -> Tuple[bool, any]: + username = config["docker_username"] + + # get JWT + jwt_url = "https://auth.docker.io/token?service=registry.docker.io&scope=repository:library/alpine:pull" + response = requests.get(jwt_url) + self.jwt = response.json()["token"] + + # check that jwt is valid and that username is valid + url = f"https://hub.docker.com/v2/repositories/{username}/" + try: + response = requests.get(url, headers={"Authorization": self.jwt}) + response.raise_for_status() + except requests.exceptions.HTTPError as e: + if e.response.status_code == 401: + logger.info(str(e)) + return False, "Invalid JWT received, check if auth.docker.io changed API" + elif e.response.status_code == 404: + logger.info(str(e)) + return False, f"User '{username}' not found, check if hub.docker.com/u/{username} exists" + else: + logger.info(str(e)) + return False, f"Error getting basic user info for Docker user '{username}', unexpected error" + json_response = response.json() + repocount = json_response["count"] + logger.info(f"Connection check for Docker user '{username}' successful: {repocount} repos found") + return True, None + + def streams(self, config: Mapping[str, Any]) -> List[Stream]: + return [DockerHub(jwt=self.jwt, config=config)] + + +class DockerHub(HttpStream): + url_base = "https://hub.docker.com/v2" + + # Set this as a noop. + primary_key = None + + def __init__(self, jwt: str, config: Mapping[str, Any], **kwargs): + super().__init__() + # Here's where we set the variable from our input to pass it down to the source. + self.jwt = jwt + self.docker_username = config["docker_username"] + + def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + decoded_response = response.json() + if decoded_response["next"] is None: + return None + else: + para = urlparse(decoded_response["next"]).query + return "?" 
+ para + + def path( + self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = "" + ) -> str: + return f"/v2/repositories/{self.docker_username}/" + str(next_page_token or "") + + def request_headers( + self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> Mapping[str, Any]: + return {"Authorization": self.jwt} + + def parse_response( + self, + response: requests.Response, + stream_state: Mapping[str, Any], + stream_slice: Mapping[str, Any] = None, + next_page_token: Mapping[str, Any] = None, + ) -> Iterable[Mapping]: + for repository in response.json().get("results"): + yield repository diff --git a/airbyte-integrations/connectors/source-dockerhub/source_dockerhub/spec.yaml b/airbyte-integrations/connectors/source-dockerhub/source_dockerhub/spec.yaml new file mode 100644 index 000000000000..2461d7f0a8d9 --- /dev/null +++ b/airbyte-integrations/connectors/source-dockerhub/source_dockerhub/spec.yaml @@ -0,0 +1,15 @@ +documentationUrl: https://docs.airbyte.io/integrations/sources/dockerhub +connectionSpecification: + $schema: http://json-schema.org/draft-07/schema# + title: Dockerhub Spec + type: object + required: + - docker_username + additionalProperties: false + properties: + docker_username: + type: string + description: Username of DockerHub person or organization (for https://hub.docker.com/v2/repositories/USERNAME/ API call) + pattern: ^[a-z0-9_\-]+$ + examples: + - airbyte diff --git a/airbyte-integrations/connectors/source-dockerhub/unit_tests/__init__.py b/airbyte-integrations/connectors/source-dockerhub/unit_tests/__init__.py new file mode 100644 index 000000000000..46b7376756ec --- /dev/null +++ b/airbyte-integrations/connectors/source-dockerhub/unit_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-integrations/connectors/source-dockerhub/unit_tests/test_source.py b/airbyte-integrations/connectors/source-dockerhub/unit_tests/test_source.py new file mode 100644 index 000000000000..c0d1970236d3 --- /dev/null +++ b/airbyte-integrations/connectors/source-dockerhub/unit_tests/test_source.py @@ -0,0 +1,21 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + +from unittest.mock import MagicMock + +from source_dockerhub.source import SourceDockerhub + + +def test_check_connection(): + source = SourceDockerhub() + logger_mock, config_mock = MagicMock(), {"docker_username": "airbyte"} # shouldnt actually ping network request in test but we will skip for now + assert source.check_connection(logger_mock, config_mock) == (True, None) + + +def test_streams(): + source = SourceDockerhub() + config_mock = MagicMock() + streams = source.streams(config_mock) + expected_streams_number = 1 + assert len(streams) == expected_streams_number diff --git a/airbyte-integrations/connectors/source-dockerhub/unit_tests/test_streams.py b/airbyte-integrations/connectors/source-dockerhub/unit_tests/test_streams.py new file mode 100644 index 000000000000..379d9a84cc2e --- /dev/null +++ b/airbyte-integrations/connectors/source-dockerhub/unit_tests/test_streams.py @@ -0,0 +1,52 @@ +# +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+# + +from unittest.mock import MagicMock + +import requests +from source_dockerhub.source import DockerHub + + +def test_next_page_token(): + stream = DockerHub(jwt="foo", config={"docker_username": "foo"}) + + # mocking the request with a response that has a next page token + response = requests.Response() + response.url = "https://foo" + response.json = MagicMock() + response.json.return_value = {"next": "https://foo?page=2"} + inputs = {"response": response} + + expected_token = "?page=2" # expected next page token + assert stream.next_page_token(**inputs) == expected_token + + +# cant get this to work - TypeError: 'list' object is not an iterator +# def test_parse_response(patch_base_class, mocker): +# response = mocker.MagicMock() +# response.json.return_value = {"one": 1} +# stream = DockerHub(jwt="foo", config={"docker_username": "foo"}) + +# inputs = { +# "response": response, +# "stream_state": MagicMock(), +# "stream_slice": MagicMock(), +# "next_page_token": MagicMock(), +# } + +# expected_parsed_object = {"one": 1} +# assert next(stream.parse_response(**inputs)) == expected_parsed_object + + +def test_request_headers(): + stream = DockerHub(jwt="foo", config={"docker_username": "foo"}) + + inputs = { + "stream_state": MagicMock(), + "stream_slice": MagicMock(), + "next_page_token": MagicMock(), + } + + expected_headers = {"Authorization": "foo"} + assert stream.request_headers(**inputs) == expected_headers diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md new file mode 100644 index 000000000000..96e6477d4b8c --- /dev/null +++ b/docs/SUMMARY.md @@ -0,0 +1,307 @@ +# Table of contents + +- [Introduction](../README.md) +- [Airbyte Cloud QuickStart](cloud/getting-started-with-airbyte-cloud.md) + - [Core Concepts](cloud/core-concepts.md) + - [Managing Airbyte Cloud](cloud/managing-airbyte-cloud.md) +- [Airbyte Open Source Quickstart](quickstart/README.md) + - [Deploy Airbyte](quickstart/deploy-airbyte.md) + - [Add a Source](quickstart/add-a-source.md) + - [Add a Destination](quickstart/add-a-destination.md) + - [Set up a Connection](quickstart/set-up-a-connection.md) +- [Deploying Airbyte Open Source](deploying-airbyte/README.md) + - [Local Deployment](deploying-airbyte/local-deployment.md) + - [On AWS (EC2)](deploying-airbyte/on-aws-ec2.md) + - [On AWS ECS (Coming Soon)](deploying-airbyte/on-aws-ecs.md) + - [On Azure(VM)](deploying-airbyte/on-azure-vm-cloud-shell.md) + - [On GCP (Compute Engine)](deploying-airbyte/on-gcp-compute-engine.md) + - [On Kubernetes (Beta)](deploying-airbyte/on-kubernetes.md) + - [On Plural (Beta)](deploying-airbyte/on-plural.md) + - [On Oracle Cloud Infrastructure VM](deploying-airbyte/on-oci-vm.md) + - [On Digital Ocean Droplet](deploying-airbyte/on-digitalocean-droplet.md) +- [Operator Guides](operator-guides/README.md) + - [Upgrading Airbyte](operator-guides/upgrading-airbyte.md) + - [Resetting Your Data](operator-guides/reset.md) + - [Configuring the Airbyte Database](operator-guides/configuring-airbyte-db.md) + - [Browsing Output Logs](operator-guides/browsing-output-logs.md) + - [Using the Airflow Airbyte Operator](operator-guides/using-the-airflow-airbyte-operator.md) + - [Using the Prefect Task](operator-guides/using-prefect-task.md) + - [Using the Dagster Integration](operator-guides/using-dagster-integration.md) + - [Windows - Browsing Local File Output](operator-guides/locating-files-local-destination.md) + - [Transformations and Normalization](operator-guides/transformation-and-normalization/README.md) + - [Transformations with SQL (Part 
1/3)](operator-guides/transformation-and-normalization/transformations-with-sql.md) + - [Transformations with dbt (Part 2/3)](operator-guides/transformation-and-normalization/transformations-with-dbt.md) + - [Transformations with Airbyte (Part 3/3)](operator-guides/transformation-and-normalization/transformations-with-airbyte.md) + - [Configuring Airbyte](operator-guides/configuring-airbyte.md) + - [Sentry Integration](operator-guides/sentry-integration.md) + - [Using Custom Connectors](operator-guides/using-custom-connectors.md) + - [Scaling Airbyte](operator-guides/scaling-airbyte.md) + - [Securing Airbyte](operator-guides/securing-airbyte.md) +- [Connector Catalog](integrations/README.md) + - [Sources](integrations/sources/README.md) + - [3PL Central](integrations/sources/tplcentral.md) + - [Airtable](integrations/sources/airtable.md) + - [Amazon SQS](integrations/sources/amazon-sqs.md) + - [Amazon Seller Partner](integrations/sources/amazon-seller-partner.md) + - [Amazon Ads](integrations/sources/amazon-ads.md) + - [Amplitude](integrations/sources/amplitude.md) + - [Apify Dataset](integrations/sources/apify-dataset.md) + - [Appstore](integrations/sources/appstore.md) + - [Asana](integrations/sources/asana.md) + - [AWS CloudTrail](integrations/sources/aws-cloudtrail.md) + - [Azure Table Storage](integrations/sources/azure-table.md) + - [Bamboo HR](integrations/sources/bamboo-hr.md) + - [Bing Ads](integrations/sources/bing-ads.md) + - [BigCommerce](integrations/sources/bigcommerce.md) + - [BigQuery](integrations/sources/bigquery.md) + - [Braintree](integrations/sources/braintree.md) + - [Cart](integrations/sources/cart.md) + - [Chargebee](integrations/sources/chargebee.md) + - [Chartmogul](integrations/sources/chartmogul.md) + - [ClickHouse](integrations/sources/clickhouse.md) + - [Close.com](integrations/sources/close-com.md) + - [CockroachDB](integrations/sources/cockroachdb.md) + - [Confluence](integrations/sources/confluence.md) + - [Customer.io (Sponsored by Faros AI)](integrations/sources/customer-io.md) + - [Delighted](integrations/sources/delighted.md) + - [Db2](integrations/sources/db2.md) + - [Dixa](integrations/sources/dixa.md) + - [DockerHub](integrations/sources/dockerhub.md) + - [Drift](integrations/sources/drift.md) + - [Drupal](integrations/sources/drupal.md) + - [End-to-End Testing](integrations/sources/e2e-test.md) + - [Exchange Rates API](integrations/sources/exchangeratesapi.md) + - [Facebook Marketing](integrations/sources/facebook-marketing.md) + - [Facebook Pages](integrations/sources/facebook-pages.md) + - [Faker](integrations/sources/faker.md) + - [Files](integrations/sources/file.md) + - [Firebolt](integrations/sources/firebolt.md) + - [Flexport](integrations/sources/flexport.md) + - [Freshdesk](integrations/sources/freshdesk.md) + - [Freshsales](integrations/sources/freshsales.md) + - [Freshservice](integrations/sources/freshservice.md) + - [GitHub](integrations/sources/github.md) + - [GitLab](integrations/sources/gitlab.md) + - [Google Ads](integrations/sources/google-ads.md) + - [Google Analytics](integrations/sources/google-analytics-v4.md) + - [Google Directory](integrations/sources/google-directory.md) + - [Google Search Console](integrations/sources/google-search-console.md) + - [Google Sheets](integrations/sources/google-sheets.md) + - [Google Workspace Admin Reports](integrations/sources/google-workspace-admin-reports.md) + - [Greenhouse](integrations/sources/greenhouse.md) + - [Harvest](integrations/sources/harvest.md) + - [Harness (Sponsored by Faros 
AI)](integrations/sources/harness.md) + - [HTTP Request (Graveyarded)](integrations/sources/http-request.md) + - [HubSpot](integrations/sources/hubspot.md) + - [Instagram](integrations/sources/instagram.md) + - [Intercom](integrations/sources/intercom.md) + - [Iterable](integrations/sources/iterable.md) + - [Jenkins (Sponsored by Faros AI)](integrations/sources/jenkins.md) + - [Jira](integrations/sources/jira.md) + - [Kafka](integrations/sources/kafka.md) + - [Klaviyo](integrations/sources/klaviyo.md) + - [Kustomer](integrations/sources/kustomer.md) + - [Lemlist](integrations/sources/lemlist.md) + - [LinkedIn Ads](integrations/sources/linkedin-ads.md) + - [Linnworks](integrations/sources/linnworks.md) + - [Lever Hiring](integrations/sources/lever-hiring.md) + - [Looker](integrations/sources/looker.md) + - [Magento](integrations/sources/magento.md) + - [Mailchimp](integrations/sources/mailchimp.md) + - [Marketo](integrations/sources/marketo.md) + - [Microsoft Dynamics AX](integrations/sources/microsoft-dynamics-ax.md) + - [Microsoft Dynamics Customer Engagement](integrations/sources/microsoft-dynamics-customer-engagement.md) + - [Microsoft Dynamics GP](integrations/sources/microsoft-dynamics-gp.md) + - [Microsoft Dynamics NAV](integrations/sources/microsoft-dynamics-nav.md) + - [Microsoft SQL Server (MSSQL)](integrations/sources/mssql.md) + - [Microsoft Teams](integrations/sources/microsoft-teams.md) + - [Mixpanel](integrations/sources/mixpanel.md) + - [Monday](integrations/sources/monday.md) + - [Mongo DB](integrations/sources/mongodb-v2.md) + - [My Hours](integrations/sources/my-hours.md) + - [MySQL](integrations/sources/mysql.md) + - [Notion](integrations/sources/notion.md) + - [Okta](integrations/sources/okta.md) + - [OneSignal](integrations/sources/onesignal.md) + - [OpenWeather](integrations/sources/openweather.md) + - [Oracle DB](integrations/sources/oracle.md) + - [Oracle Peoplesoft](integrations/sources/oracle-peoplesoft.md) + - [Oracle Siebel CRM](integrations/sources/oracle-siebel-crm.md) + - [Orb](integrations/sources/orb.md) + - [Outreach](integrations/sources/outreach.md) + - [PagerDuty (Sponsored by Faros AI)](integrations/sources/pagerduty.md) + - [Paypal Transaction](integrations/sources/paypal-transaction.md) + - [Paystack](integrations/sources/paystack.md) + - [Persistiq](integrations/sources/persistiq.md) + - [Plaid](integrations/sources/plaid.md) + - [Pinterest](integrations/sources/pinterest.md) + - [Pipedrive](integrations/sources/pipedrive.md) + - [PokéAPI](integrations/sources/pokeapi.md) + - [Postgres](integrations/sources/postgres.md) + - [PostHog](integrations/sources/posthog.md) + - [PrestaShop](integrations/sources/presta-shop.md) + - [Qualaroo](integrations/sources/qualaroo.md) + - [QuickBooks](integrations/sources/quickbooks.md) + - [Recharge](integrations/sources/recharge.md) + - [Recurly](integrations/sources/recurly.md) + - [Redshift](integrations/sources/redshift.md) + - [S3](integrations/sources/s3.md) + - [SAP Business One](integrations/sources/sap-business-one.md) + - [SearchMetrics](integrations/sources/search-metrics.md) + - [Salesforce](integrations/sources/salesforce.md) + - [SalesLoft](integrations/sources/salesloft.md) + - [Sendgrid](integrations/sources/sendgrid.md) + - [Sentry](integrations/sources/sentry.md) + - [Shopify](integrations/sources/shopify.md) + - [Shortio](integrations/sources/shortio.md) + - [Slack](integrations/sources/slack.md) + - [Smartsheets](integrations/sources/smartsheets.md) + - [Snapchat 
Marketing](integrations/sources/snapchat-marketing.md) + - [Snowflake](integrations/sources/snowflake.md) + - [Spree Commerce](integrations/sources/spree-commerce.md) + - [Square](integrations/sources/square.md) + - [Strava](integrations/sources/strava.md) + - [Stripe](integrations/sources/stripe.md) + - [Sugar CRM](integrations/sources/sugar-crm.md) + - [SurveyMonkey](integrations/sources/surveymonkey.md) + - [Tempo](integrations/sources/tempo.md) + - [TikTok Marketing](integrations/sources/tiktok-marketing.md) + - [Trello](integrations/sources/trello.md) + - [Twilio](integrations/sources/twilio.md) + - [TiDB](integrations/sources/tidb.md) + - [Typeform](integrations/sources/typeform.md) + - [US Census API](integrations/sources/us-census.md) + - [VictorOps (Sponsored by Faros AI)](integrations/sources/victorops.md) + - [Woo Commerce](integrations/sources/woocommerce.md) + - [Wordpress](integrations/sources/wordpress.md) + - [YouTube Analytics](integrations/sources/youtube-analytics.md) + - [Zencart](integrations/sources/zencart.md) + - [Zendesk Chat](integrations/sources/zendesk-chat.md) + - [Zendesk Sunshine](integrations/sources/zendesk-sunshine.md) + - [Zendesk Support](integrations/sources/zendesk-support.md) + - [Zendesk Talk](integrations/sources/zendesk-talk.md) + - [Zenloop](integrations/sources/zenloop.md) + - [Zoho CRM](integrations/sources/zoho-crm.md) + - [Zoom](integrations/sources/zoom.md) + - [Zuora](integrations/sources/zuora.md) + - [Destinations](integrations/destinations/README.md) + - [Amazon SQS](integrations/destinations/amazon-sqs.md) + - [AzureBlobStorage](integrations/destinations/azureblobstorage.md) + - [BigQuery](integrations/destinations/bigquery.md) + - [ClickHouse](integrations/destinations/clickhouse.md) + - [Databricks](integrations/destinations/databricks.md) + - [DynamoDB](integrations/destinations/dynamodb.md) + - [Elasticsearch](integrations/destinations/elasticsearch.md) + - [End-to-End Testing](integrations/destinations/e2e-test.md) + - [Chargify](integrations/destinations/chargify.md) + - [Google Cloud Storage (GCS)](integrations/destinations/gcs.md) + - [Google Firestore](integrations/destinations/firestore.md) + - [Google PubSub](integrations/destinations/pubsub.md) + - [Google Sheets](integrations/destinations/google-sheets.md) + - [Kafka](integrations/destinations/kafka.md) + - [Keen](integrations/destinations/keen.md) + - [Local CSV](integrations/destinations/local-csv.md) + - [Local JSON](integrations/destinations/local-json.md) + - [MariaDB ColumnStore](integrations/destinations/mariadb-columnstore.md) + - [MeiliSearch](integrations/destinations/meilisearch.md) + - [MongoDB](integrations/destinations/mongodb.md) + - [MQTT](integrations/destinations/mqtt.md) + - [MSSQL](integrations/destinations/mssql.md) + - [MySQL](integrations/destinations/mysql.md) + - [Oracle DB](integrations/destinations/oracle.md) + - [Postgres](integrations/destinations/postgres.md) + - [Pulsar](integrations/destinations/pulsar.md) + - [RabbitMQ](integrations/destinations/rabbitmq.md) + - [Redshift](integrations/destinations/redshift.md) + - [Rockset](integrations/destinations/rockset.md) + - [S3](integrations/destinations/s3.md) + - [SFTP JSON](integrations/destinations/sftp-json.md) + - [Snowflake](integrations/destinations/snowflake.md) + - [Cassandra](integrations/destinations/cassandra.md) + - [Scylla](integrations/destinations/scylla.md) + - [Redis](integrations/destinations/redis.md) + - [Kinesis](integrations/destinations/kinesis.md) + - 
[Streamr](integrations/destinations/streamr.md) + - [Custom or New Connector](integrations/custom-connectors.md) +- [Connector Development](connector-development/README.md) + - [Tutorials](connector-development/tutorials/README.md) + - [Python CDK Speedrun: Creating a Source](connector-development/tutorials/cdk-speedrun.md) + - [Python CDK: Creating a HTTP API Source](connector-development/tutorials/cdk-tutorial-python-http/README.md) + - [Getting Started](connector-development/tutorials/cdk-tutorial-python-http/0-getting-started.md) + - [Step 1: Creating the Source](connector-development/tutorials/cdk-tutorial-python-http/1-creating-the-source.md) + - [Step 2: Install Dependencies](connector-development/tutorials/cdk-tutorial-python-http/2-install-dependencies.md) + - [Step 3: Define Inputs](connector-development/tutorials/cdk-tutorial-python-http/3-define-inputs.md) + - [Step 4: Connection Checking](connector-development/tutorials/cdk-tutorial-python-http/4-connection-checking.md) + - [Step 5: Declare the Schema](connector-development/tutorials/cdk-tutorial-python-http/5-declare-schema.md) + - [Step 6: Read Data](connector-development/tutorials/cdk-tutorial-python-http/6-read-data.md) + - [Step 7: Use the Connector in Airbyte](connector-development/tutorials/cdk-tutorial-python-http/7-use-connector-in-airbyte.md) + - [Step 8: Test Connector](connector-development/tutorials/cdk-tutorial-python-http/8-test-your-connector.md) + - [Building a Python Source](connector-development/tutorials/building-a-python-source.md) + - [Building a Python Destination](connector-development/tutorials/building-a-python-destination.md) + - [Building a Java Destination](connector-development/tutorials/building-a-java-destination.md) + - [Profile Java Connector Memory](connector-development/tutorials/profile-java-connector-memory.md) + - [Connector Development Kit (Python)](connector-development/cdk-python/README.md) + - [Basic Concepts](connector-development/cdk-python/basic-concepts.md) + - [Defining Stream Schemas](connector-development/cdk-python/schemas.md) + - [Full Refresh Streams](connector-development/cdk-python/full-refresh-stream.md) + - [Incremental Streams](connector-development/cdk-python/incremental-stream.md) + - [HTTP-API-based Connectors](connector-development/cdk-python/http-streams.md) + - [Python Concepts](connector-development/cdk-python/python-concepts.md) + - [Stream Slices](connector-development/cdk-python/stream-slices.md) + - [Connector Development Kit (Javascript)](connector-development/cdk-faros-js.md) + - [Airbyte 101 for Connector Development](connector-development/airbyte101.md) + - [Testing Connectors](connector-development/testing-connectors/README.md) + - [Source Acceptance Tests Reference](connector-development/testing-connectors/source-acceptance-tests-reference.md) + - [Connector Specification Reference](connector-development/connector-specification-reference.md) + - [Best Practices](connector-development/best-practices.md) + - [UX Handbook](connector-development/ux-handbook.md) +- [Contributing to Airbyte](contributing-to-airbyte/README.md) + - [Code of Conduct](contributing-to-airbyte/code-of-conduct.md) + - [Developing Locally](contributing-to-airbyte/developing-locally.md) + - [Developing on Docker](contributing-to-airbyte/developing-on-docker.md) + - [Developing on Kubernetes](contributing-to-airbyte/developing-on-kubernetes.md) + - [Monorepo Python Development](contributing-to-airbyte/monorepo-python-development.md) + - [Code 
Style](contributing-to-airbyte/code-style.md) + - [Gradle Cheatsheet](contributing-to-airbyte/gradle-cheatsheet.md) + - [Updating Documentation](contributing-to-airbyte/updating-documentation.md) + - [Templates](contributing-to-airbyte/templates/README.md) + - [Connector Doc Template](contributing-to-airbyte/templates/integration-documentation-template.md) +- [Understanding Airbyte](understanding-airbyte/README.md) + - [A Beginner's Guide to the AirbyteCatalog](understanding-airbyte/beginners-guide-to-catalog.md) + - [AirbyteCatalog Reference](understanding-airbyte/catalog.md) + - [Airbyte Specification](understanding-airbyte/airbyte-specification.md) + - [Basic Normalization](understanding-airbyte/basic-normalization.md) + - [Connections and Sync Modes](understanding-airbyte/connections/README.md) + - [Full Refresh - Overwrite](understanding-airbyte/connections/full-refresh-overwrite.md) + - [Full Refresh - Append](understanding-airbyte/connections/full-refresh-append.md) + - [Incremental Sync - Append](understanding-airbyte/connections/incremental-append.md) + - [Incremental Sync - Deduped History](understanding-airbyte/connections/incremental-deduped-history.md) + - [Operations](understanding-airbyte/operations.md) + - [High-level View](understanding-airbyte/high-level-view.md) + - [Workers & Jobs](understanding-airbyte/jobs.md) + - [Technical Stack](understanding-airbyte/tech-stack.md) + - [Change Data Capture (CDC)](understanding-airbyte/cdc.md) + - [Namespaces](understanding-airbyte/namespaces.md) + - [Supported Data Types](understanding-airbyte/supported-data-types.md) + - [Json to Avro Conversion](understanding-airbyte/json-avro-conversion.md) + - [Glossary of Terms](understanding-airbyte/glossary.md) +- [API documentation](api-documentation.md) +- [CLI documentation](https://github.com/airbytehq/airbyte/tree/master/octavia-cli) +- [Project Overview](project-overview/README.md) + - [Roadmap](project-overview/roadmap.md) + - [Changelog](project-overview/changelog/README.md) + - [Platform](project-overview/changelog/platform.md) + - [Connectors](project-overview/changelog/connectors.md) + - [Slack Code of Conduct](project-overview/slack-code-of-conduct.md) + - [Security and Data Privacy](project-overview/security.md) + - [Licenses](project-overview/licenses/README.md) + - [License FAQ](project-overview/licenses/license-faq.md) + - [ELv2](project-overview/licenses/elv2-license.md) + - [MIT](project-overview/licenses/mit-license.md) + - [Examples](project-overview/licenses/examples.md) + - [Product Release Stages](project-overview/product-release-stages.md) +- [Troubleshooting & FAQ](troubleshooting/README.md) + - [On Deploying](troubleshooting/on-deploying.md) + - [On Setting up a New Connection](troubleshooting/new-connection.md) + - [On Running a Sync](troubleshooting/running-sync.md) + - [On Upgrading](troubleshooting/on-upgrading.md) \ No newline at end of file diff --git a/docs/connector-development/tutorials/cdk-speedrun.md b/docs/connector-development/tutorials/cdk-speedrun.md index 76e2e1cdda88..f6386d8e372c 100644 --- a/docs/connector-development/tutorials/cdk-speedrun.md +++ b/docs/connector-development/tutorials/cdk-speedrun.md @@ -71,18 +71,25 @@ Ok, let's write a function that checks the inputs we just defined. 
Nuke the `sou from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple import requests +import logging from airbyte_cdk.sources import AbstractSource from airbyte_cdk.sources.streams import Stream from airbyte_cdk.sources.streams.http import HttpStream from . import pokemon_list +logger = logging.getLogger("airbyte") + class SourcePythonHttpExample(AbstractSource): def check_connection(self, logger, config) -> Tuple[bool, any]: + logger.info("Checking Pokemon API connection...") input_pokemon = config["pokemon_name"] if input_pokemon not in pokemon_list.POKEMON_LIST: - return False, f"Input Pokemon {input_pokemon} is invalid. Please check your spelling and input a valid Pokemon." + result = f"Input Pokemon {input_pokemon} is invalid. Please check your spelling and input a valid Pokemon." + logger.info(f"PokeAPI connection failed: {result}") + return False, result else: + logger.info(f"PokeAPI connection success: {input_pokemon} is a valid Pokemon") return True, None def streams(self, config: Mapping[str, Any]) -> List[Stream]: diff --git a/docs/integrations/README.md b/docs/integrations/README.md index 4d75c4144aef..4cb665253100 100644 --- a/docs/integrations/README.md +++ b/docs/integrations/README.md @@ -46,6 +46,7 @@ For more information about the grading system, see [Product Release Stages](http | [Db2](sources/db2.md) | Alpha | No | | [Delighted](sources/delighted.md) | Alpha | Yes | | [Dixa](sources/dixa.md) | Alpha | Yes | +| [Dockerhub](sources/dockerhub.md) | Alpha | No | | [Drift](sources/drift.md) | Alpha | No | | [Drupal](sources/drupal.md) | Alpha | No | | [End-to-End Testing](sources/e2e-test.md) | Alpha | Yes | diff --git a/docs/integrations/sources/dockerhub.md b/docs/integrations/sources/dockerhub.md new file mode 100644 index 000000000000..e87706cf73b6 --- /dev/null +++ b/docs/integrations/sources/dockerhub.md @@ -0,0 +1,40 @@ +# Dockerhub + +## Sync overview + +This source syncs data from the DockerHub API. It currently supports only [listing public repos](https://github.com/airbytehq/airbyte/issues/12773), with Full Refresh syncing. You supply a `docker_username`, and it syncs all public repository metadata published under that name. + +### Output schema + +This Source is capable of syncing the following Streams: + +* DockerHub + +### Features + +| Feature | Supported?\(Yes/No\) | Notes | | :--- | :--- | :--- | | Full Refresh Sync | Yes | | | Incremental Sync | No | | | Namespaces | No | | + +### Performance considerations + +This connector has been tested against the Airbyte organization (266 repositories) and should not run into limitations under normal usage. Please [create an issue](https://github.com/airbytehq/airbyte/issues) if you see any rate limit issues that are not automatically retried successfully. + +## Getting started + +### Requirements + +* None + +### Setup guide + +1. Define a `docker_username`: the DockerHub username or organization whose public repository data the connector will pull. + +## Changelog + +| Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | | 0.1.0 | 2022-05-20 | [13007](https://github.com/airbytehq/airbyte/pull/13007) | New source | +
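
For anyone reviewing the connector or exploring the API outside Airbyte, here is a minimal standalone sketch of the two HTTP calls `source.py` makes: fetch an anonymous short-lived JWT from `auth.docker.io` (as described in `bootstrap.md`), then page through `https://hub.docker.com/v2/repositories/<username>/` following the `next` links. The helper name `list_public_repos` and the fields printed at the end are illustrative only; this is not part of the connector code.

```python
# Standalone sketch mirroring the requests made by source-dockerhub (public data only).
import requests

# Anonymous token endpoint from bootstrap.md; this scope is enough for public reads.
JWT_URL = (
    "https://auth.docker.io/token"
    "?service=registry.docker.io&scope=repository:library/alpine:pull"
)


def list_public_repos(username: str):
    """Yield one dict per public repository published under `username` (illustrative helper)."""
    token = requests.get(JWT_URL).json()["token"]  # short-lived JWT, no credentials required
    url = f"https://hub.docker.com/v2/repositories/{username}/"
    while url:
        response = requests.get(url, headers={"Authorization": token})  # same header the connector sends
        response.raise_for_status()
        payload = response.json()
        yield from payload["results"]  # repository records for the current page
        url = payload["next"]  # None on the last page, which ends the loop


if __name__ == "__main__":
    for repo in list_public_repos("airbyte"):
        print(repo["name"], repo["pull_count"])
```

The connector's `check_connection` probes this same repositories endpoint, which is why a 404 there is reported as an unknown username rather than an authentication failure.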