From 6d3a80aafeb749889a1c55debaa6b8ac4fc6a3ba Mon Sep 17 00:00:00 2001 From: Serhii Lazebnyi <53845333+lazebnyi@users.noreply.github.com> Date: Wed, 22 Dec 2021 22:31:21 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=8E=89=20=20New=20Source:=20SearchMetrics?= =?UTF-8?q?=20(#6992)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Added streams * SearchMetrics implementation * Updated unit tests * Update to review and tests * Add SEARCH_METRICS_TESTS_CREDS to ci creds * Add CountDomainKeyword stream and updated to review * Updated to new master * Updated to linter * Deleted windows in days from config * Updated version in seed --- .../8d7ef552-2c0f-11ec-8d3d-0242ac130003.json | 8 + .../main/resources/icons/searchmetrics.svg | 13 + .../resources/seed/source_definitions.yaml | 7 + .../source-search-metrics/.dockerignore | 7 + .../source-search-metrics/Dockerfile | 38 ++ .../source-search-metrics/README.md | 132 +++++++ .../acceptance-test-config.yml | 31 ++ .../acceptance-test-docker.sh | 16 + .../source-search-metrics/build.gradle | 9 + .../integration_tests/__init__.py | 3 + .../integration_tests/abnormal_state.json | 11 + .../integration_tests/acceptance.py | 13 + .../integration_tests/catalog.json | 24 ++ .../integration_tests/configured_catalog.json | 208 ++++++++++ .../integration_tests/invalid_config.json | 7 + .../integration_tests/sample_config.json | 7 + .../integration_tests/sample_state.json | 11 + .../connectors/source-search-metrics/main.py | 13 + .../source-search-metrics/requirements.txt | 2 + .../connectors/source-search-metrics/setup.py | 29 ++ .../source_search_metrics/__init__.py | 8 + .../source_search_metrics/schemas/TODO.md | 25 ++ .../schemas/benchmark_rankings_s7.json | 21 + .../schemas/competitor_rankings_s7.json | 21 + .../schemas/count_domain_keyword.json | 12 + .../schemas/distribution_keywords_s7.json | 21 + .../schemas/keyword_potentials_s7.json | 27 ++ .../schemas/list_competitors.json | 33 ++ 
.../schemas/list_competitors_relevancy.json | 66 ++++ .../schemas/list_losers_s7.json | 44 +++ .../schemas/list_market_share_s7.json | 29 ++ .../list_position_spread_historic_s7.json | 135 +++++++ .../schemas/list_rankings_analysis_s7.json | 38 ++ .../schemas/list_rankings_domain.json | 88 +++++ .../schemas/list_rankings_historic_s7.json | 87 +++++ .../schemas/list_seo_visibility_country.json | 12 + .../list_seo_visibility_historic_s7.json | 27 ++ .../schemas/list_serp_spread_s7.json | 87 +++++ .../schemas/list_winners_s7.json | 44 +++ .../schemas/marketshare_value_s7.json | 38 ++ .../schemas/projects.json | 27 ++ .../schemas/seo_visibility_value_s7.json | 33 ++ .../schemas/serp_spread_value_s7.json | 29 ++ .../schemas/tag_potentials_s7.json | 36 ++ .../source_search_metrics/schemas/tags.json | 33 ++ .../schemas/url_rankings_s7.json | 87 +++++ .../source_search_metrics/source.py | 360 ++++++++++++++++++ .../source_search_metrics/spec.json | 71 ++++ .../source_search_metrics/utils.py | 14 + .../unit_tests/unit_test.py | 8 + docs/SUMMARY.md | 1 + docs/integrations/README.md | 1 + docs/integrations/sources/search-metrics.md | 65 ++++ 53 files changed, 2217 insertions(+) create mode 100644 airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/8d7ef552-2c0f-11ec-8d3d-0242ac130003.json create mode 100644 airbyte-config/init/src/main/resources/icons/searchmetrics.svg create mode 100644 airbyte-integrations/connectors/source-search-metrics/.dockerignore create mode 100644 airbyte-integrations/connectors/source-search-metrics/Dockerfile create mode 100644 airbyte-integrations/connectors/source-search-metrics/README.md create mode 100644 airbyte-integrations/connectors/source-search-metrics/acceptance-test-config.yml create mode 100644 airbyte-integrations/connectors/source-search-metrics/acceptance-test-docker.sh create mode 100644 airbyte-integrations/connectors/source-search-metrics/build.gradle create mode 100644 
airbyte-integrations/connectors/source-search-metrics/integration_tests/__init__.py create mode 100644 airbyte-integrations/connectors/source-search-metrics/integration_tests/abnormal_state.json create mode 100644 airbyte-integrations/connectors/source-search-metrics/integration_tests/acceptance.py create mode 100644 airbyte-integrations/connectors/source-search-metrics/integration_tests/catalog.json create mode 100644 airbyte-integrations/connectors/source-search-metrics/integration_tests/configured_catalog.json create mode 100644 airbyte-integrations/connectors/source-search-metrics/integration_tests/invalid_config.json create mode 100644 airbyte-integrations/connectors/source-search-metrics/integration_tests/sample_config.json create mode 100644 airbyte-integrations/connectors/source-search-metrics/integration_tests/sample_state.json create mode 100644 airbyte-integrations/connectors/source-search-metrics/main.py create mode 100644 airbyte-integrations/connectors/source-search-metrics/requirements.txt create mode 100644 airbyte-integrations/connectors/source-search-metrics/setup.py create mode 100644 airbyte-integrations/connectors/source-search-metrics/source_search_metrics/__init__.py create mode 100644 airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/TODO.md create mode 100644 airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/benchmark_rankings_s7.json create mode 100644 airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/competitor_rankings_s7.json create mode 100644 airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/count_domain_keyword.json create mode 100644 airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/distribution_keywords_s7.json create mode 100644 airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/keyword_potentials_s7.json create mode 100644 
airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_competitors.json create mode 100644 airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_competitors_relevancy.json create mode 100644 airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_losers_s7.json create mode 100644 airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_market_share_s7.json create mode 100644 airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_position_spread_historic_s7.json create mode 100644 airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_rankings_analysis_s7.json create mode 100644 airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_rankings_domain.json create mode 100644 airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_rankings_historic_s7.json create mode 100644 airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_seo_visibility_country.json create mode 100644 airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_seo_visibility_historic_s7.json create mode 100644 airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_serp_spread_s7.json create mode 100644 airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_winners_s7.json create mode 100644 airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/marketshare_value_s7.json create mode 100644 airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/projects.json create mode 100644 airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/seo_visibility_value_s7.json create mode 100644 
airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/serp_spread_value_s7.json create mode 100644 airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/tag_potentials_s7.json create mode 100644 airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/tags.json create mode 100644 airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/url_rankings_s7.json create mode 100644 airbyte-integrations/connectors/source-search-metrics/source_search_metrics/source.py create mode 100644 airbyte-integrations/connectors/source-search-metrics/source_search_metrics/spec.json create mode 100644 airbyte-integrations/connectors/source-search-metrics/source_search_metrics/utils.py create mode 100644 airbyte-integrations/connectors/source-search-metrics/unit_tests/unit_test.py create mode 100644 docs/integrations/sources/search-metrics.md diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/8d7ef552-2c0f-11ec-8d3d-0242ac130003.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/8d7ef552-2c0f-11ec-8d3d-0242ac130003.json new file mode 100644 index 000000000000..520166fafe32 --- /dev/null +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/8d7ef552-2c0f-11ec-8d3d-0242ac130003.json @@ -0,0 +1,8 @@ +{ + "sourceDefinitionId": "8d7ef552-2c0f-11ec-8d3d-0242ac130003", + "name": "SearchMetrics", + "dockerRepository": "airbyte/source-search-metrics", + "dockerImageTag": "0.1.1", + "documentationUrl": "https://docs.airbyte.io/integrations/sources/search-metrics", + "icon": "searchmetrics.svg" +} diff --git a/airbyte-config/init/src/main/resources/icons/searchmetrics.svg b/airbyte-config/init/src/main/resources/icons/searchmetrics.svg new file mode 100644 index 000000000000..9653a94e3e3f --- /dev/null +++ b/airbyte-config/init/src/main/resources/icons/searchmetrics.svg @@ -0,0 +1,13 @@ + + + + 
+Created by potrace 1.16, written by Peter Selinger 2001-2019 + + + + diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 60b4a84a9eb1..03d7fe46f8b4 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -611,6 +611,13 @@ documentationUrl: https://docs.airbyte.io/integrations/sources/salesforce icon: salesforce.svg sourceType: api +- name: SearchMetrics + sourceDefinitionId: 8d7ef552-2c0f-11ec-8d3d-0242ac130003 + dockerRepository: airbyte/source-search-metrics + dockerImageTag: 0.1.1 + documentationUrl: https://docs.airbyte.io/integrations/sources/search-metrics + icon: searchmetrics.svg + sourceType: api - name: Sendgrid sourceDefinitionId: fbb5fbe2-16ad-4cf4-af7d-ff9d9c316c87 dockerRepository: airbyte/source-sendgrid diff --git a/airbyte-integrations/connectors/source-search-metrics/.dockerignore b/airbyte-integrations/connectors/source-search-metrics/.dockerignore new file mode 100644 index 000000000000..4b799fef5dae --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/.dockerignore @@ -0,0 +1,7 @@ +* +!Dockerfile +!Dockerfile.test +!main.py +!source_search_metrics +!setup.py +!secrets diff --git a/airbyte-integrations/connectors/source-search-metrics/Dockerfile b/airbyte-integrations/connectors/source-search-metrics/Dockerfile new file mode 100644 index 000000000000..1baeeeaee74c --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/Dockerfile @@ -0,0 +1,38 @@ +FROM python:3.7.11-alpine3.14 as base + +# build and load all requirements +FROM base as builder +WORKDIR /airbyte/integration_code + +# upgrade pip to the latest version +RUN apk --no-cache upgrade \ + && pip install --upgrade pip \ + && apk --no-cache add tzdata build-base + + +COPY setup.py ./ +# install necessary packages to a temporary folder +RUN 
pip install --prefix=/install . + +# build a clean environment +FROM base +WORKDIR /airbyte/integration_code + +# copy all loaded and built libraries to a pure basic image +COPY --from=builder /install /usr/local +# add default timezone settings +COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime +RUN echo "Etc/UTC" > /etc/timezone + +# bash is installed for more convenient debugging. +RUN apk --no-cache add bash + +# copy payload code only +COPY main.py ./ +COPY source_search_metrics ./source_search_metrics + +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] + +LABEL io.airbyte.version=0.1.1 +LABEL io.airbyte.name=airbyte/source-search-metrics diff --git a/airbyte-integrations/connectors/source-search-metrics/README.md b/airbyte-integrations/connectors/source-search-metrics/README.md new file mode 100644 index 000000000000..5a42bd6ec231 --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/README.md @@ -0,0 +1,132 @@ +# Search Metrics Source + +This is the repository for the Search Metrics source connector, written in Python. +For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.io/integrations/sources/search-metrics). + +## Local development + +### Prerequisites +**To iterate on this connector, make sure to complete this prerequisites section.** + +#### Minimum Python version required `= 3.7.0` + +#### Build & Activate Virtual Environment and install dependencies +From this connector directory, create a virtual environment: +``` +python -m venv .venv +``` + +This will generate a virtualenv for this module in `.venv/`. Make sure this venv is active in your +development environment of choice. To activate it from the terminal, run: +``` +source .venv/bin/activate +pip install -r requirements.txt +pip install '.[tests]' +``` +If you are in an IDE, follow your IDE's instructions to activate the virtualenv. 
+ +Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. `requirements.txt` is +used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`. +If this is mumbo jumbo to you, don't worry about it, just put your deps in `setup.py` but install using `pip install -r requirements.txt` and everything +should work as you expect. + +#### Building via Gradle +You can also build the connector in Gradle. This is typically used in CI and not needed for your development workflow. + +To build using Gradle, from the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:source-search-metrics:build +``` + +#### Create credentials +**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.io/integrations/sources/search-metrics) +to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `source_search_metrics/spec.json` file. +Note that any directory named `secrets` is gitignored across the entire Airbyte repo, so there is no danger of accidentally checking in sensitive information. +See `integration_tests/sample_config.json` for a sample config file. + +**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `source search-metrics test creds` +and place them into `secrets/config.json`. + +### Locally running the connector +``` +python main.py spec +python main.py check --config secrets/config.json +python main.py discover --config secrets/config.json +python main.py read --config secrets/config.json --catalog integration_tests/configured_catalog.json +``` + +### Locally running the connector docker image + +#### Build +First, make sure you build the latest Docker image: +``` +docker build . 
-t airbyte/source-search-metrics:dev +``` + +You can also build the connector image via Gradle: +``` +./gradlew :airbyte-integrations:connectors:source-search-metrics:airbyteDocker +``` +When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in +the Dockerfile. + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/source-search-metrics:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-search-metrics:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-search-metrics:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/source-search-metrics:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` +## Testing +Make sure to familiarize yourself with [pytest test discovery](https://docs.pytest.org/en/latest/goodpractices.html#test-discovery) to know how your test files and methods should be named. +First install test dependencies into your virtual environment: +``` +pip install .[tests] +``` +### Unit Tests +To run unit tests locally, from the connector directory run: +``` +python -m pytest unit_tests +``` + +### Integration Tests +There are two types of integration tests: Acceptance Tests (Airbyte's test suite for all source connectors) and custom integration tests (which are specific to this connector). +#### Custom Integration tests +Place custom tests inside `integration_tests/` folder, then, from the connector root, run +``` +python -m pytest integration_tests +``` +#### Acceptance Tests +Customize `acceptance-test-config.yml` file to configure tests. See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) for more information. 
+If your connector needs to create or destroy resources for use during acceptance tests, create fixtures for them and place them inside integration_tests/acceptance.py. +To run your integration tests with acceptance tests, from the connector root, run +``` +python -m pytest integration_tests -p integration_tests.acceptance +``` +To run your integration tests with docker, run `sh acceptance-test-docker.sh` from the connector root. + +### Using gradle to run tests +All commands should be run from airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:source-search-metrics:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:source-search-metrics:integrationTest +``` + +## Dependency Management +All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development. +We split dependencies between two groups, dependencies that are: +* required for your connector to work need to go to `MAIN_REQUIREMENTS` list. +* required for the testing need to go to `TEST_REQUIREMENTS` list + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing unit and integration tests. +1. Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)). +1. Create a Pull Request. +1. Pat yourself on the back for being an awesome contributor. +1. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. 
diff --git a/airbyte-integrations/connectors/source-search-metrics/acceptance-test-config.yml b/airbyte-integrations/connectors/source-search-metrics/acceptance-test-config.yml new file mode 100644 index 000000000000..bb9c6666e82a --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/acceptance-test-config.yml @@ -0,0 +1,31 @@ +# See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) +# for more information about how to configure these tests +connector_image: airbyte/source-search-metrics:dev +tests: + spec: + - spec_path: "source_search_metrics/spec.json" + connection: + - config_path: "secrets/config.json" + status: "succeed" + - config_path: "integration_tests/invalid_config.json" + status: "failed" + discovery: + - config_path: "secrets/config.json" + basic_read: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + empty_streams: ["list_market_share_s7", + "list_rankings_domain", + "list_competitors", + "distribution_keywords_s7", + "list_position_spread_historic_s7", + "list_seo_visibility_historic_s7", + "count_domain_keyword"] +# Incremental tests are commented out because the incremental streams have no records +# incremental: +# - config_path: "secrets/config.json" +# configured_catalog_path: "integration_tests/configured_catalog.json" +# future_state_path: "integration_tests/abnormal_state.json" + full_refresh: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-search-metrics/acceptance-test-docker.sh b/airbyte-integrations/connectors/source-search-metrics/acceptance-test-docker.sh new file mode 100644 index 000000000000..e4d8b1cef896 --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/acceptance-test-docker.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env sh + +# Build latest connector image +docker 
build . -t $(cat acceptance-test-config.yml | grep "connector_image" | head -n 1 | cut -d: -f2) + +# Pull latest acctest image +docker pull airbyte/source-acceptance-test:latest + +# Run +docker run --rm -it \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -v /tmp:/tmp \ + -v $(pwd):/test_input \ + airbyte/source-acceptance-test \ + --acceptance-test-config /test_input + diff --git a/airbyte-integrations/connectors/source-search-metrics/build.gradle b/airbyte-integrations/connectors/source-search-metrics/build.gradle new file mode 100644 index 000000000000..fd6ad33c734a --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/build.gradle @@ -0,0 +1,9 @@ +plugins { + id 'airbyte-python' + id 'airbyte-docker' + id 'airbyte-source-acceptance-test' +} + +airbytePython { + moduleDirectory 'source_search_metrics' +} diff --git a/airbyte-integrations/connectors/source-search-metrics/integration_tests/__init__.py b/airbyte-integrations/connectors/source-search-metrics/integration_tests/__init__.py new file mode 100644 index 000000000000..46b7376756ec --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/integration_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# diff --git a/airbyte-integrations/connectors/source-search-metrics/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-search-metrics/integration_tests/abnormal_state.json new file mode 100644 index 000000000000..a04b24252aed --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/integration_tests/abnormal_state.json @@ -0,0 +1,11 @@ +{ + "list_market_share_s7": { + "date": 30210807 + }, + "list_position_spread_historic_s7": { + "date": 30210807 + }, + "list_seo_visibility_historic_s7": { + "date": 30210807 + } +} diff --git a/airbyte-integrations/connectors/source-search-metrics/integration_tests/acceptance.py b/airbyte-integrations/connectors/source-search-metrics/integration_tests/acceptance.py new file mode 100644 index 000000000000..724da010a0ce --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/integration_tests/acceptance.py @@ -0,0 +1,13 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +import pytest + +pytest_plugins = ("source_acceptance_test.plugin",) + + +@pytest.fixture(scope="session", autouse=True) +def connector_setup(): + yield diff --git a/airbyte-integrations/connectors/source-search-metrics/integration_tests/catalog.json b/airbyte-integrations/connectors/source-search-metrics/integration_tests/catalog.json new file mode 100644 index 000000000000..0147263f4c92 --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/integration_tests/catalog.json @@ -0,0 +1,24 @@ +{ + "streams": [ + { + "stream": { + "name": "projects", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "list_market_share_s7", + "json_schema": {}, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true, + "default_cursor_field": [] + }, + "sync_mode": "incremental", + "destination_sync_mode": "append" + } + ] +} diff --git 
a/airbyte-integrations/connectors/source-search-metrics/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-search-metrics/integration_tests/configured_catalog.json new file mode 100644 index 000000000000..746c184b525e --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/integration_tests/configured_catalog.json @@ -0,0 +1,208 @@ +{ + "streams": [ + { + "stream": { + "name": "projects", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "count_domain_keyword", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "list_market_share_s7", + "json_schema": {}, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true, + "default_cursor_field": [] + }, + "sync_mode": "incremental", + "destination_sync_mode": "append" + }, + { + "stream": { + "name": "list_position_spread_historic_s7", + "json_schema": {}, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true, + "default_cursor_field": [] + }, + "sync_mode": "incremental", + "destination_sync_mode": "append" + }, + { + "stream": { + "name": "list_seo_visibility_historic_s7", + "json_schema": {}, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true, + "default_cursor_field": [] + }, + "sync_mode": "incremental", + "destination_sync_mode": "append" + }, + { + "stream": { + "name": "tags", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "benchmark_rankings_s7", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "competitor_rankings_s7", + 
"json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "distribution_keywords_s7", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "keyword_potentials_s7", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "tag_potentials_s7", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "url_rankings_s7", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "seo_visibility_value_s7", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "serp_spread_value_s7", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "list_competitors", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "list_competitors_relevancy", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "list_rankings_domain", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "list_seo_visibility_country", + "json_schema": {}, + 
"supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "list_serp_spread_s7", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "list_rankings_historic_s7", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "list_winners_s7", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "list_losers_s7", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/source-search-metrics/integration_tests/invalid_config.json b/airbyte-integrations/connectors/source-search-metrics/integration_tests/invalid_config.json new file mode 100644 index 000000000000..9a2e4444acf9 --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/integration_tests/invalid_config.json @@ -0,0 +1,7 @@ +{ + "api_key": "invalid_api_key", + "client_secret": "invalid_client_secret", + "country_code": "CA", + "start_date": "20330807", + "window_in_days": 30 +} diff --git a/airbyte-integrations/connectors/source-search-metrics/integration_tests/sample_config.json b/airbyte-integrations/connectors/source-search-metrics/integration_tests/sample_config.json new file mode 100644 index 000000000000..355a9a0e04bf --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/integration_tests/sample_config.json @@ -0,0 +1,7 @@ +{ + "api_key": "api_key", + "client_secret": "client_secret", + "country_code": "CA", + "start_date": "20210807", + "window_in_days": 30 +} diff --git 
a/airbyte-integrations/connectors/source-search-metrics/integration_tests/sample_state.json b/airbyte-integrations/connectors/source-search-metrics/integration_tests/sample_state.json new file mode 100644 index 000000000000..81cf082cea41 --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/integration_tests/sample_state.json @@ -0,0 +1,11 @@ +{ + "list_market_share_s7": { + "date": 20210807 + }, + "list_position_spread_historic_s7": { + "date": 20210807 + }, + "list_seo_visibility_historic_s7": { + "date": 20210807 + } +} diff --git a/airbyte-integrations/connectors/source-search-metrics/main.py b/airbyte-integrations/connectors/source-search-metrics/main.py new file mode 100644 index 000000000000..612fa2791b12 --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/main.py @@ -0,0 +1,13 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +import sys + +from airbyte_cdk.entrypoint import launch +from source_search_metrics import SourceSearchMetrics + +if __name__ == "__main__": + source = SourceSearchMetrics() + launch(source, sys.argv[1:]) diff --git a/airbyte-integrations/connectors/source-search-metrics/requirements.txt b/airbyte-integrations/connectors/source-search-metrics/requirements.txt new file mode 100644 index 000000000000..0411042aa091 --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/requirements.txt @@ -0,0 +1,2 @@ +-e ../../bases/source-acceptance-test +-e . diff --git a/airbyte-integrations/connectors/source-search-metrics/setup.py b/airbyte-integrations/connectors/source-search-metrics/setup.py new file mode 100644 index 000000000000..c9fb15bf5ba7 --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/setup.py @@ -0,0 +1,29 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = [ + "airbyte-cdk", +] + +TEST_REQUIREMENTS = [ + "pytest~=6.1", + "pytest-mock~=3.6.1", + "source-acceptance-test", +] + +setup( + name="source_search_metrics", + description="Source implementation for Search Metrics.", + author="Airbyte", + author_email="contact@airbyte.io", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json", "schemas/*.json", "schemas/shared/*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, +) diff --git a/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/__init__.py b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/__init__.py new file mode 100644 index 000000000000..9b0b409b9906 --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +from .source import SourceSearchMetrics + +__all__ = ["SourceSearchMetrics"] diff --git a/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/TODO.md b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/TODO.md new file mode 100644 index 000000000000..cf1efadb3c9c --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/TODO.md @@ -0,0 +1,25 @@ +# TODO: Define your stream schemas +Your connector must describe the schema of each stream it can output using [JSONSchema](https://json-schema.org). + +The simplest way to do this is to describe the schema of your streams using one `.json` file per stream. You can also dynamically generate the schema of your stream in code, or you can combine both approaches: start with a `.json` file and dynamically add properties to it. + +The schema of a stream is the return value of `Stream.get_json_schema`. 
+ +## Static schemas +By default, `Stream.get_json_schema` reads a `.json` file in the `schemas/` directory whose name is equal to the value of the `Stream.name` property. In turn `Stream.name` by default returns the name of the class in snake case. Therefore, if you have a class `class EmployeeBenefits(HttpStream)` the default behavior will look for a file called `schemas/employee_benefits.json`. You can override any of these behaviors as you need. + +Important note: any objects referenced via `$ref` should be placed in the `shared/` directory in their own `.json` files. + +## Dynamic schemas +If you'd rather define your schema in code, override `Stream.get_json_schema` in your stream class to return a `dict` describing the schema using [JSONSchema](https://json-schema.org). + +## Dynamically modifying static schemas +Override `Stream.get_json_schema` to run the default behavior, edit the returned value, then return the edited value: +``` +def get_json_schema(self): + schema = super().get_json_schema() + schema['dynamically_determined_property'] = "property" + return schema +``` + +Delete this file once you're done. Or don't. 
Up to you :) diff --git a/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/benchmark_rankings_s7.json b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/benchmark_rankings_s7.json new file mode 100644 index 000000000000..6bea16765247 --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/benchmark_rankings_s7.json @@ -0,0 +1,21 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "visibility": { + "type": ["null", "integer"] + }, + "domain": { + "type": ["null", "string"] + }, + "avg_position": { + "type": ["null", "number"] + }, + "keyword_coverage": { + "type": ["null", "number"] + }, + "url_type": { + "type": ["null", "string"] + } + } +} diff --git a/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/competitor_rankings_s7.json b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/competitor_rankings_s7.json new file mode 100644 index 000000000000..7486d9e93a4b --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/competitor_rankings_s7.json @@ -0,0 +1,21 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "visibility": { + "type": ["null", "integer"] + }, + "avg_position": { + "type": ["null", "number"] + }, + "keyword_count": { + "type": ["null", "integer"] + }, + "domain": { + "type": ["null", "string"] + }, + "url_type": { + "type": ["null", "string"] + } + } +} diff --git a/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/count_domain_keyword.json b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/count_domain_keyword.json new file mode 100644 index 000000000000..645a8b56c4a5 --- /dev/null +++ 
b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/count_domain_keyword.json @@ -0,0 +1,12 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "count": { + "type": ["null", "integer"] + }, + "balance": { + "type": ["null", "integer"] + } + } +} diff --git a/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/distribution_keywords_s7.json b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/distribution_keywords_s7.json new file mode 100644 index 000000000000..8f0b4b2cef49 --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/distribution_keywords_s7.json @@ -0,0 +1,21 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "desktop": { + "type": ["null", "string"] + }, + "mobile": { + "type": ["null", "string"] + }, + "smartphone": { + "type": ["null", "string"] + }, + "tablet": { + "type": ["null", "string"] + }, + "keyword": { + "type": ["null", "string"] + } + } +} diff --git a/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/keyword_potentials_s7.json b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/keyword_potentials_s7.json new file mode 100644 index 000000000000..9037fba170d1 --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/keyword_potentials_s7.json @@ -0,0 +1,27 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "traffic": { + "type": ["null", "integer"] + }, + "traffic_max": { + "type": ["null", "integer"] + }, + "traffic_volume": { + "type": ["null", "string"] + }, + "max_potential": { + "type": ["null", "integer"] + }, + "se_position": { + "type": ["null", "string"] + }, + "kd": { + "type": ["null", "string"] + }, + "keyword": { + 
"type": ["null", "string"] + } + } +} diff --git a/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_competitors.json b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_competitors.json new file mode 100644 index 000000000000..8f0274bbf64a --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_competitors.json @@ -0,0 +1,33 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "domain": { + "type": ["null", "string"] + }, + "avg_position_1": { + "type": ["null", "number"] + }, + "avg_position_2": { + "type": ["null", "number"] + }, + "common_keywords": { + "type": ["null", "string"] + }, + "traffic_1": { + "type": ["null", "integer"] + }, + "traffic_2": { + "type": ["null", "integer"] + }, + "costs": { + "type": ["null", "number"] + }, + "total_kw_count": { + "type": ["null", "string"] + }, + "diff_keywords": { + "type": ["null", "string"] + } + } +} diff --git a/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_competitors_relevancy.json b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_competitors_relevancy.json new file mode 100644 index 000000000000..197e9a43172f --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_competitors_relevancy.json @@ -0,0 +1,66 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "domain_id": { + "type": ["null", "integer"] + }, + "host_id": { + "type": ["null", "integer"] + }, + "own_domain": { + "type": ["null", "boolean"] + }, + "domain": { + "type": ["null", "string"] + }, + "own_avg_position": { + "type": ["null", "integer"] + }, + "competitor_avg_position": { + "type": ["null", "integer"] + }, + "own_traffic_index": { + "type": ["null", "integer"] + }, + 
"competitor_traffic_index": { + "type": ["null", "integer"] + }, + "own_traffic_index_value": { + "type": ["null", "number"] + }, + "competitor_traffic_index_value": { + "type": ["null", "number"] + }, + "traffic_index_difference": { + "type": ["null", "integer"] + }, + "avg_traffic_value": { + "type": ["null", "number"] + }, + "traffic_index_potential": { + "type": ["null", "integer"] + }, + "traffic_index_value_potential": { + "type": ["null", "number"] + }, + "keyword_count": { + "type": ["null", "integer"] + }, + "own_keyword_count": { + "type": ["null", "integer"] + }, + "shared_keyword_count": { + "type": ["null", "integer"] + }, + "keyword_overlap": { + "type": ["null", "number"] + }, + "competitor_keyword_count": { + "type": ["null", "integer"] + }, + "keyword_relevance": { + "type": ["null", "integer"] + } + } +} diff --git a/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_losers_s7.json b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_losers_s7.json new file mode 100644 index 000000000000..ea12aee4247c --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_losers_s7.json @@ -0,0 +1,44 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "url_last": { + "type": ["null", "string"] + }, + "se_position": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "traffic_volume": { + "type": ["null", "string"] + }, + "cpc": { + "type": ["null", "number"] + }, + "original_position": { + "type": ["null", "string"] + }, + "delta_traffic": { + "type": ["null", "integer"] + }, + "trend": { + "type": ["null", "object"], + "properties": { + "trend": { + "type": ["null", "integer"] + }, + "abs": { + "type": ["null", "integer"] + }, + "per": { + "type": ["null", "number"] + } + } + }, + "keyword": { + "type": ["null", "string"] + } + } +} diff --git 
a/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_market_share_s7.json b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_market_share_s7.json new file mode 100644 index 000000000000..f051b24524a5 --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_market_share_s7.json @@ -0,0 +1,29 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "date": { + "type": ["null", "integer"] + }, + "data": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "domain": { + "type": ["null", "string"] + }, + "host": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "marketshare": { + "type": ["null", "number"] + } + } + } + } + } +} diff --git a/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_position_spread_historic_s7.json b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_position_spread_historic_s7.json new file mode 100644 index 000000000000..247266bb4d31 --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_position_spread_historic_s7.json @@ -0,0 +1,135 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "date": { + "type": ["null", "string"] + }, + "data": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "domain": { + "type": ["null", "string"] + }, + "data": { + "type": ["null", "object"], + "properties": { + "position1": { + "type": ["null", "object"], + "properties": { + "keyword_count": { + "type": ["null", "integer"] + }, + "keyword_per": { + "type": ["null", "number"] + } + } + }, + "position2": { + "type": ["null", "object"], + "properties": { + "keyword_count": { + "type": ["null", 
"integer"] + }, + "keyword_per": { + "type": ["null", "number"] + } + } + }, + "position3": { + "type": ["null", "object"], + "properties": { + "keyword_count": { + "type": ["null", "integer"] + }, + "keyword_per": { + "type": ["null", "number"] + } + } + }, + "position4": { + "type": ["null", "object"], + "properties": { + "keyword_count": { + "type": ["null", "integer"] + }, + "keyword_per": { + "type": ["null", "number"] + } + } + }, + "position5": { + "type": ["null", "object"], + "properties": { + "keyword_count": { + "type": ["null", "integer"] + }, + "keyword_per": { + "type": ["null", "number"] + } + } + }, + "position6": { + "type": ["null", "object"], + "properties": { + "keyword_count": { + "type": ["null", "integer"] + }, + "keyword_per": { + "type": ["null", "number"] + } + } + }, + "position7": { + "type": ["null", "object"], + "properties": { + "keyword_count": { + "type": ["null", "integer"] + }, + "keyword_per": { + "type": ["null", "number"] + } + } + }, + "position8": { + "type": ["null", "object"], + "properties": { + "keyword_count": { + "type": ["null", "integer"] + }, + "keyword_per": { + "type": ["null", "number"] + } + } + }, + "position9": { + "type": ["null", "object"], + "properties": { + "keyword_count": { + "type": ["null", "integer"] + }, + "keyword_per": { + "type": ["null", "number"] + } + } + }, + "position10": { + "type": ["null", "object"], + "properties": { + "keyword_count": { + "type": ["null", "integer"] + }, + "keyword_per": { + "type": ["null", "number"] + } + } + } + } + } + } + } + } + } +} diff --git a/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_rankings_analysis_s7.json b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_rankings_analysis_s7.json new file mode 100644 index 000000000000..b472da1ffa9a --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_rankings_analysis_s7.json @@ 
-0,0 +1,38 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "url": { + "type": ["null", "string"] + }, + "position": { + "type": ["null", "integer"] + }, + "page": { + "type": ["null", "integer"] + }, + "type_id": { + "type": ["null", "integer"] + }, + "title": { + "type": ["null", "string"] + }, + "url_type": { + "type": ["null", "string"] + }, + "trend": { + "type": ["null", "object"], + "properties": { + "trend": { + "type": ["null", "integer"] + }, + "abs": { + "type": ["null", "string"] + }, + "per": { + "type": ["null", "number"] + } + } + } + } +} diff --git a/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_rankings_domain.json b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_rankings_domain.json new file mode 100644 index 000000000000..8d7692a51789 --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_rankings_domain.json @@ -0,0 +1,88 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "pos": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "position": { + "type": ["null", "integer"] + }, + "page": { + "type": ["null", "string"] + }, + "directory": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "visibility": { + "type": ["null", "string"] + }, + "traffic": { + "type": ["null", "integer"] + }, + "ad_budget": { + "type": ["null", "string"] + }, + "title": { + "type": ["null", "string"] + }, + "traffic_volume": { + "type": ["null", "string"] + }, + "cpc": { + "type": ["null", "number"] + }, + "delta_traffic": { + "type": ["null", "string"] + }, + "trend": { + "type": ["null", "object"], + "properties": { + "trend": { + "type": ["null", "integer"] + }, + "abs": { + "type": ["null", "string"] + }, + "per": { + "type": ["null", "integer"] + } + } 
+ }, + "tags": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + } + } + } + }, + "keyword": { + "type": ["null", "string"] + }, + "tags": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + }, + "delete_date": { + "type": ["null", "string"], + "format": "date" + }, + "date": { + "type": ["null", "string"], + "format": "date" + }, + "trend_date": { + "type": ["null", "string"], + "format": "date" + } + } +} diff --git a/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_rankings_historic_s7.json b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_rankings_historic_s7.json new file mode 100644 index 000000000000..a352b644db5e --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_rankings_historic_s7.json @@ -0,0 +1,87 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "date": { + "type": ["null", "integer"], + "format": "date" + }, + "group1": { + "type": ["null", "object"], + "properties": { + "keyword_count": { + "type": ["null", "integer"] + }, + "keyword_per": { + "type": ["null", "number"] + } + } + }, + "group2": { + "type": ["null", "object"], + "properties": { + "keyword_count": { + "type": ["null", "integer"] + }, + "keyword_per": { + "type": ["null", "number"] + } + } + }, + "group3": { + "type": ["null", "object"], + "properties": { + "keyword_count": { + "type": ["null", "integer"] + }, + "keyword_per": { + "type": ["null", "number"] + } + } + }, + "group4": { + "type": ["null", "object"], + "properties": { + "keyword_count": { + "type": ["null", "integer"] + }, + "keyword_per": { + "type": ["null", "number"] + } + } + }, + "group5": { + "type": ["null", "object"], + "properties": { + "keyword_count": { + "type": ["null", "integer"] + }, + "keyword_per": { + "type": ["null", "number"] + } + } + }, + "group6": { + 
"type": ["null", "object"], + "properties": { + "keyword_count": { + "type": ["null", "integer"] + }, + "keyword_per": { + "type": ["null", "number"] + } + } + }, + "group7": { + "type": ["null", "object"], + "properties": { + "keyword_count": { + "type": ["null", "integer"] + }, + "keyword_per": { + "type": ["null", "number"] + } + } + } + } +} diff --git a/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_seo_visibility_country.json b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_seo_visibility_country.json new file mode 100644 index 000000000000..4597cddbd6ab --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_seo_visibility_country.json @@ -0,0 +1,12 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "country_code": { + "type": ["null", "string"] + }, + "visibility": { + "type": ["null", "integer"] + } + } +} diff --git a/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_seo_visibility_historic_s7.json b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_seo_visibility_historic_s7.json new file mode 100644 index 000000000000..6bf8bfc9967f --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_seo_visibility_historic_s7.json @@ -0,0 +1,27 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "date": { + "type": ["null", "integer"], + "format": "date" + }, + "data": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "domain": { + "type": ["null", "string"] + }, + "sum_visibility": { + "type": ["null", "integer"] + }, + "count_keywords": { + "type": ["null", "string"] + } + } + } + } + } +} diff --git 
a/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_serp_spread_s7.json b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_serp_spread_s7.json new file mode 100644 index 000000000000..a352b644db5e --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_serp_spread_s7.json @@ -0,0 +1,87 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "date": { + "type": ["null", "integer"], + "format": "date" + }, + "group1": { + "type": ["null", "object"], + "properties": { + "keyword_count": { + "type": ["null", "integer"] + }, + "keyword_per": { + "type": ["null", "number"] + } + } + }, + "group2": { + "type": ["null", "object"], + "properties": { + "keyword_count": { + "type": ["null", "integer"] + }, + "keyword_per": { + "type": ["null", "number"] + } + } + }, + "group3": { + "type": ["null", "object"], + "properties": { + "keyword_count": { + "type": ["null", "integer"] + }, + "keyword_per": { + "type": ["null", "number"] + } + } + }, + "group4": { + "type": ["null", "object"], + "properties": { + "keyword_count": { + "type": ["null", "integer"] + }, + "keyword_per": { + "type": ["null", "number"] + } + } + }, + "group5": { + "type": ["null", "object"], + "properties": { + "keyword_count": { + "type": ["null", "integer"] + }, + "keyword_per": { + "type": ["null", "number"] + } + } + }, + "group6": { + "type": ["null", "object"], + "properties": { + "keyword_count": { + "type": ["null", "integer"] + }, + "keyword_per": { + "type": ["null", "number"] + } + } + }, + "group7": { + "type": ["null", "object"], + "properties": { + "keyword_count": { + "type": ["null", "integer"] + }, + "keyword_per": { + "type": ["null", "number"] + } + } + } + } +} diff --git a/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_winners_s7.json 
b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_winners_s7.json new file mode 100644 index 000000000000..ea12aee4247c --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/list_winners_s7.json @@ -0,0 +1,44 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "url_last": { + "type": ["null", "string"] + }, + "se_position": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "traffic_volume": { + "type": ["null", "string"] + }, + "cpc": { + "type": ["null", "number"] + }, + "original_position": { + "type": ["null", "string"] + }, + "delta_traffic": { + "type": ["null", "integer"] + }, + "trend": { + "type": ["null", "object"], + "properties": { + "trend": { + "type": ["null", "integer"] + }, + "abs": { + "type": ["null", "integer"] + }, + "per": { + "type": ["null", "number"] + } + } + }, + "keyword": { + "type": ["null", "string"] + } + } +} diff --git a/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/marketshare_value_s7.json b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/marketshare_value_s7.json new file mode 100644 index 000000000000..85c4bd986b09 --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/marketshare_value_s7.json @@ -0,0 +1,38 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "date": { + "type": ["null", "string"], + "format": "date" + }, + "data": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "domain": { + "type": ["null", "string"] + }, + "marketshare": { + "type": ["null", "integer"] + }, + "trend": { + "type": ["null", "object"], + "properties": { + "trend": { + "type": ["null", "integer"] + }, + "abs": { + "type": ["null", "string"] + }, + "per": { + 
"type": ["null", "integer"] + } + } + } + } + } + } + } +} diff --git a/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/projects.json b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/projects.json new file mode 100644 index 000000000000..cb0884960dec --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/projects.json @@ -0,0 +1,27 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "project_id": { + "type": ["null", "integer"] + }, + "project_name": { + "type": ["null", "string"] + }, + "project_url": { + "type": ["null", "string"] + }, + "keyword_count": { + "type": ["null", "integer"] + }, + "engine_count": { + "type": ["null", "integer"] + }, + "engines": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + } + } +} diff --git a/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/seo_visibility_value_s7.json b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/seo_visibility_value_s7.json new file mode 100644 index 000000000000..2468104da80d --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/seo_visibility_value_s7.json @@ -0,0 +1,33 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "trend": { + "type": ["null", "object"], + "properties": { + "trend": { + "type": ["null", "integer"] + }, + "abs": { + "type": ["null", "integer"] + }, + "per": { + "type": ["null", "number"] + } + } + }, + "domain": { + "type": ["null", "string"] + }, + "date": { + "type": ["null", "integer"], + "format": "date" + }, + "sum_visibility": { + "type": ["null", "integer"] + }, + "count_keywords": { + "type": ["null", "string"] + } + } +} diff --git 
a/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/serp_spread_value_s7.json b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/serp_spread_value_s7.json new file mode 100644 index 000000000000..ea9579aa2f1f --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/serp_spread_value_s7.json @@ -0,0 +1,29 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "data": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "unranked": { + "type": ["null", "integer"] + }, + "pos_1_10": { + "type": ["null", "integer"] + }, + "pos_11_20": { + "type": ["null", "integer"] + }, + "pos_20+": { + "type": ["null", "integer"] + } + } + } + }, + "total": { + "type": ["null", "integer"] + } + } +} diff --git a/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/tag_potentials_s7.json b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/tag_potentials_s7.json new file mode 100644 index 000000000000..5c1773dd9030 --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/tag_potentials_s7.json @@ -0,0 +1,36 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "keywords": { + "type": ["null", "integer"] + }, + "rankings": { + "type": ["null", "integer"] + }, + "traffic": { + "type": ["null", "integer"] + }, + "traffic_max": { + "type": ["null", "integer"] + }, + "traffic_volume": { + "type": ["null", "integer"] + }, + "max_potential": { + "type": ["null", "integer"] + }, + "date": { + "type": ["null", "integer"] + }, + "winner": { + "type": ["null", "integer"] + }, + "loser": { + "type": ["null", "integer"] + }, + "tag": { + "type": ["null", "string"] + } + } +} diff --git 
a/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/tags.json b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/tags.json new file mode 100644 index 000000000000..9dc4cbf9e101 --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/tags.json @@ -0,0 +1,33 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "project_id": { + "type": ["null", "integer"] + }, + "id": { + "type": ["null", "string"] + }, + "parent_id": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "full_name": { + "type": ["null", "string"] + }, + "tag_name_path": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + }, + "level": { + "type": ["null", "integer"] + } + } +} diff --git a/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/url_rankings_s7.json b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/url_rankings_s7.json new file mode 100644 index 000000000000..22567a891a03 --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/schemas/url_rankings_s7.json @@ -0,0 +1,87 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "pos": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "position": { + "type": ["null", "integer", "string"] + }, + "page": { + "type": ["null", "integer", "string"] + }, + "directory": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "visibility": { + "type": ["null", "integer", "string"] + }, + "traffic": { + "type": ["null", "integer", "string"] + }, + "ad_budget": { + "type": ["null", "number", "string"] + }, + "title": { + "type": ["null", "string"] + }, + 
"traffic_volume": { + "type": ["null", "integer"] + }, + "cpc": { + "type": ["null", "number"] + }, + "delta_traffic": { + "type": ["null", "integer", "string"] + }, + "trend": { + "type": ["null", "object"], + "properties": { + "trend": { + "type": ["null", "integer", "string"] + }, + "abs": { + "type": ["null", "integer", "string"] + }, + "per": { + "type": ["null", "number", "string"] + } + } + }, + "tags": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + } + } + } + }, + "keyword": { + "type": ["null", "string"] + }, + "tags": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + }, + "delete_date": { + "type": ["null", "string"] + }, + "date": { + "type": ["null", "integer"], + "format": "date" + }, + "trend_date": { + "type": ["null", "integer"], + "format": "date" + } + } +} diff --git a/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/source.py b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/source.py new file mode 100644 index 000000000000..329a97b9d550 --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/source.py @@ -0,0 +1,360 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + +import base64 +from abc import ABC +from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple + +import pendulum +import requests +from airbyte_cdk.sources import AbstractSource +from airbyte_cdk.sources.streams import Stream +from airbyte_cdk.sources.streams.http import HttpStream +from airbyte_cdk.sources.streams.http.auth import Oauth2Authenticator + +from .utils import to_datetime_str + + +class SearchMetricsStream(HttpStream, ABC): + primary_key = None + page_size = 250 + url_base = "https://api.searchmetrics.com/v4/" + + def __init__(self, config: Mapping[str, Any]): + super().__init__(authenticator=config["authenticator"]) + self.config = config + self.start_date = config["start_date"] + self.window_in_days = config.get("window_in_days", 30) + + def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + return None + + def request_params( + self, + stream_state: Mapping[str, Any], + stream_slice: Mapping[str, Any] = None, + next_page_token: Mapping[str, Any] = None, + ) -> MutableMapping[str, Any]: + return { + "project_id": stream_slice["project_id"], + "se_id": stream_slice["engine"], + "urls": stream_slice["project_url"], + "url": stream_slice["project_url"], + "domain": stream_slice["project_url"], + "countrycode": self.config["country_code"], + "limit": self.page_size, + } + + def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + data = response.json().get("response", []) + + if isinstance(data, list): + data = data + elif isinstance(data, dict): + data = [data] + + for record in data: + yield record + + def should_retry(self, response: requests.Response) -> bool: + rankings_not_yet_calculated = response.status_code == 400 and "Rankings not yet calculated" in response.json()["error_message"] + insufficient_credits_to_make_this_service_request = ( + response.status_code == 403 and "Insufficient credits to make this service request" in 
response.json()["error_message"] + ) + + if rankings_not_yet_calculated or insufficient_credits_to_make_this_service_request: + self.logger.error(f"{response.json()['error_message']}") + self.raise_on_http_errors = False + + return super().should_retry(response) + + def raise_on_http_errors(self) -> bool: + return True + + +class ChildStreamMixin: + parent_stream_class: Optional[SearchMetricsStream] = None + + def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, any]]]: + for item in self.parent_stream_class(config=self.config).read_records(sync_mode=None): + for engine in item["engines"]: + yield {"project_id": item["project_id"], "engine": engine, "project_url": item["project_url"]} + + yield from [] + + +class Projects(SearchMetricsStream): + primary_key = "project_id" + + def request_params( + self, + stream_state: Mapping[str, Any], + stream_slice: Mapping[str, Any] = None, + next_page_token: Mapping[str, Any] = None, + ) -> MutableMapping[str, Any]: + return {} + + def path(self, **kwargs) -> str: + return "AdminStatusGetListProjects.json" + + +class ProjectsChildStream(ChildStreamMixin): + parent_stream_class = Projects + + +class IncrementalSearchMetricsStream(ProjectsChildStream, SearchMetricsStream): + cursor_field = "date" + + def request_params( + self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> MutableMapping[str, Any]: + params = super().request_params(stream_state, stream_slice, next_page_token) + params["date_from"] = stream_slice["date_from"] + params["date_to"] = stream_slice["date_to"] + return params + + def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]) -> Mapping[str, Any]: + return { + self.cursor_field: max( + str(latest_record.get(self.cursor_field, self.start_date)), + str(current_stream_state.get(self.cursor_field, self.start_date)), + ) + } + + def stream_slices(self, stream_state: 
Mapping[str, Any] = None, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]: + """ + Override default stream_slices CDK method to provide date_slices as page chunks for data fetch. + Returns list of dict, example: [{ + "date_from": "20200101", + "date_to": "20210102" + }, + { + "date_from": "20200103", + "date_to": "20210104" + }, + ...] + """ + + for stream_slice in super().stream_slices(**kwargs): + start_date = pendulum.parse(self.start_date).date() + end_date = pendulum.now().date() + + # Determine stream_state, if no stream_state we use start_date + if stream_state: + start_date = pendulum.parse(stream_state.get(self.cursor_field)).date() + + # use the lowest date between start_date and self.end_date, otherwise API fails if start_date is in future + start_date = min(start_date, end_date) + date_slices = [] + + while start_date <= end_date: + end_date_slice = start_date.add(days=self.window_in_days) + stream_slice.update({"date_from": to_datetime_str(start_date), "date_to": to_datetime_str(min(end_date_slice, end_date))}) + date_slices.append(stream_slice) + # add 1 day for start next slice from next day and not duplicate data from previous slice end date. 
+ start_date = end_date_slice.add(days=1) + + return date_slices + + +class Tags(ProjectsChildStream, SearchMetricsStream): + primary_key = "id" + + def path(self, **kwargs) -> str: + return "AdminStatusGetListProjectTags.json" + + +class BenchmarkRankingsS7(ProjectsChildStream, SearchMetricsStream): + def path(self, **kwargs) -> str: + return "ProjectOrganicGetListBenchmarkRankingsS7.json" + + +class CountDomainKeyword(ProjectsChildStream, SearchMetricsStream): + def path(self, **kwargs) -> str: + return "ResearchOrganicGetCountDomainKeyword.json" + + +class CompetitorRankingsS7(ProjectsChildStream, SearchMetricsStream): + def path(self, **kwargs) -> str: + return "ProjectOrganicGetListCompetitorRankingsS7.json" + + +class DistributionKeywordsS7(ProjectsChildStream, SearchMetricsStream): + def path(self, **kwargs) -> str: + return "ProjectOrganicGetListDistributionKeywordsS7.json" + + +class KeywordPotentialsS7(ProjectsChildStream, SearchMetricsStream): + def path(self, **kwargs) -> str: + return "ProjectOrganicGetListKeywordPotentialsS7.json" + + +class TagPotentialsS7(ProjectsChildStream, SearchMetricsStream): + def path(self, **kwargs) -> str: + return "ProjectOrganicGetListTagPotentialsS7.json" + + +class UrlRankingsS7(ProjectsChildStream, SearchMetricsStream): + def path(self, **kwargs) -> str: + return "ProjectOrganicGetListUrlRankingsS7.json" + + +class MarketshareValueS7(ProjectsChildStream, SearchMetricsStream): + def path(self, **kwargs) -> str: + return "ProjectOrganicGetValueMarketshareS7.json" + + +class SeoVisibilityValueS7(ProjectsChildStream, SearchMetricsStream): + def path(self, **kwargs) -> str: + return "ProjectOrganicGetValueSeoVisibilityS7.json" + + +class SerpSpreadValueS7(ProjectsChildStream, SearchMetricsStream): + def path(self, **kwargs) -> str: + return "ProjectOrganicGetValueSerpSpreadS7.json" + + +class ListCompetitors(ProjectsChildStream, SearchMetricsStream): + def path(self, **kwargs) -> str: + return 
"ResearchOrganicGetListCompetitors.json" + + +class ListCompetitorsRelevancy(ProjectsChildStream, SearchMetricsStream): + def path(self, **kwargs) -> str: + return "ResearchOrganicGetListCompetitorsRelevancy.json" + + +class ListRankingsDomain(ProjectsChildStream, SearchMetricsStream): + def path(self, **kwargs) -> str: + return "ResearchOrganicGetListRankingsDomain.json" + + +class ListSeoVisibilityCountry(ProjectsChildStream, SearchMetricsStream): + def path(self, **kwargs) -> str: + return "ResearchOrganicGetListSeoVisibilityCountry.json" + + +class ListLosersS7(ProjectsChildStream, SearchMetricsStream): + def path(self, **kwargs) -> str: + return "ProjectOrganicGetListLosersS7.json" + + +class ListMarketShareS7(IncrementalSearchMetricsStream): + def path(self, **kwargs) -> str: + return "ProjectOrganicGetListMarketShareS7.json" + + +class ListPositionSpreadHistoricS7(IncrementalSearchMetricsStream): + def path(self, **kwargs) -> str: + return "ProjectOrganicGetListPositionSpreadHistoricS7.json" + + +class ListSeoVisibilityHistoricS7(IncrementalSearchMetricsStream): + def path(self, **kwargs) -> str: + return "ProjectOrganicGetListSeoVisibilityHistoricS7.json" + + +class ListRankingsAnalysisS7(ProjectsChildStream, SearchMetricsStream): + def path(self, **kwargs) -> str: + return "ProjectOrganicGetListRankingsAnalysisS7.json" + + +class ListWinnersS7(ProjectsChildStream, SearchMetricsStream): + def path(self, **kwargs) -> str: + return "ProjectOrganicGetListWinnersS7.json" + + +class ListRankingsHistoricS7(ProjectsChildStream, SearchMetricsStream): + def path(self, **kwargs) -> str: + return "ProjectOrganicGetListRankingsHistoricS7.json" + + +class ListSerpSpreadS7(ProjectsChildStream, SearchMetricsStream): + def path(self, **kwargs) -> str: + return "ProjectOrganicGetListSerpSpreadS7.json" + + +class SearchMetricsAuthenticator(Oauth2Authenticator): + def __init__(self, config): + super().__init__( + 
token_refresh_endpoint="https://api.searchmetrics.com/v4/token", + client_id=config["api_key"], + client_secret=config["client_secret"], + refresh_token=None, + ) + + def get_refresh_request_body(self) -> Mapping[str, Any]: + payload: MutableMapping[str, Any] = {"grant_type": "client_credentials"} + + return payload + + def get_refresh_request_headers(self) -> Mapping[str, Any]: + encoded_credentials = base64.b64encode(f"{self.client_id}:{self.client_secret}".encode("ascii")) + headers: MutableMapping[str, Any] = {"Accept": "application/json", "Authorization": f"Basic {encoded_credentials.decode('utf-8')}"} + + return headers + + def refresh_access_token(self) -> Tuple[str, int]: + """ + Returns a tuple of (access_token, token_lifespan_in_seconds) + """ + try: + response = requests.request( + method="POST", + url=self.token_refresh_endpoint, + headers=self.get_refresh_request_headers(), + data=self.get_refresh_request_body(), + ) + response.raise_for_status() + response_json = response.json() + return response_json["access_token"], response_json["expires_in"] + except Exception as e: + raise Exception(f"Error while refreshing access token: {e}") from e + + +# Source +class SourceSearchMetrics(AbstractSource): + def check_connection(self, logger, config) -> Tuple[bool, any]: + """ + Testing connection availability for the connector by granting the credentials. 
+ """ + authenticator = SearchMetricsAuthenticator(config) + + try: + url = "https://api.searchmetrics.com/v4/AdminStatusGetListProjects.json" + + auth_headers = {"Accept": "application/json", **authenticator.get_auth_header()} + session = requests.get(url, headers=auth_headers) + session.raise_for_status() + + return True, None + except requests.exceptions.RequestException as e: + return False, e + + def streams(self, config: Mapping[str, Any]) -> List[Stream]: + config["authenticator"] = SearchMetricsAuthenticator(config) + return [ + BenchmarkRankingsS7(config), + CompetitorRankingsS7(config), + CountDomainKeyword(config), + DistributionKeywordsS7(config), + KeywordPotentialsS7(config), + ListCompetitors(config), + ListCompetitorsRelevancy(config), + ListLosersS7(config), + ListMarketShareS7(config), + ListPositionSpreadHistoricS7(config), + ListRankingsDomain(config), + ListRankingsHistoricS7(config), + ListSeoVisibilityCountry(config), + ListSeoVisibilityHistoricS7(config), + ListSerpSpreadS7(config), + ListWinnersS7(config), + Projects(config), + SeoVisibilityValueS7(config), + SerpSpreadValueS7(config), + TagPotentialsS7(config), + Tags(config), + UrlRankingsS7(config), + ] diff --git a/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/spec.json b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/spec.json new file mode 100644 index 000000000000..c33c10d6d6a3 --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/spec.json @@ -0,0 +1,71 @@ +{ + "documentationUrl": "https://docs.airbyte.io/integrations/sources/search-metrics", + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Source Search Metrics Spec", + "type": "object", + "required": ["api_key", "client_secret", "country_code", "start_date"], + "additionalProperties": true, + "properties": { + "api_key": { + "title": "API Key", + "type": "string", + "description":
"", + "airbyte_secret": true + }, + "client_secret": { + "title": "Client Secret", + "type": "string", + "description": "", + "airbyte_secret": true + }, + "country_code": { + "title": "Country Code", + "type": "string", + "default": "", + "description": "The country code to use for requests to the SearchMetrics API (sent as the countrycode request parameter).", + "enum": [ + "", + "AR", + "AU", + "AT", + "BE", + "BR", + "CA", + "CN", + "CO", + "DK", + "FI", + "FR", + "DE", + "HK", + "IN", + "IE", + "IT", + "JP", + "MX", + "NL", + "NO", + "PL", + "RU", + "SG", + "ZA", + "ES", + "SE", + "CH", + "TR", + "US", + "GB" + ], + "order": 2 + }, + "start_date": { + "title": "Start Date", + "type": "string", + "description": "Data generated in SearchMetrics after this date will be replicated. This date must be specified in the format YYYYMMDD.", + "examples": ["20200925"], + "pattern": "^[0-9]{4}[0-9]{2}[0-9]{2}$" + } + } + } +} diff --git a/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/utils.py b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/utils.py new file mode 100644 index 000000000000..0d204a397312 --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/source_search_metrics/utils.py @@ -0,0 +1,14 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +from datetime import datetime + + +def to_datetime_str(date: datetime) -> str: + """ + Returns the formatted datetime string. + :: Output example: '20210715' FORMAT : "%Y%m%d" + """ + return date.strftime("%Y%m%d") diff --git a/airbyte-integrations/connectors/source-search-metrics/unit_tests/unit_test.py b/airbyte-integrations/connectors/source-search-metrics/unit_tests/unit_test.py new file mode 100644 index 000000000000..fd47175daf70 --- /dev/null +++ b/airbyte-integrations/connectors/source-search-metrics/unit_tests/unit_test.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved.
+# + + +def test_example(): + """Example of unit test""" + pass diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 26d9b2226dd0..b5a58235587d 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -125,6 +125,7 @@ * [Redshift](integrations/sources/redshift.md) * [S3](integrations/sources/s3.md) * [SAP Business One](integrations/sources/sap-business-one.md) + * [SearchMetrics](integrations/sources/search-metrics.md) * [Salesforce](integrations/sources/salesforce.md) * [SalesLoft](integrations/sources/salesloft.md) * [Sendgrid](integrations/sources/sendgrid.md) diff --git a/docs/integrations/README.md b/docs/integrations/README.md index 9d4b0eadcf9e..b2b3fbde9068 100644 --- a/docs/integrations/README.md +++ b/docs/integrations/README.md @@ -102,6 +102,7 @@ Airbyte uses a grading system for connectors to help users understand what to ex | [Salesforce](sources/salesforce.md) | Certified | | [Salesloft](./sources/salesloft.md)| Alpha | | [SAP Business One](sources/sap-business-one.md) | Beta | +| [SearchMetrics](./sources/search-metrics.md)| Alpha | | [Sendgrid](sources/sendgrid.md) | Certified | | [Sentry](sources/sentry.md) | Alpha | | [Shopify](sources/shopify.md) | Certified | diff --git a/docs/integrations/sources/search-metrics.md b/docs/integrations/sources/search-metrics.md new file mode 100644 index 000000000000..07ce1f58e709 --- /dev/null +++ b/docs/integrations/sources/search-metrics.md @@ -0,0 +1,65 @@ +# SearchMetrics + +## Overview + +The SearchMetrics source supports both Full Refresh and Incremental syncs. You can choose whether this connector will copy only the new or updated data, or all rows in the tables and columns you set up for replication, every time a sync is run. + +This Source Connector is based on the [Airbyte CDK](https://docs.airbyte.io/connector-development/cdk-python).
+ +### Output schema + +Several output streams are available from this source: + +* [Projects](https://developer.searchmetrics.com/docs/apiv4-documentation/ZG9jOjQwODQ5ODE-get-list-projects) \(Full table\) + * [BenchmarkRankingsS7](https://developer.searchmetrics.com/docs/apiv4-documentation/ZG9jOjQzNjc0NDY-get-list-benchmark-rankings-s7) \(Full table\) + * [CompetitorRankingsS7](https://developer.searchmetrics.com/docs/apiv4-documentation/ZG9jOjQzNjc0NDc-get-list-competitor-rankings-s7) \(Full table\) + * [DistributionKeywordsS7](https://developer.searchmetrics.com/docs/apiv4-documentation/ZG9jOjQzNjc0NDg-get-list-distribution-keywords-s7) \(Full table\) + * [KeywordPotentialsS7](https://developer.searchmetrics.com/docs/apiv4-documentation/ZG9jOjQzNjc0NTA-get-list-keyword-potentials-s7) \(Full table\) + * [ListCompetitors](https://developer.searchmetrics.com/docs/apiv4-documentation/ZG9jOjQwODQ5OTI-get-list-competitors) \(Full table\) + * [ListCompetitorsRelevancy](https://developer.searchmetrics.com/docs/apiv4-documentation/ZG9jOjQxODQxNjU-get-list-competitors-relevancy) \(Full table\) + * [ListLosersS7](https://developer.searchmetrics.com/docs/apiv4-documentation/ZG9jOjQzNjc0NTE-get-list-losers-s7) \(Full table\) + * [ListMarketShareS7](https://developer.searchmetrics.com/docs/apiv4-documentation/ZG9jOjQzNjc0NTI-get-list-market-share-s7) \(Incremental\) + * [ListPositionSpreadHistoricS7](https://developer.searchmetrics.com/docs/apiv4-documentation/ZG9jOjQzNjc0NTM-get-list-position-spread-historic-s7) \(Incremental\) + * [ListRankingsDomain](https://developer.searchmetrics.com/docs/apiv4-documentation/ZG9jOjQwODQ5OTg-get-list-rankings-domain) \(Full table\) + * [ListRankingsHistoricS7](https://developer.searchmetrics.com/docs/apiv4-documentation/ZG9jOjQzNjc0NTY-get-list-rankings-historic-s7) \(Full table\) + * [ListSeoVisibilityCountry](https://developer.searchmetrics.com/docs/apiv4-documentation/ZG9jOjQyMjg4NDk-get-list-seo-visibility-country) \(Full table\) + * 
[ListSeoVisibilityHistoricS7](https://developer.searchmetrics.com/docs/apiv4-documentation/ZG9jOjQzNjc0NTc-get-list-seo-visibility-historic-s7) \(Incremental\) + * [ListSerpSpreadS7](https://developer.searchmetrics.com/docs/apiv4-documentation/ZG9jOjQzNjc0NTg-get-list-serp-spread-s7) \(Full table\) + * [ListWinnersS7](https://developer.searchmetrics.com/docs/apiv4-documentation/ZG9jOjQzNjc0NjQ-get-list-winners-s7) \(Full table\) + * [SeoVisibilityValueS7](https://developer.searchmetrics.com/docs/apiv4-documentation/ZG9jOjQyMzQzMjk-get-value-seo-visibility) \(Full table\) + * [SerpSpreadValueS7](https://developer.searchmetrics.com/docs/apiv4-documentation/ZG9jOjQzNjc0Njc-get-value-serp-spread-s7) \(Full table\) + * [TagPotentialsS7](https://developer.searchmetrics.com/docs/apiv4-documentation/ZG9jOjQzNjc0NTk-get-list-tag-potentials-s7) \(Full table\) + * [Tags](https://developer.searchmetrics.com/docs/apiv4-documentation/ZG9jOjE4NzQ0ODMz-get-list-project-tags) \(Full table\) + * [UrlRankingsS7](https://developer.searchmetrics.com/docs/apiv4-documentation/ZG9jOjQzNjc0NjM-get-list-url-rankings-s7) \(Full table\) + +If there are more endpoints you'd like Airbyte to support, please [create an issue.](https://github.com/airbytehq/airbyte/issues/new/choose) + +### Features + +| Feature | Supported? | +| :--- | :--- | +| Full Refresh Sync | Yes | +| Incremental - Append Sync | Yes | +| SSL connection | Yes | +| Namespaces | No | + + +The SearchMetrics connector should not run into SearchMetrics API limitations under normal usage. Please [create an issue](https://github.com/airbytehq/airbyte/issues) if you see any rate limit issues that are not automatically retried successfully. + +## Getting started + +### Requirements + +* SearchMetrics Client Secret +* SearchMetrics API Key + +### Setup guide + +Please read [How to get your API Key and Client Secret](https://developer.searchmetrics.com/docs/apiv4-documentation/ZG9jOjQ2Nzk1-getting-started) . 
+ +## Changelog + +| Version | Date | Pull Request | Subject | +| :------ | :-------- | :----- | :------ | +| 0.1.1 | 2021-12-22 | [6992](https://github.com/airbytehq/airbyte/pull/6992) | Deleted windows in days from config | +| 0.1.0 | 2021-10-13 | [6992](https://github.com/airbytehq/airbyte/pull/6992) | Release SearchMetrics CDK Connector |