From 9acfc819dc02f77cc9533fb20b572279d9b60a4a Mon Sep 17 00:00:00 2001 From: Ka-Ping Yee Date: Fri, 17 Dec 2021 14:59:54 -0800 Subject: [PATCH] =?UTF-8?q?=F0=9F=8E=89=20Source=20Facebook=20Marketing:?= =?UTF-8?q?=20Add=20the=20option=20to=20fetch=20thumbnail=20image=20data.?= =?UTF-8?q?=20(#8649)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(zendesk): Add Brands and CustomRoles * feat(zendesk): add incremental unsorted cursor stream implement IncrementalUnsortedCursorStream to ticket_metrics * feat(zendesk): use sorted cursor pagination for ticket comments and macros * feat(zendesk): use unsorted cursor stream for groups, group memberships and satisfaction ratings * fix(zendesk): use safe method to get value from nested dict * style(zendesk): reformat using gradlew * fix(zendesk): format created_at and updated_at to date-time format * feat(zendesk): add business hours schedule * bump connector version * bump dockerfile version * reset * resolve webapp files Co-authored-by: asyarif93 Co-authored-by: Marcos Marx --- .../e7778cfc-e97c-4458-9ecb-b4f2bba8946c.json | 2 +- .../resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 14 +++++--- .../source-facebook-marketing/Dockerfile | 2 +- .../integration_tests/spec.json | 10 ++++-- .../schemas/ad_creatives.json | 3 ++ .../source_facebook_marketing/source.py | 10 ++++-- .../source_facebook_marketing/streams.py | 34 ++++++++++++++++--- .../sources/facebook-marketing.md | 1 + 9 files changed, 61 insertions(+), 17 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e7778cfc-e97c-4458-9ecb-b4f2bba8946c.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e7778cfc-e97c-4458-9ecb-b4f2bba8946c.json index 92867ce4f150..591f2e7982bc 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e7778cfc-e97c-4458-9ecb-b4f2bba8946c.json +++ 
b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e7778cfc-e97c-4458-9ecb-b4f2bba8946c.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "e7778cfc-e97c-4458-9ecb-b4f2bba8946c", "name": "Facebook Marketing", "dockerRepository": "airbyte/source-facebook-marketing", - "dockerImageTag": "0.2.27", + "dockerImageTag": "0.2.29", "documentationUrl": "https://docs.airbyte.io/integrations/sources/facebook-marketing", "icon": "facebook.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index eec260e71181..ff08e0f3116d 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -175,7 +175,7 @@ - name: Facebook Marketing sourceDefinitionId: e7778cfc-e97c-4458-9ecb-b4f2bba8946c dockerRepository: airbyte/source-facebook-marketing - dockerImageTag: 0.2.28 + dockerImageTag: 0.2.29 documentationUrl: https://docs.airbyte.io/integrations/sources/facebook-marketing icon: facebook.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 7e6d202db399..b135f495dfc6 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -1428,7 +1428,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-facebook-marketing:0.2.28" +- dockerImage: "airbyte/source-facebook-marketing:0.2.29" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/facebook-marketing" changelogUrl: "https://docs.airbyte.io/integrations/sources/facebook-marketing" @@ -1469,9 +1469,15 @@ - "2017-01-26T00:00:00Z" type: "string" format: "date-time" + fetch_thumbnail_images: + title: "Fetch Thumbnail Images" + description: "In each Ad 
Creative, fetch the thumbnail_url and store the\ + \ result in thumbnail_data_url" + default: false + type: "boolean" include_deleted: title: "Include Deleted" - description: "Include data from deleted campaigns, ads, and adsets." + description: "Include data from deleted campaigns, ads, and adsets" default: false type: "boolean" insights_lookback_window: @@ -1483,8 +1489,8 @@ type: "integer" insights_days_per_job: title: "Insights Days Per Job" - description: "Number of days to sync in one job. The more data you have\ - \ - the smaller you want this parameter to be." + description: "Number of days to sync in one job (the more data you have,\ + \ the smaller this parameter should be)" default: 7 minimum: 1 maximum: 30 diff --git a/airbyte-integrations/connectors/source-facebook-marketing/Dockerfile b/airbyte-integrations/connectors/source-facebook-marketing/Dockerfile index b6989bcf6eb1..24f71275ff24 100644 --- a/airbyte-integrations/connectors/source-facebook-marketing/Dockerfile +++ b/airbyte-integrations/connectors/source-facebook-marketing/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . 
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.2.28 +LABEL io.airbyte.version=0.2.29 LABEL io.airbyte.name=airbyte/source-facebook-marketing diff --git a/airbyte-integrations/connectors/source-facebook-marketing/integration_tests/spec.json b/airbyte-integrations/connectors/source-facebook-marketing/integration_tests/spec.json index 2c6b7bbcd205..1f4e623f6368 100644 --- a/airbyte-integrations/connectors/source-facebook-marketing/integration_tests/spec.json +++ b/airbyte-integrations/connectors/source-facebook-marketing/integration_tests/spec.json @@ -32,9 +32,15 @@ "type": "string", "format": "date-time" }, + "fetch_thumbnail_images": { + "title": "Fetch Thumbnail Images", + "description": "In each Ad Creative, fetch the thumbnail_url and store the result in thumbnail_data_url", + "default": false, + "type": "boolean" + }, "include_deleted": { "title": "Include Deleted", - "description": "Include data from deleted campaigns, ads, and adsets.", + "description": "Include data from deleted campaigns, ads, and adsets", "default": false, "type": "boolean" }, @@ -48,7 +54,7 @@ }, "insights_days_per_job": { "title": "Insights Days Per Job", - "description": "Number of days to sync in one job. 
The more data you have - the smaller you want this parameter to be.", + "description": "Number of days to sync in one job (the more data you have, the smaller this parameter should be)", "default": 7, "minimum": 1, "maximum": 30, diff --git a/airbyte-integrations/connectors/source-facebook-marketing/source_facebook_marketing/schemas/ad_creatives.json b/airbyte-integrations/connectors/source-facebook-marketing/source_facebook_marketing/schemas/ad_creatives.json index eb443dab21a7..a33dfd4c58b7 100644 --- a/airbyte-integrations/connectors/source-facebook-marketing/source_facebook_marketing/schemas/ad_creatives.json +++ b/airbyte-integrations/connectors/source-facebook-marketing/source_facebook_marketing/schemas/ad_creatives.json @@ -835,6 +835,9 @@ } } }, + "thumbnail_data_url": { + "type": ["null", "string"] + }, "thumbnail_url": { "type": ["null", "string"] }, diff --git a/airbyte-integrations/connectors/source-facebook-marketing/source_facebook_marketing/source.py b/airbyte-integrations/connectors/source-facebook-marketing/source_facebook_marketing/source.py index bccebefc2c5d..72f1328a7151 100644 --- a/airbyte-integrations/connectors/source-facebook-marketing/source_facebook_marketing/source.py +++ b/airbyte-integrations/connectors/source-facebook-marketing/source_facebook_marketing/source.py @@ -76,7 +76,11 @@ class Config: default_factory=pendulum.now, ) - include_deleted: bool = Field(default=False, description="Include data from deleted campaigns, ads, and adsets.") + fetch_thumbnail_images: bool = Field( + default=False, description="In each Ad Creative, fetch the thumbnail_url and store the result in thumbnail_data_url" + ) + + include_deleted: bool = Field(default=False, description="Include data from deleted campaigns, ads, and adsets") insights_lookback_window: int = Field( default=28, @@ -87,7 +91,7 @@ class Config: insights_days_per_job: int = Field( default=7, - description="Number of days to sync in one job. 
The more data you have - the smaller you want this parameter to be.", + description="Number of days to sync in one job (the more data you have, the smaller this parameter should be)", minimum=1, maximum=30, ) @@ -137,7 +141,7 @@ def streams(self, config: Mapping[str, Any]) -> List[Type[Stream]]: Campaigns(api=api, start_date=config.start_date, end_date=config.end_date, include_deleted=config.include_deleted), AdSets(api=api, start_date=config.start_date, end_date=config.end_date, include_deleted=config.include_deleted), Ads(api=api, start_date=config.start_date, end_date=config.end_date, include_deleted=config.include_deleted), - AdCreatives(api=api), + AdCreatives(api=api, fetch_thumbnail_images=config.fetch_thumbnail_images), AdsInsights(**insights_args), AdsInsightsAgeAndGender(**insights_args), AdsInsightsCountry(**insights_args), diff --git a/airbyte-integrations/connectors/source-facebook-marketing/source_facebook_marketing/streams.py b/airbyte-integrations/connectors/source-facebook-marketing/source_facebook_marketing/streams.py index 5930bf3bae0f..fe0013a8966e 100644 --- a/airbyte-integrations/connectors/source-facebook-marketing/source_facebook_marketing/streams.py +++ b/airbyte-integrations/connectors/source-facebook-marketing/source_facebook_marketing/streams.py @@ -2,6 +2,7 @@ # Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
# +import base64 import time import urllib.parse as urlparse from abc import ABC @@ -12,6 +13,7 @@ import airbyte_cdk.sources.utils.casing as casing import backoff import pendulum +import requests from airbyte_cdk.models import SyncMode from airbyte_cdk.sources.streams import Stream from airbyte_cdk.sources.streams.core import package_name_from_class @@ -43,6 +45,18 @@ def remove_params_from_url(url: str, params: List[str]) -> str: ) +def fetch_thumbnail_data_url(url: str) -> str: + try: + response = requests.get(url) + if response.status_code == 200: + type = response.headers["content-type"] + data = base64.b64encode(response.content) + return f"data:{type};base64,{data.decode('ascii')}" + except requests.exceptions.RequestException: + pass + return None + + class FBMarketingStream(Stream, ABC): """Base stream class""" @@ -198,6 +212,10 @@ class AdCreatives(FBMarketingStream): entity_prefix = "adcreative" batch_size = 50 + def __init__(self, fetch_thumbnail_images: bool = False, **kwargs): + super().__init__(**kwargs) + self._fetch_thumbnail_images = fetch_thumbnail_images + def read_records( self, sync_mode: SyncMode, @@ -207,17 +225,23 @@ def read_records( ) -> Iterable[Mapping[str, Any]]: """Read records using batch API""" records = self._read_records(params=self.request_params(stream_state=stream_state)) - requests = [record.api_get(fields=self.fields, pending=True) for record in records] + # "thumbnail_data_url" is a field in our stream's schema because we + # output it (see fix_thumbnail_urls below), but it's not a field that + # we can request from Facebook + request_fields = [f for f in self.fields if f != "thumbnail_data_url"] + requests = [record.api_get(fields=request_fields, pending=True) for record in records] for requests_batch in batch(requests, size=self.batch_size): for record in self.execute_in_batch(requests_batch): - yield self.clear_urls(record) + yield self.fix_thumbnail_urls(record) - @staticmethod - def clear_urls(record: 
MutableMapping[str, Any]) -> MutableMapping[str, Any]: - """Some URLs has random values, these values doesn't affect validity of URLs, but breaks SAT""" + def fix_thumbnail_urls(self, record: MutableMapping[str, Any]) -> MutableMapping[str, Any]: + """Cleans and, if enabled, fetches thumbnail URLs for each creative.""" + # The thumbnail_url contains some extra query parameters that don't affect the validity of the URL, but break SAT thumbnail_url = record.get("thumbnail_url") if thumbnail_url: record["thumbnail_url"] = remove_params_from_url(thumbnail_url, ["_nc_hash", "d"]) + if self._fetch_thumbnail_images: + record["thumbnail_data_url"] = fetch_thumbnail_data_url(thumbnail_url) return record @backoff_policy diff --git a/docs/integrations/sources/facebook-marketing.md b/docs/integrations/sources/facebook-marketing.md index ee984c55c114..e54659560eb2 100644 --- a/docs/integrations/sources/facebook-marketing.md +++ b/docs/integrations/sources/facebook-marketing.md @@ -96,6 +96,7 @@ As a summary, custom insights allows to replicate only some fields, resulting in | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.2.29 | 2021-12-17 | [8649](https://github.com/airbytehq/airbyte/pull/8649) | Retrieve ad_creatives image as data encoded | | 0.2.28 | 2021-12-13 | [8742](https://github.com/airbytehq/airbyte/pull/8742) | Fix for schema generation related to "breakdown" fields | | 0.2.27 | 2021-11-29 | [8257](https://github.com/airbytehq/airbyte/pull/8257) | Add fields to Campaign stream | | 0.2.26 | 2021-11-19 | [7855](https://github.com/airbytehq/airbyte/pull/7855) | Add Video stream |