Skip to content

Commit

Permalink
🎉 Source Facebook Marketing: Add the option to fetch thumbnail image …
Browse files Browse the repository at this point in the history
…data. (#8649)

* feat(zendesk): Add Brands and CustomRoles

* feat(zendesk): add incremental unsorted cursor stream
implement IncrementalUnsortedCursorStream to ticket_metrics

* feat(zendesk): use sorted cursor pagination
for ticket comments and macros

* feat(zendesk): use unsorted cursor stream
for groups, group memberships and satisfaction ratings

* fix(zendesk): use safe method to get value from nested dict

* style(zendesk): reformat using gradlew

* fix(zendesk): format created_at and updated_at to date-time format

* feat(zendesk): add business hours schedule

* bump connector version

* bump dockerfile version

* reset

* resolve webapp files

Co-authored-by: asyarif93 <asyarif93@gmail.com>
Co-authored-by: Marcos Marx <marcosmarxm@gmail.com>
  • Loading branch information
3 people authored Dec 17, 2021
1 parent 2eace67 commit 9acfc81
Show file tree
Hide file tree
Showing 9 changed files with 61 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"sourceDefinitionId": "e7778cfc-e97c-4458-9ecb-b4f2bba8946c",
"name": "Facebook Marketing",
"dockerRepository": "airbyte/source-facebook-marketing",
"dockerImageTag": "0.2.27",
"dockerImageTag": "0.2.29",
"documentationUrl": "https://docs.airbyte.io/integrations/sources/facebook-marketing",
"icon": "facebook.svg"
}
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@
- name: Facebook Marketing
sourceDefinitionId: e7778cfc-e97c-4458-9ecb-b4f2bba8946c
dockerRepository: airbyte/source-facebook-marketing
dockerImageTag: 0.2.28
dockerImageTag: 0.2.29
documentationUrl: https://docs.airbyte.io/integrations/sources/facebook-marketing
icon: facebook.svg
sourceType: api
Expand Down
14 changes: 10 additions & 4 deletions airbyte-config/init/src/main/resources/seed/source_specs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1428,7 +1428,7 @@
supportsNormalization: false
supportsDBT: false
supported_destination_sync_modes: []
- dockerImage: "airbyte/source-facebook-marketing:0.2.28"
- dockerImage: "airbyte/source-facebook-marketing:0.2.29"
spec:
documentationUrl: "https://docs.airbyte.io/integrations/sources/facebook-marketing"
changelogUrl: "https://docs.airbyte.io/integrations/sources/facebook-marketing"
Expand Down Expand Up @@ -1469,9 +1469,15 @@
- "2017-01-26T00:00:00Z"
type: "string"
format: "date-time"
fetch_thumbnail_images:
title: "Fetch Thumbnail Images"
description: "In each Ad Creative, fetch the thumbnail_url and store the\
\ result in thumbnail_data_url"
default: false
type: "boolean"
include_deleted:
title: "Include Deleted"
description: "Include data from deleted campaigns, ads, and adsets."
description: "Include data from deleted campaigns, ads, and adsets"
default: false
type: "boolean"
insights_lookback_window:
Expand All @@ -1483,8 +1489,8 @@
type: "integer"
insights_days_per_job:
title: "Insights Days Per Job"
description: "Number of days to sync in one job. The more data you have\
\ - the smaller you want this parameter to be."
description: "Number of days to sync in one job (the more data you have,\
\ the smaller this parameter should be)"
default: 7
minimum: 1
maximum: 30
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ RUN pip install .
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

LABEL io.airbyte.version=0.2.28
LABEL io.airbyte.version=0.2.29
LABEL io.airbyte.name=airbyte/source-facebook-marketing
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,15 @@
"type": "string",
"format": "date-time"
},
"fetch_thumbnail_images": {
"title": "Fetch Thumbnail Images",
"description": "In each Ad Creative, fetch the thumbnail_url and store the result in thumbnail_data_url",
"default": false,
"type": "boolean"
},
"include_deleted": {
"title": "Include Deleted",
"description": "Include data from deleted campaigns, ads, and adsets.",
"description": "Include data from deleted campaigns, ads, and adsets",
"default": false,
"type": "boolean"
},
Expand All @@ -48,7 +54,7 @@
},
"insights_days_per_job": {
"title": "Insights Days Per Job",
"description": "Number of days to sync in one job. The more data you have - the smaller you want this parameter to be.",
"description": "Number of days to sync in one job (the more data you have, the smaller this parameter should be)",
"default": 7,
"minimum": 1,
"maximum": 30,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -835,6 +835,9 @@
}
}
},
"thumbnail_data_url": {
"type": ["null", "string"]
},
"thumbnail_url": {
"type": ["null", "string"]
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,11 @@ class Config:
default_factory=pendulum.now,
)

include_deleted: bool = Field(default=False, description="Include data from deleted campaigns, ads, and adsets.")
fetch_thumbnail_images: bool = Field(
default=False, description="In each Ad Creative, fetch the thumbnail_url and store the result in thumbnail_data_url"
)

include_deleted: bool = Field(default=False, description="Include data from deleted campaigns, ads, and adsets")

insights_lookback_window: int = Field(
default=28,
Expand All @@ -87,7 +91,7 @@ class Config:

insights_days_per_job: int = Field(
default=7,
description="Number of days to sync in one job. The more data you have - the smaller you want this parameter to be.",
description="Number of days to sync in one job (the more data you have, the smaller this parameter should be)",
minimum=1,
maximum=30,
)
Expand Down Expand Up @@ -137,7 +141,7 @@ def streams(self, config: Mapping[str, Any]) -> List[Type[Stream]]:
Campaigns(api=api, start_date=config.start_date, end_date=config.end_date, include_deleted=config.include_deleted),
AdSets(api=api, start_date=config.start_date, end_date=config.end_date, include_deleted=config.include_deleted),
Ads(api=api, start_date=config.start_date, end_date=config.end_date, include_deleted=config.include_deleted),
AdCreatives(api=api),
AdCreatives(api=api, fetch_thumbnail_images=config.fetch_thumbnail_images),
AdsInsights(**insights_args),
AdsInsightsAgeAndGender(**insights_args),
AdsInsightsCountry(**insights_args),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Copyright (c) 2021 Airbyte, Inc., all rights reserved.
#

import base64
import time
import urllib.parse as urlparse
from abc import ABC
Expand All @@ -12,6 +13,7 @@
import airbyte_cdk.sources.utils.casing as casing
import backoff
import pendulum
import requests
from airbyte_cdk.models import SyncMode
from airbyte_cdk.sources.streams import Stream
from airbyte_cdk.sources.streams.core import package_name_from_class
Expand Down Expand Up @@ -43,6 +45,18 @@ def remove_params_from_url(url: str, params: List[str]) -> str:
)


def fetch_thumbnail_data_url(url: str, timeout: float = 30.0) -> "str | None":
    """Download the image at ``url`` and return it as a base64 ``data:`` URL.

    This is a best-effort helper: a failure to retrieve the image yields
    ``None`` instead of an exception, so a single unreachable thumbnail does
    not abort the caller.

    :param url: address of the thumbnail image to download
    :param timeout: seconds to wait for the server before giving up; without
        an explicit value ``requests`` waits indefinitely on a stalled host
    :return: ``data:<content-type>;base64,<payload>`` for an HTTP 200
        response, otherwise ``None`` (non-200 status or any ``requests`` error,
        including a timeout)
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException:
        # Deliberately swallowed: the thumbnail is optional enrichment, and a
        # network error here should leave the field empty rather than fail.
        return None

    if response.status_code != 200:
        return None

    # A response without a Content-Type header must not raise KeyError; fall
    # back to the generic binary media type so the payload is still usable.
    content_type = response.headers.get("content-type", "application/octet-stream")
    encoded = base64.b64encode(response.content).decode("ascii")
    return f"data:{content_type};base64,{encoded}"


class FBMarketingStream(Stream, ABC):
"""Base stream class"""

Expand Down Expand Up @@ -198,6 +212,10 @@ class AdCreatives(FBMarketingStream):
entity_prefix = "adcreative"
batch_size = 50

def __init__(self, fetch_thumbnail_images: bool = False, **kwargs):
    """Initialise the stream and remember whether thumbnails should be fetched.

    :param fetch_thumbnail_images: stored as ``self._fetch_thumbnail_images``;
        defaults to ``False``
    :param kwargs: forwarded unchanged to the parent class initialiser
    """
    super().__init__(**kwargs)
    self._fetch_thumbnail_images = fetch_thumbnail_images

def read_records(
self,
sync_mode: SyncMode,
Expand All @@ -207,17 +225,23 @@ def read_records(
) -> Iterable[Mapping[str, Any]]:
"""Read records using batch API"""
records = self._read_records(params=self.request_params(stream_state=stream_state))
requests = [record.api_get(fields=self.fields, pending=True) for record in records]
# "thumbnail_data_url" is a field in our stream's schema because we
# output it (see fix_thumbnail_urls below), but it's not a field that
# we can request from Facebook
request_fields = [f for f in self.fields if f != "thumbnail_data_url"]
requests = [record.api_get(fields=request_fields, pending=True) for record in records]
for requests_batch in batch(requests, size=self.batch_size):
for record in self.execute_in_batch(requests_batch):
yield self.clear_urls(record)
yield self.fix_thumbnail_urls(record)

@staticmethod
def clear_urls(record: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
"""Some URLs has random values, these values doesn't affect validity of URLs, but breaks SAT"""
def fix_thumbnail_urls(self, record: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
"""Cleans and, if enabled, fetches thumbnail URLs for each creative."""
# The thumbnail_url contains some extra query parameters that don't affect the validity of the URL, but break SAT
thumbnail_url = record.get("thumbnail_url")
if thumbnail_url:
record["thumbnail_url"] = remove_params_from_url(thumbnail_url, ["_nc_hash", "d"])
if self._fetch_thumbnail_images:
record["thumbnail_data_url"] = fetch_thumbnail_data_url(thumbnail_url)
return record

@backoff_policy
Expand Down
1 change: 1 addition & 0 deletions docs/integrations/sources/facebook-marketing.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ As a summary, custom insights allows to replicate only some fields, resulting in

| Version | Date | Pull Request | Subject |
| :--- | :--- | :--- | :--- |
| 0.2.29 | 2021-12-17 | [8649](https://github.com/airbytehq/airbyte/pull/8649) | Retrieve ad_creatives thumbnail image as a base64-encoded data URL |
| 0.2.28 | 2021-12-13 | [8742](https://github.com/airbytehq/airbyte/pull/8742) | Fix for schema generation related to "breakdown" fields |
| 0.2.27 | 2021-11-29 | [8257](https://github.com/airbytehq/airbyte/pull/8257) | Add fields to Campaign stream |
| 0.2.26 | 2021-11-19 | [7855](https://github.com/airbytehq/airbyte/pull/7855) | Add Video stream |
Expand Down

0 comments on commit 9acfc81

Please sign in to comment.