Commit

Source Facebook Marketing: Fix number and integer fields in schemas (#7605)

* Add job retry logic to AdsInsights stream.

* Add ad_creatives.thumbnail_url to ignored_fields in full_refresh SAT

* Update wait_for_job condition
htrueman authored Nov 5, 2021
1 parent c6edf13 commit 23f3d3e
Showing 2 changed files with 26 additions and 13 deletions.
@@ -29,4 +29,5 @@ tests:
 # Because one read response contains this metric, and other doesn't.
 # Therefore, it's needed to ignore fields like this in API responses.
 ignored_fields:
-  "ads_insights_age_and_gender": ["cost_per_estimated_ad_recallers"]
+  "ads_insights_age_and_gender": ["cost_per_estimated_ad_recallers"]
+  "ad_creatives": ["thumbnail_url"]
@@ -290,6 +290,9 @@ class AdsInsights(FBMarketingIncrementalStream):
     action_attribution_windows = ALL_ACTION_ATTRIBUTION_WINDOWS
     time_increment = 1
 
+    running_jobs = deque()
+    times_job_restarted = {}
+
     breakdowns = []
 
     def __init__(
@@ -327,7 +330,7 @@ def read_records(
         stream_state: Mapping[str, Any] = None,
     ) -> Iterable[Mapping[str, Any]]:
         """Waits for current job to finish (slice) and yield its result"""
-        result = self.wait_for_job(stream_slice["job"])
+        result = self.wait_for_job(stream_slice["job"], stream_state=stream_state)
         # because we query `lookback_window` days before actual cursor we might get records older then cursor
 
         for obj in result.get_result():
@@ -341,20 +344,19 @@ def stream_slices(self, stream_state: Mapping[str, Any] = None, **kwargs) -> Ite
         3. we shouldn't proceed to consumption of the next job before previous succeed
         """
         stream_state = stream_state or {}
-        running_jobs = deque()
         date_ranges = list(self._date_ranges(stream_state=stream_state))
         for params in date_ranges:
             params = deep_merge(params, self.request_params(stream_state=stream_state))
             job = self._create_insights_job(params)
-            running_jobs.append(job)
-            if len(running_jobs) >= self.MAX_ASYNC_JOBS:
-                yield {"job": running_jobs.popleft()}
+            self.running_jobs.append(job)
+            if len(self.running_jobs) >= self.MAX_ASYNC_JOBS:
+                yield {"job": self.running_jobs.popleft()}
 
-        while running_jobs:
-            yield {"job": running_jobs.popleft()}
+        while self.running_jobs:
+            yield {"job": self.running_jobs.popleft()}
 
     @backoff_policy
-    def wait_for_job(self, job) -> AdReportRun:
+    def wait_for_job(self, job, stream_state: Mapping[str, Any] = None) -> AdReportRun:
         factor = 2
         start_time = pendulum.now()
         sleep_seconds = factor
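The docstring's third point is the constraint that shapes this method: only a limited number of Insights report jobs may run asynchronously at once, so stream_slices keeps at most MAX_ASYNC_JOBS in flight and starts yielding the oldest job once that window is full, draining the rest at the end. A stripped-down, hypothetical version of that windowing outside the connector (create_job stands in for _create_insights_job):

from collections import deque
from typing import Any, Callable, Iterable, Iterator, Mapping

MAX_ASYNC_JOBS = 10  # stand-in for the stream's real limit

def sliced_jobs(
    date_ranges: Iterable[Mapping[str, Any]],
    create_job: Callable[[Mapping[str, Any]], Any],
) -> Iterator[Mapping[str, Any]]:
    # Submit one async job per date range, but never keep more than
    # MAX_ASYNC_JOBS in flight: once the window is full, hand the oldest
    # job over for waiting before submitting the next one.
    running: deque = deque()
    for params in date_ranges:
        running.append(create_job(params))
        if len(running) >= MAX_ASYNC_JOBS:
            yield {"job": running.popleft()}
    while running:  # drain whatever is still queued once all jobs are submitted
        yield {"job": running.popleft()}

In the diff the deque also moves from a local variable to self.running_jobs; that is what lets wait_for_job push a restarted job back onto the same queue.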
@@ -367,10 +369,20 @@ def wait_for_job(self) -> AdReportRun:
 
         if job["async_status"] == "Job Completed":
             return job
-        elif job["async_status"] == "Job Failed":
-            raise JobTimeoutException(f"AdReportRun {job} failed after {runtime.in_seconds()} seconds.")
-        elif job["async_status"] == "Job Skipped":
-            raise JobTimeoutException(f"AdReportRun {job} skipped after {runtime.in_seconds()} seconds.")
+        elif job["async_status"] in ["Job Failed", "Job Skipped"]:
+            time_range = (job["date_start"], job["date_stop"])
+            if self.times_job_restarted.get(time_range, 0) < 6:
+                params = deep_merge(
+                    {"time_range": {"since": job["date_start"], "until": job["date_stop"]}},
+                    self.request_params(stream_state=stream_state),
+                )
+                restart_job = self._create_insights_job(params)
+                self.running_jobs.append(restart_job)
+                self.times_job_restarted[time_range] += 1
+            elif job["async_status"] == "Job Failed":
+                raise JobTimeoutException(f"AdReportRun {job} failed after {runtime.in_seconds()} seconds.")
+            elif job["async_status"] == "Job Skipped":
+                raise JobTimeoutException(f"AdReportRun {job} skipped after {runtime.in_seconds()} seconds.")
 
         if runtime > self.MAX_WAIT_TO_START and job_progress_pct == 0:
             raise JobTimeoutException(
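The new branch handles "Job Failed" and "Job Skipped" the same way: up to six times per (date_start, date_stop) window it rebuilds the request parameters from stream_state, submits a fresh insights job, and pushes it onto self.running_jobs; only once the cap is exhausted does it raise. A minimal standalone sketch of that bookkeeping (hypothetical names; create_job stands in for _create_insights_job, and the counter is incremented via .get() so the first restart of a window cannot hit a missing dict key):

from collections import deque
from typing import Any, Callable, Mapping

MAX_JOB_RESTARTS = 6  # mirrors the `< 6` cap in the diff

class ReportJobError(Exception):
    pass

running_jobs: deque = deque()
times_job_restarted: dict = {}

def handle_job_status(
    status: str,
    date_start: str,
    date_stop: str,
    create_job: Callable[[Mapping[str, Any]], Any],
) -> None:
    # Re-submit a failed or skipped report job for the same date window,
    # giving up only after MAX_JOB_RESTARTS attempts for that window.
    if status not in ("Job Failed", "Job Skipped"):
        return
    window = (date_start, date_stop)
    restarts = times_job_restarted.get(window, 0)
    if restarts < MAX_JOB_RESTARTS:
        running_jobs.append(create_job({"since": date_start, "until": date_stop}))
        times_job_restarted[window] = restarts + 1
    else:
        raise ReportJobError(f"report job for {window} still {status!r} after {restarts} restarts")

As committed, the diff increments the counter with self.times_job_restarted[time_range] += 1, which relies on the key already being present for that window.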
