-
Notifications
You must be signed in to change notification settings - Fork 4.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
🎉 Google Ads improvement: Support user-specified queries #5302
Changes from 13 commits
7ba083b
0b57c89
7ba9a5c
0b343da
cbb274a
7c4a212
a4ddc06
5a27b95
00b9fa1
6a2d3d7
7e4c397
2c9efa3
b297a6a
80c6e2d
99c8e02
4e17d84
c7256c6
c31c3fd
78adddf
e17a320
14e80e1
f43c29f
5775511
a7abb83
dd737dd
ad8b2c0
baae471
1ce8bf7
a4685dc
bbd678f
7fa232f
a7b8916
0ba4690
9aaab72
2260ca2
3ee960b
c5d55de
612ffc3
22be66e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -22,6 +22,7 @@ | |||||
# SOFTWARE. | ||||||
# | ||||||
|
||||||
|
||||||
from enum import Enum | ||||||
from typing import Any, List, Mapping | ||||||
|
||||||
|
@@ -62,6 +63,7 @@ def send_request(self, query: str) -> SearchGoogleAdsResponse: | |||||
@staticmethod | ||||||
def get_fields_from_schema(schema: Mapping[str, Any]) -> List[str]: | ||||||
properties = schema.get("properties") | ||||||
# return list(properties.keys()) will be more clear ? | ||||||
return [*properties] | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
|
||||||
@staticmethod | ||||||
|
@@ -148,7 +150,17 @@ def get_field_value(field_value: GoogleAdsRow, field: str) -> str: | |||||
return field_value | ||||||
|
||||||
@staticmethod | ||||||
def parse_single_result(schema: Mapping[str, Any], result: GoogleAdsRow): | ||||||
fields = GoogleAds.get_fields_from_schema(schema) | ||||||
def process_query(query) -> List: | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. annotations everywhere please |
||||||
query = query.lower().split("select")[1].split("from")[0].strip() | ||||||
fields = query.split(",") | ||||||
fields = [i.strip() for i in fields] | ||||||
return fields | ||||||
|
||||||
@staticmethod | ||||||
def parse_single_result(schema: Mapping[str, Any], result: GoogleAdsRow, query: str = None): | ||||||
if not query: | ||||||
fields = GoogleAds.get_fields_from_schema(schema) | ||||||
else: | ||||||
fields = GoogleAds.process_query(query) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this should be part of the schema, i.e. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You've changed parse_single_result only for calling process_query. Why you can't do this directly? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the second part (parse google protobuff response, structure without possibility to list the response fields) require those fields to call There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is outdated and moved to get_fields_from_schema |
||||||
single_record = {field: GoogleAds.get_field_value(result, field) for field in fields} | ||||||
return single_record |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,10 +23,10 @@ | |
# | ||
|
||
|
||
from typing import Any, List, Mapping, Tuple | ||
from typing import Any, List, Mapping, MutableMapping, Tuple | ||
|
||
from airbyte_cdk import AirbyteLogger | ||
from airbyte_cdk.models import SyncMode | ||
from airbyte_cdk.models import AirbyteCatalog, AirbyteStream, SyncMode | ||
from airbyte_cdk.sources import AbstractSource | ||
from airbyte_cdk.sources.streams import Stream | ||
from google.ads.googleads.errors import GoogleAdsException | ||
|
@@ -39,13 +39,52 @@ | |
AdGroupAds, | ||
AdGroups, | ||
Campaigns, | ||
CustomQuery, | ||
CustomQueryFullRefresh, | ||
CustomQueryIncremental, | ||
DisplayKeywordPerformanceReport, | ||
DisplayTopicsPerformanceReport, | ||
ShoppingPerformanceReport, | ||
) | ||
|
||
|
||
class SourceGoogleAds(AbstractSource): | ||
def get_local_json_schema(self, config) -> MutableMapping[str, Any]: | ||
""" | ||
As agreed, now it returns the default schema (since read -> schema_generator.py may take hours for the end user). | ||
If we want to redesign json schema from raw query, this method need to be modified. | ||
""" | ||
local_json_schema = {"$schema": "http://json-schema.org/draft-07/schema#", "type": "object", "additionalProperties": True} | ||
return local_json_schema | ||
|
||
def discover(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> AirbyteCatalog: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you don't need to override this method if you implement |
||
# streams = [stream.as_airbyte_stream() for stream in self.streams(config=config)] | ||
streams = [] | ||
for stream in self.streams(config=config): | ||
if not isinstance(stream, (CustomQueryFullRefresh, CustomQueryIncremental)): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. checking for CustomQuery is enough, |
||
streams.append(stream.as_airbyte_stream()) | ||
# TODO: extend with custom defined streams | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure if you did this here actually, is this still a relevant comment? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. just a mark where the default discover logic was extended, will be removed There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you don't need to change it at all, just implement this in streams |
||
for usr_query in config.get("custom_query", []): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why don't you do this inside There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we need this to be made in discover, to fill schemas (dynamically, it is very important) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what stops you from doing this in streams??? this is what get_json_schema for |
||
local_cursor_field = ( | ||
[usr_query.get("cursor_field")] if isinstance(usr_query.get("cursor_field"), str) else usr_query.get("cursor_field") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think, it's better to set specification properly. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the cursor field is already a property if we have generic incremental stream |
||
) | ||
stream = AirbyteStream( | ||
name=usr_query["table_name"], | ||
json_schema=self.get_local_json_schema(config=config), | ||
supported_sync_modes=[SyncMode.full_refresh], | ||
) | ||
if usr_query.get("cursor_field"): | ||
stream.source_defined_cursor = True # ??? | ||
stream.supported_sync_modes.append(SyncMode.incremental) # type: ignore | ||
stream.default_cursor_field = local_cursor_field | ||
|
||
keys = Stream._wrapped_primary_key(usr_query.get("primary_key") or None) # (!!! read empty strings as null aswell) | ||
if keys and len(keys) > 0: | ||
stream.source_defined_primary_key = keys | ||
streams.append(stream) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why properties of custom streams are missed? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. they are defined when we create CustomQueryX instance, otherwice connector would not be able to work (it works) |
||
# end of TODO | ||
return AirbyteCatalog(streams=streams) | ||
|
||
def get_credentials(self, config: Mapping[str, Any]) -> Mapping[str, Any]: | ||
credentials = config["credentials"] | ||
|
||
|
@@ -69,6 +108,11 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: | |
incremental_stream_config = dict( | ||
api=google_api, conversion_window_days=config["conversion_window_days"], start_date=config["start_date"] | ||
) | ||
|
||
custom_query_streams = [ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is it possible to validate custom queries in the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe we can use this for validating queries. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. used regex in spec.json, since google query validator works on client (UI javascript) side |
||
CustomQuery(custom_query_config=config["custom_query"][i], **incremental_stream_config) | ||
for i in range(len(config.get("custom_query", []))) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why do we need range here??? |
||
] | ||
return [ | ||
AccountPerformanceReport(**incremental_stream_config), | ||
DisplayTopicsPerformanceReport(**incremental_stream_config), | ||
|
@@ -79,4 +123,4 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: | |
AdGroups(api=google_api), | ||
Accounts(api=google_api), | ||
Campaigns(api=google_api), | ||
] | ||
] + custom_query_streams |
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
|
@@ -67,6 +67,31 @@ | |||||||
"maximum": 1095, | ||||||||
"default": 14, | ||||||||
"examples": [14] | ||||||||
}, | ||||||||
"custom_query": { | ||||||||
keu marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||
"type": "array", | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||
"items": { | ||||||||
"type": ["object", "null"], | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||||||||
"properties": { | ||||||||
"query": { | ||||||||
"type": "string", | ||||||||
"title": "User defined query to build a report by wish", | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||
"examples": ["SELECT segments.ad_destination_type FROM campaign"] | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If user wants to set several fields, how it should be separated? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. comma-separated, I will update example and will add one more field |
||||||||
}, | ||||||||
"primary_key": { | ||||||||
"type": ["string", "null"], | ||||||||
"title": "The unique field to be used as primary key in destination database (if provided)" | ||||||||
}, | ||||||||
"cursor_field": { | ||||||||
"type": ["string", "null"], | ||||||||
"title": "If not provided, the FULL-REFRESH model will be used. If provided, this will be an incremental stream with this cursor field. Please use datetime fields (start_date, end_date) for proper work." | ||||||||
vovavovavovavova marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||
}, | ||||||||
"table_name": { | ||||||||
"type": "string", | ||||||||
"title": "The table name in your destination database for choosen query." | ||||||||
} | ||||||||
} | ||||||||
} | ||||||||
} | ||||||||
} | ||||||||
} | ||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -192,3 +192,73 @@ class ShoppingPerformanceReport(IncrementalGoogleAdsStream): | |
ShoppingPerformanceReport stream: https://developers.google.com/google-ads/api/fields/v8/shopping_performance_view | ||
Google Ads API field mapping: https://developers.google.com/google-ads/api/docs/migration/mapping#shopping_performance | ||
""" | ||
|
||
|
||
class CustomQueryFullRefresh(GoogleAdsStream): | ||
""" | ||
Class that should sync by custom user query to Google Ads API | ||
Fixme: check if WHERE>start_date was applied in standard fullrefresh stream. If yes, reapply here. | ||
""" | ||
|
||
def __init__(self, custom_query_config, **kwargs): | ||
self.custom_query_config = custom_query_config | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. a lot of duplication, no need to create two separate classes, |
||
self.user_defined_query = custom_query_config["query"] | ||
super().__init__(api=kwargs["api"]) | ||
|
||
@property | ||
def primary_key(self) -> str: | ||
return self.custom_query_config.get("primary_key") or None # not empty stings | ||
|
||
@property | ||
def name(self): | ||
return self.custom_query_config["table_name"] | ||
|
||
def get_query(self, stream_slice: Mapping[str, Any] = None) -> str: | ||
return self.user_defined_query | ||
|
||
def parse_response(self, response: SearchPager) -> Iterable[Mapping]: | ||
for result in response: | ||
yield self.google_ads_client.parse_single_result(schema=None, result=result, query=self.user_defined_query) | ||
|
||
|
||
class CustomQueryIncremental(IncrementalGoogleAdsStream): | ||
""" | ||
Class that should sync by custom user query to Google Ads API | ||
""" | ||
|
||
def __init__(self, custom_query_config, **kwargs): | ||
self.custom_query_config = custom_query_config | ||
self.user_defined_query = custom_query_config["query"] | ||
super().__init__(**kwargs) | ||
|
||
@property | ||
def primary_key(self) -> str: | ||
return self.custom_query_config.get("primary_key") or None | ||
|
||
@property | ||
def cursor_field(self) -> str: | ||
return self.custom_query_config["cursor_field"] | ||
|
||
@property | ||
def name(self): | ||
return self.custom_query_config["table_name"] | ||
|
||
def get_query(self, stream_slice: Mapping[str, Any] = None) -> str: | ||
start_date, end_date = self.get_date_params(stream_slice, self.cursor_field) | ||
final_query = ( | ||
self.user_defined_query | ||
+ f"\nWHERE {self.cursor_field} > '{start_date}' AND {self.cursor_field} < '{end_date}' ORDER BY {self.cursor_field} ASC" | ||
) | ||
return final_query | ||
|
||
def parse_response(self, response: SearchPager) -> Iterable[Mapping]: | ||
for result in response: | ||
yield self.google_ads_client.parse_single_result(schema=None, result=result, query=self.user_defined_query) | ||
|
||
|
||
class CustomQuery: | ||
def __new__(cls, *args, **kwargs): | ||
if kwargs.get("custom_query_config", {}).get("cursor_field"): | ||
return CustomQueryIncremental(*args, **kwargs) | ||
else: | ||
return CustomQueryFullRefresh(*args, **kwargs) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What this for?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
from the user side, catalog commandline argument is taken from the discover.
from our side for read we use console command python main.py read --config ... --catalog ...
This is to fill this command and be able to run it and get the data from source.