🎉 Google Ads improvement: Support user-specified queries #5302

Merged (39 commits), Sep 10, 2021

Commits:
7ba083b
save the state
vovavovavovavova Aug 5, 2021
0b57c89
save big-big-big draft with dirty pre-upgrade
vovavovavovavova Aug 10, 2021
7ba9a5c
save draft
vovavovavovavova Aug 10, 2021
0b343da
rm prints and schemas on fly
vovavovavovavova Aug 10, 2021
cbb274a
fix
vovavovavovavova Aug 10, 2021
7c4a212
Merge remote-tracking branch 'origin/master' into valdemar/#5165_cust…
vovavovavovavova Aug 10, 2021
a4ddc06
update spec
vovavovavovavova Aug 10, 2021
5a27b95
cleanup
vovavovavovavova Aug 10, 2021
00b9fa1
upgrade configured_catalog for tests
vovavovavovavova Aug 10, 2021
6a2d3d7
rm previously used (now not) parameter
vovavovavovavova Aug 10, 2021
7e4c397
upd suggestion
vovavovavovavova Aug 10, 2021
2c9efa3
format && cleanup imports
vovavovavovavova Aug 10, 2021
b297a6a
remove outdated
vovavovavovavova Aug 10, 2021
80c6e2d
dedup code
vovavovavovavova Aug 11, 2021
99c8e02
dedup 2 lines
vovavovavovavova Aug 11, 2021
4e17d84
clean
vovavovavovavova Aug 11, 2021
c7256c6
Update airbyte-integrations/connectors/source-google-ads/source_googl…
vovavovavovavova Aug 11, 2021
c31c3fd
rm discovery modification; apply get_json_schema instead.
vovavovavovavova Aug 11, 2021
78adddf
upgrade spec; query with field separator
vovavovavovavova Aug 11, 2021
e17a320
Merge remote-tracking branch 'origin/master' into valdemar/#5165_cust…
vovavovavovavova Aug 11, 2021
14e80e1
move def process_query inside get_fields_from_schema
vovavovavovavova Aug 11, 2021
f43c29f
re-write to incremental with 2 moments
vovavovavovavova Aug 13, 2021
5775511
add pattern for basic query validation
vovavovavovavova Aug 13, 2021
a7abb83
add other resources than campaign
vovavovavovavova Aug 13, 2021
dd737dd
add as anytype?
vovavovavovavova Aug 13, 2021
ad8b2c0
dedup code
vovavovavovavova Aug 13, 2021
baae471
Merge remote-tracking branch 'origin/master' into valdemar/#5165_cust…
vovavovavovavova Aug 13, 2021
1ce8bf7
cleanup+ typo fix
vovavovavovavova Aug 13, 2021
a4685dc
merge; add a lot of resources; fix pattern(allow other resources); fi…
vovavovavovavova Aug 25, 2021
bbd678f
disable incremental in custom queries (requested)
vovavovavovavova Aug 26, 2021
7fa232f
Update airbyte-integrations/connectors/source-google-ads/source_googl…
vovavovavovavova Aug 30, 2021
a7b8916
add unit tests
vovavovavovavova Aug 31, 2021
0ba4690
rm double set
vovavovavovavova Aug 31, 2021
9aaab72
Merge remote-tracking branch 'origin/master' into valdemar/#5165_cust…
vovavovavovavova Aug 31, 2021
2260ca2
set prev version since https://github.com/googleads/google-ads-python…
vovavovavovavova Aug 31, 2021
3ee960b
Compose json schema based on metadata request
Sep 6, 2021
c5d55de
Add incremental stream support
Sep 7, 2021
612ffc3
Add gradle link on CI comment when build failed.
Sep 7, 2021
22be66e
Fix review comments
Sep 9, 2021
@@ -99,6 +99,28 @@
},
"sync_mode": "full_refresh",
"destination_sync_mode": "overwrite"
},
{
"stream": {
"name": "happytable",
Contributor:
What is this for?

Contributor Author (@vovavovavovavova), Aug 11, 2021:
From the user's side, the catalog command-line argument is taken from discover.
On our side, for read we use the console command python main.py read --config ... --catalog ...
This entry is there to fill in that command so we can run it and get data from the source.

"json_schema": {},
"supported_sync_modes": ["full_refresh", "incremental"],
"source_defined_cursor": true,
"default_cursor_field": ["campaign.start_date"]
},
"sync_mode": "incremental",
"destination_sync_mode": "append",
"cursor_field": ["campaign.start_date"]
},
{
"stream": {
"name": "unhappytable",
"json_schema": {},
"supported_sync_modes": ["full_refresh"],
"source_defined_primary_key": [["customer.id"]]
},
"sync_mode": "full_refresh",
"destination_sync_mode": "overwrite"
}
]
}
@@ -63,6 +63,28 @@
},
"sync_mode": "full_refresh",
"destination_sync_mode": "overwrite"
},
{
"stream": {
"name": "happytable",
"json_schema": {},
"supported_sync_modes": ["full_refresh", "incremental"],
"source_defined_cursor": true,
"default_cursor_field": ["campaign.start_date"]
},
"sync_mode": "incremental",
"destination_sync_mode": "append",
"cursor_field": ["campaign.start_date"]
},
{
"stream": {
"name": "unhappytable",
"json_schema": {},
"supported_sync_modes": ["full_refresh"],
"source_defined_primary_key": [["customer.id"]]
},
"sync_mode": "full_refresh",
"destination_sync_mode": "overwrite"
}
]
}
@@ -22,6 +22,7 @@
# SOFTWARE.
#


from enum import Enum
from typing import Any, List, Mapping

@@ -62,6 +63,7 @@ def send_request(self, query: str) -> SearchGoogleAdsResponse:
@staticmethod
def get_fields_from_schema(schema: Mapping[str, Any]) -> List[str]:
properties = schema.get("properties")
# return list(properties.keys()) would be clearer?
return [*properties]
Contributor:
Suggested change
return [*properties]
return list(properties.keys())


@staticmethod
@@ -148,7 +150,17 @@ def get_field_value(field_value: GoogleAdsRow, field: str) -> str:
return field_value

@staticmethod
def parse_single_result(schema: Mapping[str, Any], result: GoogleAdsRow):
fields = GoogleAds.get_fields_from_schema(schema)
def process_query(query) -> List:
Contributor:
Annotations everywhere, please.

query = query.lower().split("select")[1].split("from")[0].strip()
fields = query.split(",")
fields = [i.strip() for i in fields]
return fields

@staticmethod
def parse_single_result(schema: Mapping[str, Any], result: GoogleAdsRow, query: str = None):
if not query:
fields = GoogleAds.get_fields_from_schema(schema)
else:
fields = GoogleAds.process_query(query)
Contributor:
This should be part of the schema, i.e. get_json_schema should return the correct schema.

Contributor:
You've changed parse_single_result only to call process_query. Why can't you do this directly?

Contributor Author:
The second part (parsing the Google protobuf response, a structure that gives no way to list the response fields) requires those field names in order to call getattr. In this implementation, if the schema comes from the schemas folder, the if branch is used; otherwise the attributes are taken from the query. If I called it directly, I would have to duplicate the getattr code (in the function below).

Contributor Author:
This is outdated; it has been moved to get_fields_from_schema.

single_record = {field: GoogleAds.get_field_value(result, field) for field in fields}
return single_record
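
For illustration, a minimal sketch of the SELECT-clause parsing used above, assuming the query follows the plain "SELECT <fields> FROM <resource>" GAQL shape (the sample query below is made up):

from typing import List

def process_query(query: str) -> List[str]:
    # Take everything between SELECT and FROM, then split the field list on commas.
    select_clause = query.lower().split("select")[1].split("from")[0].strip()
    return [field.strip() for field in select_clause.split(",")]

fields = process_query("SELECT campaign.id, campaign.start_date FROM campaign")
# fields == ["campaign.id", "campaign.start_date"]; parse_single_result then uses
# these names for getattr-style lookups on each GoogleAdsRow.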
@@ -23,10 +23,10 @@
#


from typing import Any, List, Mapping, Tuple
from typing import Any, List, Mapping, MutableMapping, Tuple

from airbyte_cdk import AirbyteLogger
from airbyte_cdk.models import SyncMode
from airbyte_cdk.models import AirbyteCatalog, AirbyteStream, SyncMode
from airbyte_cdk.sources import AbstractSource
from airbyte_cdk.sources.streams import Stream
from google.ads.googleads.errors import GoogleAdsException
@@ -39,13 +39,52 @@
AdGroupAds,
AdGroups,
Campaigns,
CustomQuery,
CustomQueryFullRefresh,
CustomQueryIncremental,
DisplayKeywordPerformanceReport,
DisplayTopicsPerformanceReport,
ShoppingPerformanceReport,
)


class SourceGoogleAds(AbstractSource):
def get_local_json_schema(self, config) -> MutableMapping[str, Any]:
"""
As agreed, for now this returns the default schema (since read -> schema_generator.py may take hours for the end user).
If we want to redesign the JSON schema from the raw query, this method needs to be modified.
"""
local_json_schema = {"$schema": "http://json-schema.org/draft-07/schema#", "type": "object", "additionalProperties": True}
return local_json_schema

def discover(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> AirbyteCatalog:
Contributor:
You don't need to override this method if you implement the streams method properly and return a meaningful schema in get_json_schema.

# streams = [stream.as_airbyte_stream() for stream in self.streams(config=config)]
streams = []
for stream in self.streams(config=config):
if not isinstance(stream, (CustomQueryFullRefresh, CustomQueryIncremental)):
Contributor:
Checking for CustomQuery is enough, but in general the whole check is not needed; see my other comments.

streams.append(stream.as_airbyte_stream())
# TODO: extend with custom defined streams
Contributor:
I'm not sure you actually did this here; is this still a relevant comment?

Contributor Author:
Just a marker of where the default discover logic was extended; it will be removed.

Contributor:
You don't need to change it at all; just implement this in streams.

for usr_query in config.get("custom_query", []):
Contributor:
Why don't you do this inside the CustomQueryFullRefresh and CustomQueryIncremental classes?

Contributor Author (@vovavovavovavova), Aug 10, 2021:
We need this to happen in discover, to fill the schemas (dynamically, which is very important).

Contributor:
What stops you from doing this in streams??? This is what get_json_schema is for.

local_cursor_field = (
[usr_query.get("cursor_field")] if isinstance(usr_query.get("cursor_field"), str) else usr_query.get("cursor_field")
Contributor:
I think it's better to set the specification properly.

Contributor Author:
The cursor field is already a property if we have a generic incremental stream.

)
stream = AirbyteStream(
name=usr_query["table_name"],
json_schema=self.get_local_json_schema(config=config),
supported_sync_modes=[SyncMode.full_refresh],
)
if usr_query.get("cursor_field"):
stream.source_defined_cursor = True # ???
stream.supported_sync_modes.append(SyncMode.incremental) # type: ignore
stream.default_cursor_field = local_cursor_field

keys = Stream._wrapped_primary_key(usr_query.get("primary_key") or None) # (!!! read empty strings as null as well)
if keys and len(keys) > 0:
stream.source_defined_primary_key = keys
streams.append(stream)
Contributor:
Why are the properties of the custom streams missing?

Contributor Author:
They are defined when we create the CustomQueryX instance; otherwise the connector would not be able to work (and it does work).

# end of TODO
return AirbyteCatalog(streams=streams)

def get_credentials(self, config: Mapping[str, Any]) -> Mapping[str, Any]:
credentials = config["credentials"]

@@ -69,6 +108,11 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]:
incremental_stream_config = dict(
api=google_api, conversion_window_days=config["conversion_window_days"], start_date=config["start_date"]
)

custom_query_streams = [
Contributor:
Is it possible to validate custom queries in the check_connection method?

Contributor:
Maybe we can use this for validating queries.

Contributor Author (@vovavovavovavova), Aug 13, 2021:
I used a regex in spec.json, since the Google query validator works on the client (UI JavaScript) side.

CustomQuery(custom_query_config=config["custom_query"][i], **incremental_stream_config)
for i in range(len(config.get("custom_query", [])))
Contributor:
Why do we need range here???

]
return [
AccountPerformanceReport(**incremental_stream_config),
DisplayTopicsPerformanceReport(**incremental_stream_config),
@@ -79,4 +123,4 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]:
AdGroups(api=google_api),
Accounts(api=google_api),
Campaigns(api=google_api),
]
] + custom_query_streams
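
As a rough sketch of the reviewers' suggestion (not the code in this PR): the discover override could be dropped by letting the custom-query stream describe itself through get_json_schema. The class name below is hypothetical and the import path is assumed:

from typing import Any, Mapping

from source_google_ads.google_ads import GoogleAds  # module path assumed

class CustomQuerySchemaSketch:
    def __init__(self, custom_query_config: Mapping[str, Any]):
        self.custom_query_config = custom_query_config
        self.user_defined_query = custom_query_config["query"]

    def get_json_schema(self) -> Mapping[str, Any]:
        # Without a metadata request the real field types are unknown,
        # so every selected field is typed permissively.
        fields = GoogleAds.process_query(self.user_defined_query)
        return {
            "$schema": "http://json-schema.org/draft-07/schema#",
            "type": "object",
            "additionalProperties": True,
            "properties": {field: {"type": ["string", "null"]} for field in fields},
        }

With the default discover(), the CDK calls as_airbyte_stream() on every stream returned by streams(config), so a schema built this way would be picked up without touching AirbyteCatalog directly.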
@@ -67,6 +67,31 @@
"maximum": 1095,
"default": 14,
"examples": [14]
},
"custom_query": {
"type": "array",
Contributor:
Suggested change
"type": "array",
"type": "array",
"title": "Custom GAQL Queries"

"items": {
"type": ["object", "null"],
Contributor:
query and table_name should be required

"properties": {
"query": {
"type": "string",
"title": "User defined query to build a report by wish",
Contributor:
Suggested change
"title": "User defined query to build a report by wish",
"description": "A custom defined GAQL query for building the report.",

"examples": ["SELECT segments.ad_destination_type FROM campaign"]
Contributor:
If the user wants to select several fields, how should they be separated?

Contributor Author:
Comma-separated; I will update the example and add one more field.

},
"primary_key": {
"type": ["string", "null"],
"title": "The unique field to be used as primary key in destination database (if provided)"
},
"cursor_field": {
"type": ["string", "null"],
"title": "If not provided, the FULL-REFRESH model will be used. If provided, this will be an incremental stream with this cursor field. Please use datetime fields (start_date, end_date) for proper work."
},
"table_name": {
"type": "string",
"title": "The table name in your destination database for choosen query."
}
}
}
}
}
}
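
For illustration only, a single custom_query entry matching this spec could look like the following (all values are made up; cursor_field may be omitted to get a full-refresh stream):

custom_query_entry = {
    "query": "SELECT campaign.id, campaign.name, campaign.start_date FROM campaign",
    "table_name": "campaign_custom_report",
    "cursor_field": "campaign.start_date",
    "primary_key": "campaign.id",
}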
@@ -192,3 +192,73 @@ class ShoppingPerformanceReport(IncrementalGoogleAdsStream):
ShoppingPerformanceReport stream: https://developers.google.com/google-ads/api/fields/v8/shopping_performance_view
Google Ads API field mapping: https://developers.google.com/google-ads/api/docs/migration/mapping#shopping_performance
"""


class CustomQueryFullRefresh(GoogleAdsStream):
"""
Class that should sync by custom user query to Google Ads API
Fixme: check if WHERE>start_date was applied in standard fullrefresh stream. If yes, reapply here.
"""

def __init__(self, custom_query_config, **kwargs):
self.custom_query_config = custom_query_config
Contributor:
A lot of duplication; no need to create two separate classes. Control behaviour with config.

self.user_defined_query = custom_query_config["query"]
super().__init__(api=kwargs["api"])

@property
def primary_key(self) -> str:
return self.custom_query_config.get("primary_key") or None # not empty strings

@property
def name(self):
return self.custom_query_config["table_name"]

def get_query(self, stream_slice: Mapping[str, Any] = None) -> str:
return self.user_defined_query

def parse_response(self, response: SearchPager) -> Iterable[Mapping]:
for result in response:
yield self.google_ads_client.parse_single_result(schema=None, result=result, query=self.user_defined_query)


class CustomQueryIncremental(IncrementalGoogleAdsStream):
"""
Class that should sync by custom user query to Google Ads API
"""

def __init__(self, custom_query_config, **kwargs):
self.custom_query_config = custom_query_config
self.user_defined_query = custom_query_config["query"]
super().__init__(**kwargs)

@property
def primary_key(self) -> str:
return self.custom_query_config.get("primary_key") or None

@property
def cursor_field(self) -> str:
return self.custom_query_config["cursor_field"]

@property
def name(self):
return self.custom_query_config["table_name"]

def get_query(self, stream_slice: Mapping[str, Any] = None) -> str:
start_date, end_date = self.get_date_params(stream_slice, self.cursor_field)
final_query = (
self.user_defined_query
+ f"\nWHERE {self.cursor_field} > '{start_date}' AND {self.cursor_field} < '{end_date}' ORDER BY {self.cursor_field} ASC"
)
return final_query

def parse_response(self, response: SearchPager) -> Iterable[Mapping]:
for result in response:
yield self.google_ads_client.parse_single_result(schema=None, result=result, query=self.user_defined_query)


class CustomQuery:
def __new__(cls, *args, **kwargs):
if kwargs.get("custom_query_config", {}).get("cursor_field"):
return CustomQueryIncremental(*args, **kwargs)
else:
return CustomQueryFullRefresh(*args, **kwargs)
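
For illustration, the factory above would route a hypothetical config entry like this (google_api stands for an already-initialized GoogleAds client; dates are placeholders):

stream = CustomQuery(
    custom_query_config={
        "query": "SELECT campaign.id, campaign.start_date FROM campaign",
        "table_name": "happytable",
        "cursor_field": "campaign.start_date",
    },
    api=google_api,
    conversion_window_days=14,
    start_date="2021-01-01",
)
# cursor_field is set, so CustomQueryIncremental is returned; for a slice covering
# January 2021, get_query() would append something like:
#   WHERE campaign.start_date > '2021-01-01' AND campaign.start_date < '2021-01-31'
#   ORDER BY campaign.start_date ASC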