Skip to content

Commit

Permalink
🎉 Source File: add user-agent option (#14488)
Browse files Browse the repository at this point in the history
* add user-agent option to source-file

* fix acceptance-test

* set version to 0.0 if version cannot be fetched

* bump connector version

* auto-bump connector version

Co-authored-by: marcosmarxm <marcosmarxm@gmail.com>
Co-authored-by: Octavia Squidington III <octavia-squidington-iii@users.noreply.github.com>
  • Loading branch information
3 people authored Jul 13, 2022
1 parent bda11b0 commit 32e767c
Show file tree
Hide file tree
Showing 7 changed files with 29 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@
- name: File
sourceDefinitionId: 778daa7c-feaf-4db6-96f3-70fd645acc77
dockerRepository: airbyte/source-file
dockerImageTag: 0.2.12
dockerImageTag: 0.2.13
documentationUrl: https://docs.airbyte.io/integrations/sources/file
icon: file.svg
sourceType: file
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2261,7 +2261,7 @@
supportsNormalization: false
supportsDBT: false
supported_destination_sync_modes: []
- dockerImage: "airbyte/source-file:0.2.12"
- dockerImage: "airbyte/source-file:0.2.13"
spec:
documentationUrl: "https://docs.airbyte.io/integrations/sources/file"
connectionSpecification:
Expand Down Expand Up @@ -2320,6 +2320,11 @@
storage:
type: "string"
const: "HTTPS"
user_agent:
type: "boolean"
title: "User-Agent"
default: false
description: "Add User-Agent to request"
- title: "GCS: Google Cloud Storage"
required:
- "storage"
Expand Down
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-file/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,5 @@ COPY source_file ./source_file
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

LABEL io.airbyte.version=0.2.12
LABEL io.airbyte.version=0.2.13
LABEL io.airbyte.name=airbyte/source-file
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def test__read_from_public_provider(download_gcs_public_data, storage_provider,
"format": "csv",
"dataset_name": "output",
"reader_options": json.dumps({"sep": separator, "nrows": 42}),
"provider": {"storage": storage_provider},
"provider": {"storage": storage_provider, "user_agent": False},
"url": url,
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"url": "https://storage.googleapis.com/covid19-open-data/v2/latest/epidemiology.csv",
"provider": {
"storage": "HTTPS",
"reader_impl": "gcsfs"
"reader_impl": "gcsfs",
"user_agent": false
}
}
12 changes: 12 additions & 0 deletions airbyte-integrations/connectors/source-file/source_file/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import json
import traceback
from os import environ
from typing import Iterable
from urllib.parse import urlparse

Expand Down Expand Up @@ -114,6 +115,17 @@ def _open(self, binary):
else:
uri = f"{storage}{user}@{host}:{port}/{url}"
return smart_open.open(uri, transport_params=transport_params, mode=mode)
elif storage in ("https://", "http://"):
transport_params = None
if self._provider["user_agent"]:
airbyte_version = environ.get("AIRBYTE_VERSION", "0.0")
transport_params = {"headers": {"Accept-Encoding": "identity", "User-Agent": f"Airbyte/{airbyte_version}"}}
logger.info(f"TransportParams: {transport_params}")
return smart_open.open(
self.full_url,
mode=mode,
transport_params=transport_params,
)
return smart_open.open(self.full_url, mode=mode)

@property
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@
"storage": {
"type": "string",
"const": "HTTPS"
},
"user_agent": {
"type": "boolean",
"title": "User-Agent",
"default": false,
"description": "Add User-Agent to request"
}
}
},
Expand Down

0 comments on commit 32e767c

Please sign in to comment.