Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[low-code] convert request.body to a dict when converting to AirbyteLogMessage #20557

Merged
merged 13 commits into from
Jan 3, 2023
Original file line number Diff line number Diff line change
Expand Up @@ -408,20 +408,34 @@ def state(self, value: StreamState):
def parse_records_and_emit_request_and_responses(self, request, response, stream_slice, stream_state) -> Iterable[StreamData]:
# Only emit requests and responses when running in debug mode
if self.logger.isEnabledFor(logging.DEBUG):
yield self._create_trace_message_from_request(request)
yield self._create_trace_message_from_response(response)
yield prepared_request_to_airbyte_message(request)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rename

yield response_to_airbyte_message(response)
# Not great to need to call _read_pages which is a private method
# A better approach would be to extract the HTTP client from the HttpStream and call it directly from the HttpRequester
yield from self.parse_response(response, stream_slice=stream_slice, stream_state=stream_state)

def _create_trace_message_from_request(self, request: requests.PreparedRequest):
    """Build an INFO-level log message describing an outgoing HTTP request.

    The message text is ``request:`` followed by a JSON object with the keys
    ``url``, ``http_method``, ``headers`` and ``body``. The text is passed
    through ``filter_secrets`` before being wrapped in an ``AirbyteMessage``.

    :param request: the prepared request to describe
    :return: an ``AirbyteMessage`` of type LOG carrying the serialized request
    """
    # FIXME: this should return some sort of trace message
    body = request.body
    if isinstance(body, (bytes, bytearray)):
        # A prepared request may carry its payload as bytes, which json.dumps
        # cannot serialize; decode it so logging the request never raises.
        body = body.decode("utf-8", errors="replace")
    request_dict = {"url": request.url, "http_method": request.method, "headers": dict(request.headers), "body": body}
    log_message = filter_secrets(f"request:{json.dumps(request_dict)}")
    return AirbyteMessage(type=MessageType.LOG, log=AirbyteLogMessage(level=Level.INFO, message=log_message))

def _create_trace_message_from_response(self, response: requests.Response):
    """Build an INFO-level log message describing a received HTTP response.

    The message text is ``response:`` followed by a JSON object with the keys
    ``body``, ``headers`` and ``status_code``. The text is passed through
    ``filter_secrets`` before being wrapped in an ``AirbyteMessage``.

    :param response: the HTTP response to describe
    :return: an ``AirbyteMessage`` of type LOG carrying the serialized response
    """
    # FIXME: this should return some sort of trace message
    payload = {
        "body": response.text,
        "headers": dict(response.headers),
        "status_code": response.status_code,
    }
    serialized = json.dumps(payload)
    sanitized = filter_secrets(f"response:{serialized}")
    log_entry = AirbyteLogMessage(level=Level.INFO, message=sanitized)
    return AirbyteMessage(type=MessageType.LOG, log=log_entry)

def prepared_request_to_airbyte_message(request: requests.PreparedRequest) -> AirbyteMessage:
# FIXME: this should return some sort of trace message
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

need to keep them FIXMEs for now 😞

request_dict = {
"url": request.url,
"http_method": request.method,
"headers": dict(request.headers),
"body": _body_binary_string_to_dict(request.body),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of assigning the body directly, delegate to _body_binary_string_to_dict to convert it to an Optional[Mapping].

}
log_message = filter_secrets(f"request:{json.dumps(request_dict)}")
return AirbyteMessage(type=MessageType.LOG, log=AirbyteLogMessage(level=Level.INFO, message=log_message))


def _body_binary_string_to_dict(body_str) -> Optional[Mapping[str, str]]:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we have typing for this parameter?

if body_str:
return json.loads(body_str.decode())
else:
return None


def response_to_airbyte_message(response: requests.Response) -> AirbyteMessage:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

moved to a function for consistency and ease of testing

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Those methods aren't used anywhere else except from the tests though. Should we indicate them as private using the _ prefix?

Saying that makes me think it is weird that parse_records_and_emit_request_and_responses is public as well, since it's only used internally by the class. My main concern is that someone external might be using it, which would create a dependency we'd have to manage when we want to change it. This happened with #20019 and prevented us from making a breaking change we wanted to make.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good call. prepended both with _

# FIXME: this should return some sort of trace message
response_dict = {"body": response.text, "headers": dict(response.headers), "status_code": response.status_code}
log_message = filter_secrets(f"response:{json.dumps(response_dict)}")
return AirbyteMessage(type=MessageType.LOG, log=AirbyteLogMessage(level=Level.INFO, message=log_message))
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@
import airbyte_cdk.sources.declarative.requesters.error_handlers.response_status as response_status
import pytest
import requests
from airbyte_cdk.models import AirbyteLogMessage, Level, SyncMode
from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level, SyncMode, Type
from airbyte_cdk.sources.declarative.exceptions import ReadException
from airbyte_cdk.sources.declarative.requesters.error_handlers.response_action import ResponseAction
from airbyte_cdk.sources.declarative.requesters.error_handlers.response_status import ResponseStatus
from airbyte_cdk.sources.declarative.requesters.request_option import RequestOptionType
from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod
from airbyte_cdk.sources.declarative.retrievers.simple_retriever import SimpleRetriever
from airbyte_cdk.sources.declarative.retrievers.simple_retriever import SimpleRetriever, prepared_request_to_airbyte_message
from airbyte_cdk.sources.declarative.stream_slicers import DatetimeStreamSlicer
from airbyte_cdk.sources.streams.http.auth import NoAuth
from airbyte_cdk.sources.streams.http.http import HttpStream
Expand Down Expand Up @@ -433,3 +433,93 @@ def test_path(test_name, requester_path, paginator_path, expected_path):

actual_path = retriever.path(stream_state=None, stream_slice=None, next_page_token=None)
assert expected_path == actual_path


def _expected_request_log(message):
    """Wrap *message* in the INFO-level LOG AirbyteMessage the function under test should emit."""
    return AirbyteMessage(type=Type.LOG, log=AirbyteLogMessage(level=Level.INFO, message=message))


@pytest.mark.parametrize(
    "test_name, http_method, url, headers, params, body, expected_airbyte_message",
    [
        (
            "test_basic_get_request",
            HttpMethod.GET,
            "https://airbyte.io",
            {},
            {},
            {},
            _expected_request_log('request:{"url": "https://airbyte.io/", "http_method": "GET", "headers": {}, "body": null}'),
        ),
        (
            "test_get_request_with_headers",
            HttpMethod.GET,
            "https://airbyte.io",
            {"h1": "v1", "h2": "v2"},
            {},
            {},
            _expected_request_log(
                'request:{"url": "https://airbyte.io/", "http_method": "GET", "headers": {"h1": "v1", "h2": "v2"}, "body": null}'
            ),
        ),
        (
            "test_get_request_with_request_params",
            HttpMethod.GET,
            "https://airbyte.io",
            {},
            {"p1": "v1", "p2": "v2"},
            {},
            _expected_request_log(
                'request:{"url": "https://airbyte.io/?p1=v1&p2=v2", "http_method": "GET", "headers": {}, "body": null}'
            ),
        ),
        (
            "test_get_request_with_request_body",
            HttpMethod.GET,
            "https://airbyte.io",
            {"Content-Type": "application/json"},
            {},
            {"b1": "v1", "b2": "v2"},
            _expected_request_log(
                'request:{"url": "https://airbyte.io/", "http_method": "GET", "headers": {"Content-Type": "application/json", "Content-Length": "24"}, "body": {"b1": "v1", "b2": "v2"}}'
            ),
        ),
        (
            "test_basic_post_request",
            HttpMethod.POST,
            "https://airbyte.io",
            {},
            {},
            {},
            _expected_request_log(
                'request:{"url": "https://airbyte.io/", "http_method": "POST", "headers": {"Content-Length": "0"}, "body": null}'
            ),
        ),
    ],
)
def test_prepared_request_to_airbyte_message(test_name, http_method, url, headers, params, body, expected_airbyte_message):
    """Check that a prepared request is rendered as the expected LOG message.

    Each case builds a ``requests.Request`` from the given method, URL,
    headers and query params, attaches a JSON body only when one is supplied,
    prepares it, and compares the resulting message with the expected one.
    """
    # An empty body means "no JSON payload", so leave ``json`` unset in that case.
    unprepared = requests.Request(
        method=http_method.name,
        url=url,
        headers=headers,
        params=params,
        json=body if body else None,
    )

    actual_airbyte_message = prepared_request_to_airbyte_message(unprepared.prepare())

    assert expected_airbyte_message == actual_airbyte_message
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ def test_read_stream():
request = {
"url": "https://demonslayers.com/api/v1/hashiras?era=taisho",
"headers": {"Content-Type": "application/json"},
"http_method": "GET",
Copy link
Contributor Author

@girarda girarda Dec 16, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixes the tests

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we'll be able to merge this PR when this is merged in so CI fails on broken tests #20217

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for pointing that out - was looking into it for a PR I'm working on right now 😅

"body": {"custom": "field"},
}
response = {"status_code": 200, "headers": {"field": "value"}, "body": '{"name": "field"}'}
Expand All @@ -165,6 +166,7 @@ def test_read_stream():
parameters={"era": ["taisho"]},
headers={"Content-Type": "application/json"},
body={"custom": "field"},
http_method="GET",
),
response=HttpResponse(status=200, headers={"field": "value"}, body={"name": "field"}),
records=[{"name": "Shinobu Kocho"}, {"name": "Muichiro Tokito"}],
Expand All @@ -175,6 +177,7 @@ def test_read_stream():
parameters={"era": ["taisho"]},
headers={"Content-Type": "application/json"},
body={"custom": "field"},
http_method="GET",
),
response=HttpResponse(status=200, headers={"field": "value"}, body={"name": "field"}),
records=[{"name": "Mitsuri Kanroji"}],
Expand Down Expand Up @@ -210,6 +213,7 @@ def test_read_stream_with_logs():
"url": "https://demonslayers.com/api/v1/hashiras?era=taisho",
"headers": {"Content-Type": "application/json"},
"body": {"custom": "field"},
"http_method": "GET",
}
response = {"status_code": 200, "headers": {"field": "value"}, "body": '{"name": "field"}'}
expected_pages = [
Expand All @@ -219,6 +223,7 @@ def test_read_stream_with_logs():
parameters={"era": ["taisho"]},
headers={"Content-Type": "application/json"},
body={"custom": "field"},
http_method="GET",
),
response=HttpResponse(status=200, headers={"field": "value"}, body={"name": "field"}),
records=[{"name": "Shinobu Kocho"}, {"name": "Muichiro Tokito"}],
Expand All @@ -229,6 +234,7 @@ def test_read_stream_with_logs():
parameters={"era": ["taisho"]},
headers={"Content-Type": "application/json"},
body={"custom": "field"},
http_method="GET",
),
response=HttpResponse(status=200, headers={"field": "value"}, body={"name": "field"}),
records=[{"name": "Mitsuri Kanroji"}],
Expand Down Expand Up @@ -272,6 +278,7 @@ def test_read_stream_no_records():
"url": "https://demonslayers.com/api/v1/hashiras?era=taisho",
"headers": {"Content-Type": "application/json"},
"body": {"custom": "field"},
"http_method": "GET",
}
response = {"status_code": 200, "headers": {"field": "value"}, "body": '{"name": "field"}'}
expected_pages = [
Expand All @@ -281,6 +288,7 @@ def test_read_stream_no_records():
parameters={"era": ["taisho"]},
headers={"Content-Type": "application/json"},
body={"custom": "field"},
http_method="GET",
),
response=HttpResponse(status=200, headers={"field": "value"}, body={"name": "field"}),
records=[],
Expand All @@ -291,6 +299,7 @@ def test_read_stream_no_records():
parameters={"era": ["taisho"]},
headers={"Content-Type": "application/json"},
body={"custom": "field"},
http_method="GET",
),
response=HttpResponse(status=200, headers={"field": "value"}, body={"name": "field"}),
records=[],
Expand Down Expand Up @@ -388,15 +397,19 @@ def test_read_stream_returns_error_if_stream_does_not_exist():
pytest.param(
'request:{"url": "https://nichirin.com/v1/swords?color=orange", "http_method": "PUT", "headers": {"field": "name"}, "body":{"key": "value"}}',
HttpRequest(
url="https://nichirin.com/v1/swords", parameters={"color": ["orange"]}, headers={"field": "name"}, body={"key": "value"},
url="https://nichirin.com/v1/swords",
parameters={"color": ["orange"]},
headers={"field": "name"},
body={"key": "value"},
http_method="PUT",
),
id="test_create_request_with_all_fields",
),
pytest.param(
'request:{"url": "https://nichirin.com/v1/swords?color=orange", "http_method": "GET", "headers": {"field": "name"}}',
HttpRequest(url="https://nichirin.com/v1/swords", parameters={"color": ["orange"]}, headers={"field": "name"},
http_method="GET"),
HttpRequest(
url="https://nichirin.com/v1/swords", parameters={"color": ["orange"]}, headers={"field": "name"}, http_method="GET"
),
id="test_create_request_with_no_body",
),
pytest.param(
Expand All @@ -409,6 +422,11 @@ def test_read_stream_returns_error_if_stream_does_not_exist():
HttpRequest(url="https://nichirin.com/v1/swords", headers={"field": "name"}, body={"key": "value"}, http_method="PUT"),
id="test_create_request_with_no_parameters",
),
pytest.param(
'request:{"url": "https://nichirin.com/v1/swords", "http_method": "POST", "headers": {"field": "name"}, "body":null}',
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add a test where "body": null for peace of mind

HttpRequest(url="https://nichirin.com/v1/swords", headers={"field": "name"}, body=None, http_method="POST"),
id="test_create_request_with_null_body",
),
pytest.param("request:{invalid_json: }", None, id="test_invalid_json_still_does_not_crash"),
pytest.param("just a regular log message", None, id="test_no_request:_prefix_does_not_crash"),
],
Expand Down