Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(datasets): do not double encode the data as json when saving an A… #301

Merged
merged 3 commits into from
Aug 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions kedro-datasets/RELEASE.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
## Major features and improvements

## Bug fixes and other changes
* Fixed an issue on `api.APIDataSet` where the sent data was doubly converted to json
string (once by us and once by the `requests` library).

## Community contributions

Expand Down
4 changes: 2 additions & 2 deletions kedro-datasets/kedro_datasets/api/api_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,9 +207,9 @@ def _execute_save_with_chunks(

def _execute_save_request(self, json_data: Any) -> requests.Response:
try:
json_.loads(json_data)
self._request_args["json"] = json_.loads(json_data)
except TypeError:
self._request_args["json"] = json_.dumps(json_data)
self._request_args["json"] = json_data
try:
response = requests.request(**self._request_args)
response.raise_for_status()
Expand Down
39 changes: 32 additions & 7 deletions kedro-datasets/tests/api/test_api_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import base64
import json
import socket
from typing import Any

import pytest
import requests
Expand All @@ -23,7 +24,7 @@
TEST_METHOD = "GET"
TEST_HEADERS = {"key": "value"}

TEST_SAVE_DATA = [json.dumps({"key1": "info1", "key2": "info2"})]
TEST_SAVE_DATA = [{"key1": "info1", "key2": "info2"}]


class TestAPIDataSet:
Expand Down Expand Up @@ -272,12 +273,24 @@ def test_socket_error(self, requests_mock):
api_data_set.load()

@pytest.mark.parametrize("method", POSSIBLE_METHODS)
def test_successful_save(self, requests_mock, method):
@pytest.mark.parametrize(
"data",
[TEST_SAVE_DATA, json.dumps(TEST_SAVE_DATA)],
ids=["data_as_dict", "data_as_json_string"],
)
def test_successful_save(self, requests_mock, method, data):
"""
When we want to save some data on a server
Given an APIDataSet class
Then check we get a response
Then check that the response is OK and the sent data is in the correct form.
"""

def json_callback(
request: requests.Request, context: Any # pylint: disable=unused-argument
) -> dict:
"""Callback that sends back the json."""
return request.json()

if method in ["PUT", "POST"]:
api_data_set = APIDataSet(
url=TEST_URL,
Expand All @@ -289,10 +302,12 @@ def test_successful_save(self, requests_mock, method):
TEST_URL_WITH_PARAMS,
headers=TEST_HEADERS,
status_code=requests.codes.ok,
json=json_callback,
)
response = api_data_set._save(TEST_SAVE_DATA)

response = api_data_set._save(data)
assert isinstance(response, requests.Response)
assert response.json() == TEST_SAVE_DATA

elif method == "GET":
api_data_set = APIDataSet(
url=TEST_URL,
Expand All @@ -315,6 +330,13 @@ def test_successful_save_with_json(self, requests_mock, save_methods):
Given an APIDataSet class
Then check we get a response
"""

def json_callback(
request: requests.Request, context: Any # pylint: disable=unused-argument
) -> dict:
"""Callback that sends back the json."""
return request.json()

api_data_set = APIDataSet(
url=TEST_URL,
method=save_methods,
Expand All @@ -324,17 +346,20 @@ def test_successful_save_with_json(self, requests_mock, save_methods):
save_methods,
TEST_URL,
headers=TEST_HEADERS,
text=json.dumps(TEST_JSON_RESPONSE_DATA),
json=json_callback,
)
response_list = api_data_set._save(TEST_SAVE_DATA)

assert isinstance(response_list, requests.Response)
# check that the data was sent in the correct format
assert response_list.json() == TEST_SAVE_DATA

response_dict = api_data_set._save({"item1": "key1"})
assert isinstance(response_dict, requests.Response)
assert response_dict.json() == {"item1": "key1"}

response_json = api_data_set._save(TEST_SAVE_DATA[0])
assert isinstance(response_json, requests.Response)
assert response_json.json() == TEST_SAVE_DATA[0]

@pytest.mark.parametrize("save_methods", SAVE_METHODS)
def test_save_http_error(self, requests_mock, save_methods):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ def test_full_table(self):
assert unity_ds._table.full_table_location() == "`default`.`test`"

with pytest.raises(TypeError):
ManagedTableDataSet() # pylint: disable=no-value-for-parameter
ManagedTableDataSet()

def test_describe(self):
unity_ds = ManagedTableDataSet(table="test")
Expand Down
1 change: 0 additions & 1 deletion kedro-datasets/tests/pandas/test_hdf_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,6 @@ def test_save_and_load_df_with_categorical_variables(self, hdf_data_set):

def test_thread_lock_usage(self, hdf_data_set, dummy_dataframe, mocker):
"""Test thread lock usage."""
# pylint: disable=no-member
mocked_lock = HDFDataSet._lock
mocked_lock.assert_not_called()

Expand Down