From 078e63e7fa6cfebf051c4631dc1e6282ead6e48a Mon Sep 17 00:00:00 2001 From: Hironsan Date: Sun, 2 Oct 2022 17:05:33 +0900 Subject: [PATCH] Add data export client --- doccano_client/clients/data_export.py | 59 +++++++++ doccano_client/models/data_export.py | 6 + tests/clients/test_data_export.py | 46 +++++++ .../cassettes/data_export/download.yaml | 113 ++++++++++++++++++ .../fixtures/cassettes/data_export/login.yaml | 55 +++++++++ .../cassettes/data_export/options.yaml | 62 ++++++++++ .../data_export/schedule_download.yaml | 55 +++++++++ 7 files changed, 396 insertions(+) create mode 100644 doccano_client/clients/data_export.py create mode 100644 doccano_client/models/data_export.py create mode 100644 tests/clients/test_data_export.py create mode 100644 tests/fixtures/cassettes/data_export/download.yaml create mode 100644 tests/fixtures/cassettes/data_export/login.yaml create mode 100644 tests/fixtures/cassettes/data_export/options.yaml create mode 100644 tests/fixtures/cassettes/data_export/schedule_download.yaml diff --git a/doccano_client/clients/data_export.py b/doccano_client/clients/data_export.py new file mode 100644 index 0000000..708eab2 --- /dev/null +++ b/doccano_client/clients/data_export.py @@ -0,0 +1,59 @@ +from __future__ import annotations + +from typing import Any, Iterator, List + +from doccano_client.client import DoccanoClient +from doccano_client.models.data_export import Option + + +class DataExportClient: + """Client for interacting with the Doccano data export API""" + + def __init__(self, client: DoccanoClient): + self._client = client + + def list_options(self, project_id: int) -> List[Option]: + """Return all download options + + Args: + project_id (int): The id of the project + + Returns: + List[Option]: The list of the download options. + """ + resource = f"projects/{project_id}/download-format" + response = self._client.get(resource) + options = [Option.parse_obj(label) for label in response.json()] + return options + + def schedule_download(self, project_id: int, option: Option, only_approved=False) -> str: + """Schedule a download + + Args: + project_id (int): The id of the project + option (Option): The download option + only_approved (bool): Whether to export approved data only + + Returns: + str: The celery task id + """ + resource = f"projects/{project_id}/download" + data = {"format": option.name, "exportApproved": only_approved} + response = self._client.post(resource, json=data) + task_id = response.json()["task_id"] + return task_id + + def download(self, project_id: int, task_id: str) -> Iterator[Any]: + """Download a file from the server + + Args: + project_id (int): The id of the project + task_id (str): The celery task id + + Yields: + Iterator[Any]: The file content + """ + resource = f"projects/{project_id}/download" + params = {"taskId": task_id} + response = self._client.get(resource, params=params, stream=True) + yield from response.iter_content(chunk_size=65536) diff --git a/doccano_client/models/data_export.py b/doccano_client/models/data_export.py new file mode 100644 index 0000000..0eb40ec --- /dev/null +++ b/doccano_client/models/data_export.py @@ -0,0 +1,6 @@ +from pydantic import BaseModel + + +class Option(BaseModel): + name: str + example: str = "" diff --git a/tests/clients/test_data_export.py b/tests/clients/test_data_export.py new file mode 100644 index 0000000..8068950 --- /dev/null +++ b/tests/clients/test_data_export.py @@ -0,0 +1,46 @@ +import pathlib +import time + +import vcr + +from doccano_client.client import DoccanoClient +from doccano_client.clients.data_export import DataExportClient +from doccano_client.models.data_export import Option +from tests.conftest import cassettes_path + + +class TestDataExportClient: + @classmethod + def setup_class(cls): + with vcr.use_cassette(str(cassettes_path / "data_export/login.yaml"), mode="once"): + client = DoccanoClient("http://localhost:8000") + client.login(username="admin", password="password") + cls.client = DataExportClient(client) + cls.project_id = 16 + + def test_list_options(self): + with vcr.use_cassette(str(cassettes_path / "data_export/options.yaml"), mode="once"): + response = self.client.list_options(self.project_id) + assert len(response) > 0 + assert all(isinstance(option, Option) for option in response) + + def test_schedule_download(self): + with vcr.use_cassette(str(cassettes_path / "data_export/schedule_download.yaml"), mode="once"): + option = Option(name="JSONL") + task_id = self.client.schedule_download(self.project_id, option) + assert task_id is not None + assert isinstance(task_id, str) + + def test_download(self): + with vcr.use_cassette(str(cassettes_path / "data_export/download.yaml"), mode="once"): + option = Option(name="JSONL") + task_id = self.client.schedule_download(self.project_id, option) + time.sleep(10) # lazy work + chunks = self.client.download(self.project_id, task_id) + file = pathlib.Path(__file__).parent / "data/annotation.zip" + with file.open("wb") as f: + for chunk in chunks: + f.write(chunk) + assert file.exists() + assert file.stat().st_size > 0 + file.unlink() diff --git a/tests/fixtures/cassettes/data_export/download.yaml b/tests/fixtures/cassettes/data_export/download.yaml new file mode 100644 index 0000000..314ad98 --- /dev/null +++ b/tests/fixtures/cassettes/data_export/download.yaml @@ -0,0 +1,113 @@ +interactions: +- request: + body: '{"format": "JSONL", "exportApproved": false}' + headers: + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '44' + Cookie: + - csrftoken=FEvkld0o1MszakkYc27mcoDrcTxNvA0J9jjIzKW6OMzkxS0Zar9aK0aeOk3iALkQ; + sessionid=rmbx2nr20xbrk8ofdi9u4ocgai6bze9g + User-Agent: + - python-requests/2.28.1 + X-CSRFToken: + - FEvkld0o1MszakkYc27mcoDrcTxNvA0J9jjIzKW6OMzkxS0Zar9aK0aeOk3iALkQ + accept: + - application/json + content-type: + - application/json + referer: + - http://localhost:8000 + method: POST + uri: http://localhost:8000/v1/projects/16/download + response: + body: + string: '{"task_id":"775c0bde-3ee5-4e19-b9cd-6f4d53c3c120"}' + headers: + Allow: + - GET, POST, HEAD, OPTIONS + Connection: + - close + Content-Length: + - '50' + Content-Type: + - application/json + Cross-Origin-Opener-Policy: + - same-origin + Date: + - Sun, 02 Oct 2022 07:57:46 GMT + Referrer-Policy: + - same-origin + Server: + - gunicorn + Vary: + - Accept, Origin, Cookie + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Cookie: + - csrftoken=FEvkld0o1MszakkYc27mcoDrcTxNvA0J9jjIzKW6OMzkxS0Zar9aK0aeOk3iALkQ; + sessionid=rmbx2nr20xbrk8ofdi9u4ocgai6bze9g + User-Agent: + - python-requests/2.28.1 + X-CSRFToken: + - FEvkld0o1MszakkYc27mcoDrcTxNvA0J9jjIzKW6OMzkxS0Zar9aK0aeOk3iALkQ + accept: + - application/json + content-type: + - application/json + referer: + - http://localhost:8000 + method: GET + uri: http://localhost:8000/v1/projects/16/download?taskId=775c0bde-3ee5-4e19-b9cd-6f4d53c3c120 + response: + body: + string: !!binary | + UEsDBBQAAAAIADc/QlW3l8FGmgAAAGsCAAALAAAAYWRtaW4uanNvbmytzTELwjAQhuHdn/HNGdRW + q9lsXdzcS4eIRwkmaSFRC6X/3WCxaEGpkOnuOHjeFvIMvojnScTgqHHgflgHBiVOpMDzgiGrtCbj + bH89H343V6UYNDnR792sHbQ4qLYKqq2DasmgUSN0rWg3RcxRV1Y6eSMUL7nFXV7kUZR02Hu7e69s + xpX0zwqDoVL8Ci4/gttxMJsU/GJH3n4AUEsBAhQDFAAAAAgANz9CVbeXwUaaAAAAawIAAAsAAAAA + AAAAAAAAAKSBAAAAAGFkbWluLmpzb25sUEsFBgAAAAABAAEAOQAAAMMAAAAAAA== + headers: + Allow: + - GET, POST, HEAD, OPTIONS + Connection: + - close + Content-Disposition: + - attachment; filename="169be049-d9b1-4882-a974-cd8625adcd67.zip" + Content-Length: + - '274' + Content-Type: + - application/zip + Cross-Origin-Opener-Policy: + - same-origin + Date: + - Sun, 02 Oct 2022 07:57:56 GMT + Referrer-Policy: + - same-origin + Server: + - gunicorn + Vary: + - Accept, Origin, Cookie + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + status: + code: 200 + message: OK +version: 1 diff --git a/tests/fixtures/cassettes/data_export/login.yaml b/tests/fixtures/cassettes/data_export/login.yaml new file mode 100644 index 0000000..1701bcb --- /dev/null +++ b/tests/fixtures/cassettes/data_export/login.yaml @@ -0,0 +1,55 @@ +interactions: +- request: + body: '{"username": "admin", "password": "password"}' + headers: + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '45' + User-Agent: + - python-requests/2.28.1 + accept: + - application/json + content-type: + - application/json + referer: + - http://localhost:8000 + method: POST + uri: http://localhost:8000/v1/auth/login/ + response: + body: + string: '{"key":"b37c59f6e0db6bf8a829858ce925b9c2b4d7b03d"}' + headers: + Allow: + - POST, OPTIONS + Connection: + - close + Content-Length: + - '50' + Content-Type: + - application/json + Cross-Origin-Opener-Policy: + - same-origin + Date: + - Sun, 02 Oct 2022 07:53:26 GMT + Referrer-Policy: + - same-origin + Server: + - gunicorn + Set-Cookie: + - csrftoken=FEvkld0o1MszakkYc27mcoDrcTxNvA0J9jjIzKW6OMzkxS0Zar9aK0aeOk3iALkQ; + expires=Sun, 01 Oct 2023 07:53:26 GMT; Max-Age=31449600; Path=/; SameSite=Lax + - sessionid=rmbx2nr20xbrk8ofdi9u4ocgai6bze9g; expires=Sun, 16 Oct 2022 07:53:26 + GMT; HttpOnly; Max-Age=1209600; Path=/; SameSite=Lax + Vary: + - Accept, Origin, Cookie + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + status: + code: 200 + message: OK +version: 1 diff --git a/tests/fixtures/cassettes/data_export/options.yaml b/tests/fixtures/cassettes/data_export/options.yaml new file mode 100644 index 0000000..62ba533 --- /dev/null +++ b/tests/fixtures/cassettes/data_export/options.yaml @@ -0,0 +1,62 @@ +interactions: +- request: + body: null + headers: + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Cookie: + - csrftoken=FEvkld0o1MszakkYc27mcoDrcTxNvA0J9jjIzKW6OMzkxS0Zar9aK0aeOk3iALkQ; + sessionid=rmbx2nr20xbrk8ofdi9u4ocgai6bze9g + User-Agent: + - python-requests/2.28.1 + X-CSRFToken: + - FEvkld0o1MszakkYc27mcoDrcTxNvA0J9jjIzKW6OMzkxS0Zar9aK0aeOk3iALkQ + accept: + - application/json + content-type: + - application/json + referer: + - http://localhost:8000 + method: GET + uri: http://localhost:8000/v1/projects/16/download-format + response: + body: + string: '[{"name":"CSV","example":"text,label\n\"Terrible customer service.\",\"negative\"\n\"Really + great transaction.\",\"positive\"\n\"Great price.\",\"positive\"\n"},{"name":"fastText","example":"__label__negative + Terrible customer service.\n__label__positive Really great transaction.\n__label__positive + Great price.\n"},{"name":"JSON","example":"[\n {\n \"text\": \"Terrible + customer service.\",\n \"label\": [\"negative\"]\n },\n {\n \"text\": + \"Really great transaction.\",\n \"label\": [\"positive\"]\n },\n {\n \"text\": + \"Great price.\",\n \"label\": [\"positive\"]\n }\n]\n"},{"name":"JSONL","example":"{\"text\": + \"Terrible customer service.\", \"label\": [\"negative\"]}\n{\"text\": \"Really + great transaction.\", \"label\": [\"positive\"]}\n{\"text\": \"Great price.\", + \"label\": [\"positive\"]}\n"}]' + headers: + Allow: + - GET, HEAD, OPTIONS + Connection: + - close + Content-Length: + - '867' + Content-Type: + - application/json + Cross-Origin-Opener-Policy: + - same-origin + Date: + - Sun, 02 Oct 2022 07:53:26 GMT + Referrer-Policy: + - same-origin + Server: + - gunicorn + Vary: + - Accept, Origin, Cookie + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + status: + code: 200 + message: OK +version: 1 diff --git a/tests/fixtures/cassettes/data_export/schedule_download.yaml b/tests/fixtures/cassettes/data_export/schedule_download.yaml new file mode 100644 index 0000000..695f9dd --- /dev/null +++ b/tests/fixtures/cassettes/data_export/schedule_download.yaml @@ -0,0 +1,55 @@ +interactions: +- request: + body: '{"format": "JSONL", "exportApproved": false}' + headers: + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '44' + Cookie: + - csrftoken=FEvkld0o1MszakkYc27mcoDrcTxNvA0J9jjIzKW6OMzkxS0Zar9aK0aeOk3iALkQ; + sessionid=rmbx2nr20xbrk8ofdi9u4ocgai6bze9g + User-Agent: + - python-requests/2.28.1 + X-CSRFToken: + - FEvkld0o1MszakkYc27mcoDrcTxNvA0J9jjIzKW6OMzkxS0Zar9aK0aeOk3iALkQ + accept: + - application/json + content-type: + - application/json + referer: + - http://localhost:8000 + method: POST + uri: http://localhost:8000/v1/projects/16/download + response: + body: + string: '{"task_id":"3a17236c-1e34-4027-adee-8a1be6fe5e02"}' + headers: + Allow: + - GET, POST, HEAD, OPTIONS + Connection: + - close + Content-Length: + - '50' + Content-Type: + - application/json + Cross-Origin-Opener-Policy: + - same-origin + Date: + - Sun, 02 Oct 2022 07:57:46 GMT + Referrer-Policy: + - same-origin + Server: + - gunicorn + Vary: + - Accept, Origin, Cookie + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + status: + code: 200 + message: OK +version: 1