Skip to content

Commit

Permalink
Merge pull request #28 from ResearchObject/fix-encoding-issue
Browse files: browse the repository at this point in the history
Avoid file upload timeout/504 error when uploading a UTF-8 encoded file
  • Loading branch information
elichad authored Jul 10, 2024
2 parents a5ca85b + c2d0b07 commit a99df6d
Show file tree
Hide file tree
Showing 6 changed files with 160 additions and 18 deletions.
23 changes: 7 additions & 16 deletions src/rocrate_inveniordm/upload/uploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,22 +100,13 @@ def upload_file(record_id, file_path):
# Upload file content
api_url = credentials.get_repository_base_url()
upload_url = f"{api_url}/api/records/{record_id}/draft/files/{file_name}/content"
try:
# regular file
with open(file_path, "r") as f:
resp = requests.put(
upload_url,
data=f,
headers=get_headers("application/octet-stream"),
)
except UnicodeDecodeError:
# binary file
with open(file_path, "rb") as f:
resp = requests.put(
upload_url,
data=f,
headers=get_headers("application/octet-stream"),
)

with open(file_path, "rb") as f:
resp = requests.put(
upload_url,
data=f,
headers=get_headers("application/octet-stream"),
)

if resp.status_code != 200:
print(f"Could not upload file content: {resp.status_code} {resp.text}")
Expand Down
76 changes: 76 additions & 0 deletions test/data/datacite-out-utf-8-csv-crate.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
{
"access": {
"record": "public",
"files": "public",
"embargo": {
"active": false
}
},
"metadata": {
"resource_type": {
"id": "dataset"
},
"creators": [
{
"person_or_org": {
"type": "personal",
"given_name": "Jane",
"family_name": "Smith",
"name": "Smith",
"identifiers": [
{
"scheme": "orcid",
"identifier": "0000-0000-0000-0000"
}
]
},
"affiliations": [
{
"name": "Example University"
}
]
}
],
"contributors": [
{
"person_or_org": {
"type": "personal",
"given_name": "Jane",
"family_name": "Smith",
"identifiers": [
{
"scheme": "orcid",
"identifier": "0000-0000-0000-0000"
}
]
},
"role": {
"id": "editor"
},
"affiliations": [
{
"name": "Example University"
}
]
}
],
"title": "Demo Crate",
"publication_date": "2024-03-08",
"description": "a demo crate for Galaxy training",
"publisher": ":unkn",
"rights": [
{
"title": {
"en": "CC BY-NC-SA 4.0 International"
},
"description": {
"en": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International"
},
"link": "https://spdx.org/licenses/CC-BY-NC-SA-4.0.html"
}
]
},
"files": {
"enabled": true
}
}
3 changes: 3 additions & 0 deletions test/data/utf-8-csv-crate/data.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"Date","Minimum temperature (°C)","Maximum temperature (°C)","Rainfall (mm)"
2022-02-01,16.0,28.4,0.6
2022-02-02,16.3,17.2,12.4
72 changes: 72 additions & 0 deletions test/data/utf-8-csv-crate/ro-crate-metadata.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
{
"@context": "https://w3id.org/ro/crate/1.1/context",
"@graph": [
{
"@id": "ro-crate-metadata.json",
"@type": "CreativeWork",
"conformsTo": {
"@id": "https://w3id.org/ro/crate/1.1"
},
"about": {
"@id": "./"
}
},
{
"@id": "./",
"@type": [
"Dataset",
"LearningResource"
],
"name": "Demo Crate",
"description": "a demo crate for Galaxy training",
"datePublished": "2024-03-08",
"publisher": "https://ror.org/0abcdef00",
"license": {
"@id": "https://spdx.org/licenses/CC-BY-NC-SA-4.0.html",
"@type": "CreativeWork",
"name": "CC BY-NC-SA 4.0 International",
"description": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International"
},
"author": {
"@id": "https://orcid.org/0000-0000-0000-0000"
},
"hasPart": [
{
"@id": "data.csv"
}
]
},
{
"@id": "data.csv",
"@type": "File",
"name": "Rainfall Katoomba 2022-02",
"description": "Rainfall data for Katoomba in NSW Australia, captured February 2022.",
"encodingFormat": "text/csv",
"license": {
"@id": "https://creativecommons.org/licenses/by-nc-sa/4.0/"
}
},
{
"@id": "https://orcid.org/0000-0000-0000-0000",
"@type": "Person",
"givenName": "Jane",
"familyName": "Smith",
"affiliation": {
"@id": "https://ror.org/0abcdef00"
}
},
{
"@id": "https://ror.org/0abcdef00",
"@type": "Organization",
"name": "Example University",
"url": "https://www.example.org"
},
{
"@id": "https://spdx.org/licenses/CC-BY-NC-SA-4.0.html",
"@type": "CreativeWork",
"name": "CC BY-NC-SA 4.0 International",
"description": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International",
"identifier": "https://spdx.org/licenses/CC-BY-NC-SA-4.0.html"
}
]
}
2 changes: 1 addition & 1 deletion test/integration/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from test.unit.utils import get_request_headers, fetch_inveniordm_record

CRATES = ["minimal-ro-crate", "test-ro-crate", "real-world-example"]
CRATES = ["minimal-ro-crate", "test-ro-crate", "real-world-example", "utf-8-csv-crate"]
TEST_DATA_FOLDER = "test/data"
TEST_OUTPUT_FOLDER = "test/output"

Expand Down
2 changes: 1 addition & 1 deletion test/unit/test_uploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def test_get_headers():
expected = {
"Accept": "application/json",
"Content-Type": "application/json",
"Authorization": f"Bearer test-key",
"Authorization": "Bearer test-key",
}

result = uploader.get_headers(input)
Expand Down

0 comments on commit a99df6d

Please sign in to comment.