Skip to content

Commit

Permalink
added payload
Browse files Browse the repository at this point in the history
  • Loading branch information
lobsam committed Dec 19, 2024
1 parent e6bf78c commit b8233d1
Show file tree
Hide file tree
Showing 10 changed files with 182 additions and 75 deletions.
Empty file.
8 changes: 4 additions & 4 deletions src/pecha_uploader/category/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@ def get_category(category_name: str):
res = response.read().decode("utf-8")
print(res)
if "error" not in res:
return True
return {"status": True}
elif "already exists" in res["error"]:
return True
return False
return {"status": True}
return {"status": False, "error": res}
except HTTPError as e:
print("[categories] Error code: ", e.code)
print(e.read())
return False
return {"status": False, "error": res}
8 changes: 4 additions & 4 deletions src/pecha_uploader/category/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@ def post_category(en_category_list: List[str], bo_category_list: List[str]):
res = response.read().decode("utf-8")
print("categories response: ", res)
if "error" not in res:
return True
return {"status": True}
elif "already exists" in res:
return True
return False
return {"status": True}
return {"status": False, "error": res}
except HTTPError as e:
print("Error code: ", e)
return False
return {"status": False, "error": e}
Empty file.
6 changes: 3 additions & 3 deletions src/pecha_uploader/index/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@ def post_index(index: str, category_list: List[str], nodes: Dict):
res = response.read().decode("utf-8")
print(res)
if "error" in res and "already exists." not in res:
return False
return True
return {"status": False, "error": res}
return {"status": True}
except HTTPError as e:
print("Error code: ", e.code)
print(e.read())
return False
return {"status": False, "error": e}
Empty file.
169 changes: 169 additions & 0 deletions src/pecha_uploader/pipeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
"""
This module processes text files, parses JSON content,
and uploads structured data to various APIs for further processing.
"""
import json

from pecha_uploader.category.upload import post_category
from pecha_uploader.config import BASEPATH
from pecha_uploader.index.upload import post_index
from pecha_uploader.preprocess.upload import post_term
from pecha_uploader.text.upload import post_text
from pecha_uploader.utils import generate_chapters, generate_schema, parse_annotation


def add_by_file(text_name: str, text_type: str):
"""
Read a text file and add.
"""
file = f"{BASEPATH}/jsondata/texts/{text_type}/{text_name}"

print(f"=========================={text_name}===========================")

try:
with open(file, encoding="utf-8") as f:
data = json.load(f)
except Exception as e:
log_error("file", text_name, f"Error opening file: {e}")
return False

payload = {
"bookKey": "",
"categoryEn": [],
"categoryHe": [],
"textEn": [],
"textHe": [],
"bookDepth": 0,
}

try:
for lang in data:
if lang == "source":
for i in range(len(data[lang]["categories"])):
payload["categoryEn"].append(data[lang]["categories"][: i + 1])
for book in data[lang]["books"]:
payload["bookKey"] = payload["categoryEn"][-1][-1]["name"]
payload["textEn"].append(book)
elif lang == "target":
for i in range(len(data[lang]["categories"])):
payload["categoryHe"].append(data[lang]["categories"][: i + 1])
for book in data[lang]["books"]:
payload["textHe"].append(book)
except Exception as e:
log_error("Payload", text_name, f"Error parsing data: {e}")
return False

try:
print("===========================( post_category )===========================")
for i in range(len(payload["categoryEn"])):
response = post_term(
payload["categoryEn"][i][-1]["name"],
payload["categoryHe"][i][-1]["name"],
)
if not response["status"]:
if "term_conflict" in response:
error = response["term_conflict"]
log_error("Term", text_name, f"{error}")
else:
log_error("Term", text_name, f"{response}")
return False

category_response = post_category(
payload["categoryEn"][i], payload["categoryHe"][i]
)
if not category_response["status"]:
error = category_response["error"]
log_error("Category", text_name, f"{error}")
return False

print(
"============================( post_index )================================"
)
schema = generate_schema(payload["textEn"][0], payload["textHe"][0])
index_response = post_index(
payload["bookKey"], payload["categoryEn"][-1], schema[0]
)
if not index_response["status"]:
error = index_response["error"]
log_error("Index", text_name, f"{error}")
return False

print(
"=============================( post_text )================================="
)
text_index_key = payload["bookKey"]

for book in payload["textHe"]:
if not process_text(book, "he", text_index_key):
return False

for book in payload["textEn"]:
if not process_text(book, "en", text_index_key):
return False

except Exception as e:
print("Error : ", e)
return False

with open(
f"{BASEPATH}/jsondata/texts/success.txt", mode="a", encoding="utf-8"
) as f:
f.write(f"{text_name}\n")

return True


def process_text(book: dict, lang: str, text_index_key: str):
"""
Process text for a given language and post it.
"""
text = {
"versionTitle": book["title"],
"versionSource": book["versionSource"],
"language": lang,
"actualLanguage": book["language"],
"completestatus": book["completestatus"],
"text": [],
}

if "content" in book:
# Complex text
if isinstance(book["content"], dict):
result = generate_chapters(book["content"], book["language"])
for key, value in result.items():
text["text"] = value
text_response = post_text(key, text)
if value and not text_response["status"]:
error = text_response["error"]
log_error("Text", key, f"{error}")
return False

# Simple text
elif isinstance(book["content"], list):
text["text"] = parse_annotation(book["content"])
text_response = post_text(key, text)
if not text_response["status"]:
error = text_response["error"]
log_error("Text", text_index_key, f"{error}")
return False

return True


def log_error(api_name: str, text_name: str, message: str):
"""
Logs error details to a designated error file.
Args:
api_name (str): The name of the API where the error occurred.
text_name (str): The name of the text file being processed.
message (str): A descriptive error message.
Writes:
The error details into an `errors.txt` file located at
`{BASEPATH}/pecha_uploader/texts/`.
"""
with open(
f"{BASEPATH}/pecha_uploader/texts/errors.txt", mode="a", encoding="utf-8"
) as error_file: # noqa
error_file.write(f"({api_name})--->{text_name}: {message}\n\n")
61 changes: 0 additions & 61 deletions src/pecha_uploader/preprocess.py

This file was deleted.

Empty file.
5 changes: 2 additions & 3 deletions src/pecha_uploader/text/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,9 @@ def post_text(text_name: str, text_content: Dict):
res = response.read().decode("utf-8")
if "error" not in res:
print(f"\n{res}\n")
return True
print("res:>>>>>", res)
return {"status": True}

return False
except HTTPError as e:
print("Error code: ", e)
return False
return {"status": False, "error": e}

0 comments on commit b8233d1

Please sign in to comment.