From 2fb79bced0148dba596a2e1313bd85d5df3d3a3b Mon Sep 17 00:00:00 2001 From: Thomas Wood Date: Sat, 8 Jul 2023 11:56:17 +0100 Subject: [PATCH] Add support for Word --- src/harmony/parsing/wrapper_all_parsers.py | 2 +- src/harmony/schemas/enums/file_types.py | 3 ++- src/harmony/util/file_helper.py | 4 +++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/harmony/parsing/wrapper_all_parsers.py b/src/harmony/parsing/wrapper_all_parsers.py index 4fe40ce..c01b971 100644 --- a/src/harmony/parsing/wrapper_all_parsers.py +++ b/src/harmony/parsing/wrapper_all_parsers.py @@ -9,7 +9,7 @@ def _get_instruments_from_file(file): - if file.file_type == FileType.pdf: + if file.file_type == FileType.pdf or file.file_type == FileType.docx: instruments_from_this_file = convert_pdf_to_instruments(file) elif file.file_type == FileType.txt: instruments_from_this_file = convert_text_to_instruments(file) diff --git a/src/harmony/schemas/enums/file_types.py b/src/harmony/schemas/enums/file_types.py index 3d77a64..fdf54ea 100644 --- a/src/harmony/schemas/enums/file_types.py +++ b/src/harmony/schemas/enums/file_types.py @@ -4,4 +4,5 @@ class FileType(str, Enum): pdf: str = 'pdf' xlsx: str = 'xlsx' - txt: str = 'txt' \ No newline at end of file + txt: str = 'txt' + docx: str = 'docx' \ No newline at end of file diff --git a/src/harmony/util/file_helper.py b/src/harmony/util/file_helper.py index a6c18bd..433e6ab 100644 --- a/src/harmony/util/file_helper.py +++ b/src/harmony/util/file_helper.py @@ -12,10 +12,12 @@ def load_instruments_from_local_file(file_name: str) -> List[Instrument]: file_type = "pdf" elif file_name.lower().endswith("xlsx"): file_type = "xlsx" + elif file_name.lower().endswith("docx"): + file_type = "docx" else: file_type = "txt" - if file_type == "pdf" or file_type == "xlsx": + if file_type == "pdf" or file_type == "xlsx" or file_type == "docx": with open( file_name, "rb") as f: