From b6c1c29ddc63525edf6ef3e7c8e1bf35628b50b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Ja=C5=82oszy=C5=84ski?= Date: Sat, 12 Oct 2024 14:26:47 +0200 Subject: [PATCH 1/4] Create script managing models. Add function to upload models from local drive. --- nlp/models.sh | 27 ++++++ nlp/models.version | 5 +- nlp/src/google_service.py | 4 +- nlp/src/version_checker.py | 174 +++++++++++++++++++++++++++++++++++-- 4 files changed, 201 insertions(+), 9 deletions(-) create mode 100755 nlp/models.sh diff --git a/nlp/models.sh b/nlp/models.sh new file mode 100755 index 0000000..4705c87 --- /dev/null +++ b/nlp/models.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +command=$1; shift +array=("$@") + +case $command in + "install") + if [[ ${#array[@]} -eq 0 ]]; then + PYTHONPATH=src python -c 'import version_checker; version_checker.update_model_versions()' + else + PYTHONPATH=src python -c 'import sys, version_checker; version_checker.update_model_versions(sys.argv[1:])' "${array[@]}" + fi + ;; + "status") + PYTHONPATH=src python -c 'import version_checker; version_checker.get_status()' + ;; + "upload") + if [[ ${#array[@]} -eq 0 ]]; then + PYTHONPATH=src python -c 'import sys, version_checker; version_checker.upload_manager()' + else + PYTHONPATH=src python -c 'import sys, version_checker; version_checker.upload_manager(sys.argv[1:])' "${array[@]}" + fi + + ;; + "clean") rm -r models ;; + *) echo "Usage: $0 {install|clean|status}" ;; +esac diff --git a/nlp/models.version b/nlp/models.version index 4f3907e..fe2eb2a 100644 --- a/nlp/models.version +++ b/nlp/models.version @@ -1,4 +1,5 @@ { - "emotions" : "v1", - "sentiment" : "v1" + "emotions": "v1", + "sentiment": "v1", + "spam": "v2" } \ No newline at end of file diff --git a/nlp/src/google_service.py b/nlp/src/google_service.py index 4ba67b6..f92136b 100644 --- a/nlp/src/google_service.py +++ b/nlp/src/google_service.py @@ -7,8 +7,8 @@ def create_service(client_secret_file, api_name, api_version, scopes): """ - Shows basic usage of the Drive v3 API. Based on Google Documentation - https://developers.google.com/drive/api/quickstart/python?hl=pl + Create a service object for the Google Drive API. Function is based on the Google + API documentation: https://developers.google.com/drive/api/quickstart/python?hl=pl :param client_secret_file: The client secret file. :param api_name: The name of the API. diff --git a/nlp/src/version_checker.py b/nlp/src/version_checker.py index 2b36e9d..dbacce5 100644 --- a/nlp/src/version_checker.py +++ b/nlp/src/version_checker.py @@ -4,7 +4,7 @@ from httplib2 import ServerNotFoundError from google_service import create_service -from googleapiclient.http import MediaIoBaseDownload +from googleapiclient.http import MediaIoBaseDownload, MediaFileUpload from googleapiclient.errors import HttpError from tqdm import tqdm import json @@ -212,8 +212,16 @@ def update_model_versions(models_names=None): [models_names.append(x) for x in data] count_correct = 0 + errors = 0 for model_name in models_names: - current_version = data[model_name] + try: + current_version = data[model_name] + except KeyError: + print(f"Model {model_name} not found in the models.version file. " + f"Check out name of the model file.") + errors += 1 + continue + folder_id = find_folder(SERVICE, model_name) if folder_id is None: @@ -223,10 +231,11 @@ def update_model_versions(models_names=None): model_name) if not download_status: print("Error occurred while downloading the file.") + errors += 1 else: count_correct += 1 - return bool(count_correct == len(models_names)) + return bool(count_correct == len(models_names) - errors) def get_status_information(service, parent_id='root', level=0, path='models/' @@ -251,7 +260,6 @@ def get_status_information(service, parent_id='root', level=0, path='models/' file_exists = os.path.exists(path + file) is_not_outdated = is_file_up_to_date(files[file], file_exists, path + file) - if (not file_exists) or (not is_not_outdated): file_conditions = " ---> File diverged from online version" else: @@ -281,4 +289,160 @@ def get_status_information(service, parent_id='root', level=0, path='models/' def get_status(): """This function prints the status of the files and folders.""" - get_status_information(SERVICE, print_output=True) \ No newline at end of file + get_status_information(SERVICE) + + +def create_folder(folder_name, parent_id='root'): + """ + This function creates a folder in the Google Drive. + :param folder_name: The name of the folder to create. + :param parent_id: The ID of the parent folder. + + :return: The ID of the created folder. + """ + folder_name = { + 'name': folder_name, + 'parents': [parent_id], + 'mimeType': 'application/vnd.google-apps.folder' + } + + file = (SERVICE.files().create( + body=folder_name) + .execute()) + + return file.get('id') + + +def create_file(folder_name_id, version_folder_id, file_path, file_name): + """ + This function creates a file in the Google Drive folder. + :param folder_name_id: The ID of the folder to create the file. + :param version_folder_id: The ID of the version folder to create the file. + :param file_path: The path to the file to upload. + :param file_name: The name of the file to upload. + + :return: The file created. Or False if the folder_name_id or version_folder_id is None. + """ + if folder_name_id is None or version_folder_id is None: + return False + + file_metadata = { + 'name': file_name, + 'parents': [version_folder_id] + } + + media = MediaFileUpload(file_path, resumable=True) + file = SERVICE.files().create(body=file_metadata, + media_body=media).execute() + return file + + +def upload_file(folder_name, version, file_name): + """ + This function uploads the file to the Google Drive folder. + :param folder_name: The name of the folder to upload. + :param version: The version of the model. + :param file_name: The name of the file to upload. + + :return: True if the file was uploaded successfully, False otherwise. + """ + try: + file_path = f"models/{folder_name}/{version}/{file_name}" + if not os.path.exists(file_path): + print(f"File {file_name} not found. Stopping upload.") + return False + + folder_id = find_folder(SERVICE, folder_name) + if folder_id is None: + folder_name_id = create_folder(folder_name) + version_folder_id = create_folder(version, folder_name_id) + file_create = create_file(folder_name_id, version_folder_id, file_path, file_name) + + if file_create is None: + return False + + data = {folder_name: version} + with open("models.version", "a") as f: + json.dump(data, f, indent=4) + + print(f"File {file_name} uploaded successfully. Model file updated.") + return True + else: + query = (f"'{folder_id}' in parents and mimeType='application/" + f"vnd.google-apps.folder'") + response = SERVICE.files().list(q=query).execute() + folders = response.get('files', []) + + version_folder_id = None + for folder in folders: + if folder['name'] == version: + version_folder_id = folder['id'] + break + + if version_folder_id is None: + version_folder_id = create_folder(version, folder_id) + + response_files = SERVICE.files().list(q=f"'{version_folder_id}' in parents").execute() + files = response_files.get('files', []) + + proceed = False + for file in files: + if file['name'] == file_name: + while True: + answer = input(f"File {file_name} already exists Do you " + f"want to overwrite the file? (y/n): ") + if answer == 'n': + return None + elif answer == 'y': + proceed = True + break + else: + continue + if proceed: + body_value = {'trashed': True} + SERVICE.files().update(fileId=file['id'], body=body_value).execute() + break + + file_create = create_file(folder_id, version_folder_id, file_path, file_name) + if file_create is None: + return False + + with open("models.version", "r+") as f: + data = json.load(f) + data[folder_name] = version + f.seek(0) + json.dump(data, f, indent=4) + f.truncate() + + print(f"File {file_name} uploaded successfully. Models file updated.") + return True + except HttpError as e: + print(f"An error occurred: {e}") + return False + except ServerNotFoundError as e: + print(f"Server not found. Stopping upload. {e}") + return False + except TimeoutError as e: + print(f"Connection timed out. Stopping upload. {e}") + return False + + +def upload_manager(folders=None): + """ + This function manages uploads to the Google Drive folder. + :param folders: The names of the folders to upload. If None, all folders are uploaded. + """ + if folders is None: + folders = read_model_file() + + data = read_model_file() + for folder in folders: + version = data[folder] + files = os.listdir(f"models/{folder}/{version}") + for file_name in files: + print(f"Uploading {folder} model with version {version}, file {file_name}") + update_successful = upload_file(folder, version, file_name) + if update_successful is None: + print(f"File {file_name} already exists. Skipping file.") + elif not update_successful: + print(f"Error occurred while uploading the file {file_name}.") From fa533577d787e5c80cd9952b5c91662fbb08e5b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Ja=C5=82oszy=C5=84ski?= Date: Sat, 12 Oct 2024 14:34:17 +0200 Subject: [PATCH 2/4] Add documentation for models manager. Changes in upload function. --- nlp/.gitignore | 1 + nlp/README.md | 25 ++++++++++++++++++++++ nlp/docs/index.md | 1 + nlp/docs/using_manager.md | 23 ++++++++++++++++++++ nlp/src/version_checker.py | 43 ++++++++++++++++++-------------------- 5 files changed, 70 insertions(+), 23 deletions(-) create mode 100644 nlp/docs/using_manager.md diff --git a/nlp/.gitignore b/nlp/.gitignore index 372f3e9..88a9cb5 100644 --- a/nlp/.gitignore +++ b/nlp/.gitignore @@ -2,6 +2,7 @@ .pytest_cache .idea/ models +_build # Source folder src/__pycache__ diff --git a/nlp/README.md b/nlp/README.md index 98fe5cc..d0dbd3b 100644 --- a/nlp/README.md +++ b/nlp/README.md @@ -28,3 +28,28 @@ To run the server, run the following command: To run the tests, run the following command: ```python -m pytest``` + +## Using the models manager + +To use the models manager, run the following command: + +```bash +./models.sh +``` + +Manager commands: +- status - Check the status of the models +- install - Install the models +- clean - Delete all models from file system +- upload - Upload the models to the server + +Model names is the optional parameter for upload and install: +- If no model names are provided, all models will be installed/uploaded +```bash +./models.sh upload +``` +- To upload/install model give list of model names, separate them with a space +```bash +./models.sh install sentiment +``` + diff --git a/nlp/docs/index.md b/nlp/docs/index.md index 97a8438..f006e71 100644 --- a/nlp/docs/index.md +++ b/nlp/docs/index.md @@ -7,6 +7,7 @@ installation.md using_docker.md env_example.md +using_manager.md ``` ```{toctree} diff --git a/nlp/docs/using_manager.md b/nlp/docs/using_manager.md new file mode 100644 index 0000000..7e4f9a8 --- /dev/null +++ b/nlp/docs/using_manager.md @@ -0,0 +1,23 @@ +# Using the models manager + +To use the models manager, run the following command: + +```bash +./models.sh +``` + +Manager commands: +- status - Check the status of the models +- install - Install the models +- clean - Delete all models from file system +- upload - Upload the models to the server + +Model names is the optional parameter for upload and install: +- If no model names are provided, all models will be installed/uploaded +```bash +./models.sh upload +``` +- To upload/install model give list of model names, separate them with a space +```bash +./models.sh install sentiment +``` \ No newline at end of file diff --git a/nlp/src/version_checker.py b/nlp/src/version_checker.py index dbacce5..3a30526 100644 --- a/nlp/src/version_checker.py +++ b/nlp/src/version_checker.py @@ -356,15 +356,12 @@ def upload_file(folder_name, version, file_name): if folder_id is None: folder_name_id = create_folder(folder_name) version_folder_id = create_folder(version, folder_name_id) - file_create = create_file(folder_name_id, version_folder_id, file_path, file_name) + file_create = create_file(folder_name_id, version_folder_id, + file_path, file_name) if file_create is None: return False - data = {folder_name: version} - with open("models.version", "a") as f: - json.dump(data, f, indent=4) - print(f"File {file_name} uploaded successfully. Model file updated.") return True else: @@ -382,7 +379,8 @@ def upload_file(folder_name, version, file_name): if version_folder_id is None: version_folder_id = create_folder(version, folder_id) - response_files = SERVICE.files().list(q=f"'{version_folder_id}' in parents").execute() + response_files = SERVICE.files().list(q=f"'{version_folder_id}' " + f"in parents").execute() files = response_files.get('files', []) proceed = False @@ -403,17 +401,11 @@ def upload_file(folder_name, version, file_name): SERVICE.files().update(fileId=file['id'], body=body_value).execute() break - file_create = create_file(folder_id, version_folder_id, file_path, file_name) + file_create = create_file(folder_id, version_folder_id, + file_path, file_name) if file_create is None: return False - with open("models.version", "r+") as f: - data = json.load(f) - data[folder_name] = version - f.seek(0) - json.dump(data, f, indent=4) - f.truncate() - print(f"File {file_name} uploaded successfully. Models file updated.") return True except HttpError as e: @@ -437,12 +429,17 @@ def upload_manager(folders=None): data = read_model_file() for folder in folders: - version = data[folder] - files = os.listdir(f"models/{folder}/{version}") - for file_name in files: - print(f"Uploading {folder} model with version {version}, file {file_name}") - update_successful = upload_file(folder, version, file_name) - if update_successful is None: - print(f"File {file_name} already exists. Skipping file.") - elif not update_successful: - print(f"Error occurred while uploading the file {file_name}.") + try: + version = data[folder] + files = os.listdir(f"models/{folder}/{version}") + for file_name in files: + print(f"Uploading {folder} model with version {version}, file {file_name}") + update_successful = upload_file(folder, version, file_name) + if update_successful is None: + print(f"File {file_name} already exists. Skipping file.") + elif not update_successful: + print(f"Error occurred while uploading the file {file_name}.") + except KeyError: + print(f"Model {folder} not found in the models.version file. " + f"Check out name of the model file.") + continue From 6d6c6edbf3c9ce076227b0f494746fa849fcc941 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Ja=C5=82oszy=C5=84ski?= Date: Sat, 19 Oct 2024 13:36:50 +0200 Subject: [PATCH 3/4] Update documentation. Add types to functions. Change status function to show changes with local files. --- nlp/.gitignore | 1 + nlp/README.Docker.md | 33 ++-- nlp/README.md | 6 +- nlp/docs/using_docker.md | 36 ++-- nlp/docs/using_manager.md | 8 +- nlp/{models.sh => manager.sh} | 8 +- nlp/src/google_service.py | 17 +- nlp/src/version_checker.py | 187 ++++++++++++-------- nlp/{models.version => version_models.json} | 0 9 files changed, 170 insertions(+), 126 deletions(-) rename nlp/{models.sh => manager.sh} (81%) rename nlp/{models.version => version_models.json} (100%) diff --git a/nlp/.gitignore b/nlp/.gitignore index 88a9cb5..1edecf4 100644 --- a/nlp/.gitignore +++ b/nlp/.gitignore @@ -18,6 +18,7 @@ venv/ # File types *.json +!version_models.json # Environment variables .env diff --git a/nlp/README.Docker.md b/nlp/README.Docker.md index 6dae561..98ed78f 100644 --- a/nlp/README.Docker.md +++ b/nlp/README.Docker.md @@ -1,22 +1,27 @@ -### Building and running your application +## Building and running your application -When you're ready, start your application by running: -`docker compose up --build`. +When you're ready, build your application by running: +```docker build -t rmoods-nlp .``` -Your application will be available at http://localhost:8000. +The ```-t``` parameter refers to tag, which is the name of the image. +You can replace ``rmoods-nlp`` with any name you like. -### Deploying your application to the cloud +After the build completes, you can run your application using the following command: +```docker run --name rmoods-nlp -d rmoods-nlp``` -First, build your image, e.g.: `docker build -t myapp .`. -If your cloud uses a different CPU architecture than your development -machine (e.g., you are on a Mac M1 and your cloud provider is amd64), -you'll want to build the image for that platform, e.g.: -`docker build --platform=linux/amd64 -t myapp .`. +The ```-d``` parameter runs the container in detached mode, which means the container +runs in the background ```--name``` refers to docker name, it can be used +instead of container id. -Then, push it to your registry, e.g. `docker push myregistry.com/myapp`. +If you want to open docker image console run: +```docker exec -it rmoods-nlp /bin/bash``` -Consult Docker's [getting started](https://docs.docker.com/go/get-started-sharing/) -docs for more detail on building and pushing. +```rmoods-nlp``` is the name of the container. If we did not specify a name, +Docker would have assigned a random name to the container. To check the name of the container, run: +```docker ps``` -### References +To stop the container, run: +```docker stop rmoods-nlp``` + +## References * [Docker's Python guide](https://docs.docker.com/language/python/) \ No newline at end of file diff --git a/nlp/README.md b/nlp/README.md index d0dbd3b..d149e88 100644 --- a/nlp/README.md +++ b/nlp/README.md @@ -34,7 +34,7 @@ To run the tests, run the following command: To use the models manager, run the following command: ```bash -./models.sh +./manager.sh ``` Manager commands: @@ -46,10 +46,10 @@ Manager commands: Model names is the optional parameter for upload and install: - If no model names are provided, all models will be installed/uploaded ```bash -./models.sh upload +./manager.sh upload ``` - To upload/install model give list of model names, separate them with a space ```bash -./models.sh install sentiment +./manager.sh install sentiment ``` diff --git a/nlp/docs/using_docker.md b/nlp/docs/using_docker.md index cb104cb..f557c6a 100644 --- a/nlp/docs/using_docker.md +++ b/nlp/docs/using_docker.md @@ -2,28 +2,28 @@ ## Building and running your application -When you're ready, start your application by running: -```bash -docker compose up --build . -``` -Your application will be available at http://localhost:8000. +When you're ready, build your application by running: +```docker build -t rmoods-nlp .``` -## Deploying your application to the cloud +The ```-t``` parameter refers to tag, which is the name of the image. +You can replace ``rmoods-nlp`` with any name you like. -First, build your image, e.g.: ```bash docker build -t myapp . .``` -If your cloud uses a different CPU architecture than your development -machine (e.g., you are on a Mac M1 and your cloud provider is amd64), -you'll want to build the image for that platform, e.g.: +After the build completes, you can run your application using the following command: +```docker run --name rmoods-nlp -d rmoods-nlp``` -```bash -docker build --platform=linux/amd64 -t myapp . . -``` +The ```-d``` parameter runs the container in detached mode, which means the container +runs in the background ```--name``` refers to docker name, it can be used +instead of container id. -Then, push it to your registry, e.g. ```bash docker push myregistry.com/myapp . +If you want to open docker image console run: +```docker exec -it rmoods-nlp /bin/bash``` -Consult Docker's `getting started `_ -docs for more detail on building and pushing. +```rmoods-nlp``` is the name of the container. If we did not specify a name, +Docker would have assigned a random name to the container. To check the name of the container, run: +```docker ps``` -## References +To stop the container, run: +```docker stop rmoods-nlp``` -* `Docker's Python guide `_ \ No newline at end of file +## References +* [Docker's Python guide](https://docs.docker.com/language/python/) \ No newline at end of file diff --git a/nlp/docs/using_manager.md b/nlp/docs/using_manager.md index 7e4f9a8..b109f54 100644 --- a/nlp/docs/using_manager.md +++ b/nlp/docs/using_manager.md @@ -3,11 +3,11 @@ To use the models manager, run the following command: ```bash -./models.sh +./manager.sh ``` Manager commands: -- status - Check the status of the models +- remote - Check the status of the models - install - Install the models - clean - Delete all models from file system - upload - Upload the models to the server @@ -15,9 +15,9 @@ Manager commands: Model names is the optional parameter for upload and install: - If no model names are provided, all models will be installed/uploaded ```bash -./models.sh upload +./manager.sh upload ``` - To upload/install model give list of model names, separate them with a space ```bash -./models.sh install sentiment +./manager.sh install sentiment ``` \ No newline at end of file diff --git a/nlp/models.sh b/nlp/manager.sh similarity index 81% rename from nlp/models.sh rename to nlp/manager.sh index 4705c87..d879700 100755 --- a/nlp/models.sh +++ b/nlp/manager.sh @@ -11,7 +11,7 @@ case $command in PYTHONPATH=src python -c 'import sys, version_checker; version_checker.update_model_versions(sys.argv[1:])' "${array[@]}" fi ;; - "status") + "remote") PYTHONPATH=src python -c 'import version_checker; version_checker.get_status()' ;; "upload") @@ -20,8 +20,10 @@ case $command in else PYTHONPATH=src python -c 'import sys, version_checker; version_checker.upload_manager(sys.argv[1:])' "${array[@]}" fi - + ;; + "status") + PYTHONPATH=src python -c 'import version_checker; version_checker.get_status(True)' ;; "clean") rm -r models ;; - *) echo "Usage: $0 {install|clean|status}" ;; + *) echo "Usage: $0 {install|clean|status|remote}" ;; esac diff --git a/nlp/src/google_service.py b/nlp/src/google_service.py index f92136b..5e62d23 100644 --- a/nlp/src/google_service.py +++ b/nlp/src/google_service.py @@ -5,7 +5,8 @@ from google.auth.transport.requests import Request -def create_service(client_secret_file, api_name, api_version, scopes): +def create_service(client_secret_file: str, api_name: str, api_version: str, + scopes: list) -> object: """ Create a service object for the Google Drive API. Function is based on the Google API documentation: https://developers.google.com/drive/api/quickstart/python?hl=pl @@ -17,14 +18,8 @@ def create_service(client_secret_file, api_name, api_version, scopes): :return: The service object or None. """ - CLIENT_SECRET_FILE = client_secret_file - API_SERVICE_NAME = api_name - API_VERSION = api_version - SCOPES = scopes - cred = None - - pickle_file = f'token_{API_SERVICE_NAME}_{API_VERSION}.pickle' + pickle_file = f'token_{api_name}_{api_version}.pickle' if os.path.exists(pickle_file): with open(pickle_file, 'rb') as token: @@ -34,15 +29,15 @@ def create_service(client_secret_file, api_name, api_version, scopes): if cred and cred.expired and cred.refresh_token: cred.refresh(Request()) else: - flow = InstalledAppFlow.from_client_secrets_file(CLIENT_SECRET_FILE - , SCOPES) + flow = InstalledAppFlow.from_client_secrets_file(client_secret_file + , scopes) cred = flow.run_local_server(port=8002) with open(pickle_file, 'wb') as token: pickle.dump(cred, token) try: - service = build(API_SERVICE_NAME, API_VERSION, credentials=cred) + service = build(api_name, api_version, credentials=cred) return service except Exception as e: print('Unable to connect.') diff --git a/nlp/src/version_checker.py b/nlp/src/version_checker.py index 3a30526..30c9c96 100644 --- a/nlp/src/version_checker.py +++ b/nlp/src/version_checker.py @@ -1,6 +1,7 @@ import io import os import datetime +import pytz from httplib2 import ServerNotFoundError from google_service import create_service @@ -12,6 +13,9 @@ API_NAME = "drive" API_VERSION = "v3" SCOPES = ["https://www.googleapis.com/auth/drive"] +CROSS_MARK = u'\u274c' +APPROVAL_MARK = u'\u2705' +MISSING_MARK = u'\u2753' if os.path.exists("client_secret_file.json"): SERVICE = create_service("client_secret_file.json", API_NAME, @@ -20,24 +24,24 @@ print("client_secret_file.json file not found. Check if the file exists.") -def read_model_file(): +def read_model_file() -> dict: """ This function reads the file with the models version. :return: The data read from file. """ - with open("models.version", "r") as f: + with open("version_models.json", "r") as f: return json.load(f) -def find_folder(service, folder_name): +def find_folder(service: object, folder_name: str) -> str: """ This function finds the folder with the given name. :param service: The Google Drive service. :param folder_name: The name of the folder to find. - :return: The folder ID if found, None otherwise. + :return: The folder ID if found, empty string otherwise. """ try: query = (f"name='{folder_name}' and mimeType='application/" @@ -47,15 +51,17 @@ def find_folder(service, folder_name): return response["files"][0]["id"] except ServerNotFoundError as e: print(f"Server not found. Stopping looking for folder. {e}") - return None + return "" except TimeoutError as e: print(f"Connection timed out. Stopping download. {e}") - return None + return "" -def list_folder_contents(service, folder_id): +def list_folder_contents(service: object, folder_id: str) -> dict: """ - This function lists the contents of the folder with the given ID. + This function lists the contents of the folder with the given ID. Listed are only + files that are not with status trashed. Status trashed means that files are in the + trash bin, but they are still in the Google Drive. :param service: The Google Drive service. :param folder_id: The ID of the folder to list. @@ -63,7 +69,7 @@ def list_folder_contents(service, folder_id): :return: A dictionary containing the directories or files name and ID. """ try: - query = f"'{folder_id}' in parents" + query = f"'{folder_id}' in parents and trashed=false" response = service.files().list(q=query).execute() files = {} @@ -73,13 +79,14 @@ def list_folder_contents(service, folder_id): return files except ServerNotFoundError as e: print(f"Server not found. Stopping listing files. {e}") - return None + return {} except TimeoutError as e: print(f"Connection timed out. Stopping download. {e}") - return None + return {} -def download_file(service, file_id, file_name, models_directory): +def download_file(service: object, file_id: str, file_name: str, + models_directory: str) -> bool: """ This function downloads the file with the given ID. Based on Google Documentation https://developers.google.com/drive/api/guides/manage-downloads?hl=pl#python @@ -128,10 +135,10 @@ def download_file(service, file_id, file_name, models_directory): return False -def is_file_up_to_date(file_id, file_exists, file_path): +def is_file_up_to_date(file_id: str, file_exists: bool, file_path: str) -> bool: """ This function checks if the file is up to date. Comparing the created time - in Google Drive with the one in the models.version file. + in Google Drive with the one in the version_models.json file. :param file_id: The ID of the file to check. :param file_exists: True if the file exists, False otherwise. @@ -139,25 +146,35 @@ def is_file_up_to_date(file_id, file_exists, file_path): :return: True if the file is up to date, False otherwise. """ - file_created_time = (SERVICE.files().get(fileId=file_id, fields="createdTime") + file_created_time = (SERVICE.files().get(fileId=file_id, fields="createdTime,modifiedTime") .execute()) if file_exists: - online_date = datetime.datetime.strptime( + create_time = datetime.datetime.strptime( file_created_time['createdTime'], '%Y-%m-%dT%H:%M:%S.%fZ') + last_modified_time = datetime.datetime.strptime( + file_created_time['modifiedTime'], '%Y-%m-%dT%H:%M:%S.%fZ') + c_time = os.path.getctime(file_path) - formatted_time = datetime.datetime.fromtimestamp(c_time).strftime( - '%Y-%m-%dT%H:%M:%S.%f')[:-3] + 'Z' + time = datetime.datetime.fromtimestamp(c_time) + + local_tz = pytz.timezone("Europe/Warsaw") + local_time = local_tz.localize(time).astimezone(pytz.utc) + formatted_time = local_time.strftime('%Y-%m-%dT%H:%M:%S.%fZ') local_date = datetime.datetime.strptime(formatted_time, '%Y-%m-%dT%H:%M:%S.%fZ') - return bool(online_date < local_date) + + if last_modified_time == create_time: + return bool(local_date > last_modified_time) + else: + return False else: return False -def get_version(folder_id, current_version, model_name): +def get_version(folder_id: str, current_version: str, model_name: str) -> bool: """ This function is the main function of the script. It reads the file containing model versions, finds the folder with the given name, lists the @@ -177,29 +194,28 @@ def get_version(folder_id, current_version, model_name): folder_with_current_version_id) if len(list_files) > 0: - file_id = list(list_files.values())[0] - file_name = list(list_files.keys())[0] - models_directory = f"models/{model_name}/{current_version}" - full_models_path = f"{models_directory}/{file_name}" - - file_exists = os.path.exists(full_models_path) - newest = is_file_up_to_date(file_id, file_exists, full_models_path) - - if not newest or not file_exists: - status = download_file(SERVICE, file_id, file_name, - models_directory) - if not status: - print(f"An error occurred while downloading the " - f"file {model_name}.") - return False + for file_name, file_id in list_files.items(): + models_directory = f"models/{model_name}/{current_version}" + full_models_path = f"{models_directory}/{file_name}" + + file_exists = os.path.exists(full_models_path) + newest = is_file_up_to_date(file_id, file_exists, full_models_path) + + if not newest or not file_exists: + status = download_file(SERVICE, file_id, file_name, + models_directory) + if not status: + print(f"An error occurred while downloading the " + f"file {model_name}.") + return False return True -def update_model_versions(models_names=None): +def update_model_versions(models_names: str = "") -> bool: """ - This function is the main function of the script. It reads the file - containing model versions, finds the folder with the given name, lists the - contents of the folder and downloads the file if it is outdated. + This function reads the file containing model versions, finds the folder with the + given name, lists the contents of the folder and downloads the file if + it is outdated. :param models_names: The names of the models to update. @@ -207,7 +223,7 @@ def update_model_versions(models_names=None): False otherwise. """ data = read_model_file() - if models_names is None: + if models_names == "": models_names = [] [models_names.append(x) for x in data] @@ -217,14 +233,14 @@ def update_model_versions(models_names=None): try: current_version = data[model_name] except KeyError: - print(f"Model {model_name} not found in the models.version file. " + print(f"Model {model_name} not found in the version_models.json file. " f"Check out name of the model file.") errors += 1 continue folder_id = find_folder(SERVICE, model_name) - if folder_id is None: + if folder_id == "": return False download_status = get_version(folder_id, current_version, @@ -238,61 +254,84 @@ def update_model_versions(models_names=None): return bool(count_correct == len(models_names) - errors) -def get_status_information(service, parent_id='root', level=0, path='models/' - , print_output=True): +def get_status_information(data: dict, service: object, parent_id: str = 'root', + level: int = 0, path: str = 'models/', + local_status: bool = False, parent_name: str = "") -> None: """ - This function lists all folders and their subfolders recursively. + This function lists all folders, their subfolders and files inside it located + online on Google Drive. With the parameter local_status set to True, it checks + if the folders and files are up to date with the local files, based on + version_models.json file. Otherwise, it lists all the files and folders online, and + checks if they are up to date with the local files. + :param data: The data read from the version_models.json file. :param service: The Google Drive service. :param parent_id: The ID of the parent folder. :param level: The current level of recursion (used for indentation). :param path: The path to the current folder. - :param print_output: True if the function should print the folders and files. + :param local_status: True if the local status should be checked, False if we want + to check online status. + :param parent_name: The name of the parent folder. """ try: query = (f"'{parent_id}' in parents and mimeType='application/" - f"vnd.google-apps.folder'") + f"vnd.google-apps.folder' and trashed=false") response = service.files().list(q=query).execute() folders = response.get('files', []) + if not folders: files = list_folder_contents(service, parent_id) for file in files: file_exists = os.path.exists(path + file) is_not_outdated = is_file_up_to_date(files[file], file_exists, path + file) - if (not file_exists) or (not is_not_outdated): - file_conditions = " ---> File diverged from online version" + if not file_exists: + file_conditions = CROSS_MARK + elif not is_not_outdated: + file_conditions = MISSING_MARK else: - file_conditions = "" + file_conditions = APPROVAL_MARK - if print_output: - print(' ' * level + f"File online: {file}{file_conditions}") + print(' ' * level + f"File local: {file} {file_conditions}") for folder in folders: + skip = False + if local_status: + if folder['name'] not in data.keys(): + try: + file_version_exist = folder['name'] == data[parent_name] + except KeyError: + file_version_exist = False + skip = not file_version_exist + + if skip: + continue + if not os.path.exists(path + folder['name']): - folder_conditions = " ---> Folder diverged from online version" + folder_conditions = CROSS_MARK else: - folder_conditions = "" - - if print_output: - print(' ' * level + f"Folder online: {folder['name']} " - f"{folder_conditions}") + folder_conditions = APPROVAL_MARK - get_status_information(service, folder['id'], level + 1, path + - folder['name'] + '/', print_output) + print(' ' * level + f"Folder local: {folder['name']} " + f" {folder_conditions}") + parent_name = folder['name'] + get_status_information(data, service, folder['id'], level + 1, path + + folder['name'] + '/', local_status=local_status, + parent_name=parent_name) except ServerNotFoundError as e: print(f"Server not found. Stopping listing files. {e}") except TimeoutError as e: print(f"Connection timed out. Stopping listing files. {e}") -def get_status(): +def get_status(local_status: bool = False) -> None: """This function prints the status of the files and folders.""" - get_status_information(SERVICE) + data = read_model_file() + get_status_information(data, SERVICE, local_status=local_status) -def create_folder(folder_name, parent_id='root'): +def create_folder(folder_name: str, parent_id: str = 'root') -> str: """ This function creates a folder in the Google Drive. :param folder_name: The name of the folder to create. @@ -313,7 +352,8 @@ def create_folder(folder_name, parent_id='root'): return file.get('id') -def create_file(folder_name_id, version_folder_id, file_path, file_name): +def create_file(folder_name_id: str, version_folder_id: str, file_path: str, + file_name: str) -> dict: """ This function creates a file in the Google Drive folder. :param folder_name_id: The ID of the folder to create the file. @@ -324,7 +364,7 @@ def create_file(folder_name_id, version_folder_id, file_path, file_name): :return: The file created. Or False if the folder_name_id or version_folder_id is None. """ if folder_name_id is None or version_folder_id is None: - return False + return {} file_metadata = { 'name': file_name, @@ -337,7 +377,7 @@ def create_file(folder_name_id, version_folder_id, file_path, file_name): return file -def upload_file(folder_name, version, file_name): +def upload_file(folder_name: str, version: str, file_name: str) -> bool: """ This function uploads the file to the Google Drive folder. :param folder_name: The name of the folder to upload. @@ -353,13 +393,13 @@ def upload_file(folder_name, version, file_name): return False folder_id = find_folder(SERVICE, folder_name) - if folder_id is None: + if folder_id == "": folder_name_id = create_folder(folder_name) version_folder_id = create_folder(version, folder_name_id) file_create = create_file(folder_name_id, version_folder_id, file_path, file_name) - if file_create is None: + if file_create == {}: return False print(f"File {file_name} uploaded successfully. Model file updated.") @@ -390,7 +430,7 @@ def upload_file(folder_name, version, file_name): answer = input(f"File {file_name} already exists Do you " f"want to overwrite the file? (y/n): ") if answer == 'n': - return None + return False elif answer == 'y': proceed = True break @@ -419,7 +459,7 @@ def upload_file(folder_name, version, file_name): return False -def upload_manager(folders=None): +def upload_manager(folders: str = None) -> None: """ This function manages uploads to the Google Drive folder. :param folders: The names of the folders to upload. If None, all folders are uploaded. @@ -433,13 +473,14 @@ def upload_manager(folders=None): version = data[folder] files = os.listdir(f"models/{folder}/{version}") for file_name in files: - print(f"Uploading {folder} model with version {version}, file {file_name}") + print( + f"Uploading {folder} model with version {version}, file {file_name}") update_successful = upload_file(folder, version, file_name) - if update_successful is None: + if update_successful == {}: print(f"File {file_name} already exists. Skipping file.") elif not update_successful: print(f"Error occurred while uploading the file {file_name}.") except KeyError: - print(f"Model {folder} not found in the models.version file. " + print(f"Model {folder} not found in the version_models.json file. " f"Check out name of the model file.") continue diff --git a/nlp/models.version b/nlp/version_models.json similarity index 100% rename from nlp/models.version rename to nlp/version_models.json From ee4ae0c635827a8d93ee099ebb90ae117e5fbb13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Ja=C5=82oszy=C5=84ski?= Date: Sat, 19 Oct 2024 14:42:48 +0200 Subject: [PATCH 4/4] Add library. Repair download and upload. Update docs. --- nlp/README.md | 20 +++--- nlp/docs/using_manager.md | 14 +++-- nlp/manager.sh | 2 +- nlp/requirements.txt | 3 +- nlp/src/version_checker.py | 123 +++++++++++++++++-------------------- 5 files changed, 84 insertions(+), 78 deletions(-) diff --git a/nlp/README.md b/nlp/README.md index d149e88..ef67828 100644 --- a/nlp/README.md +++ b/nlp/README.md @@ -1,6 +1,6 @@ # RMoods NLP Backend -## Using python virtual environment +### Using python virtual environment To create a python virtual environment, run the following command: ```python -m venv .venv``` @@ -19,31 +19,37 @@ To exit the virtual environment, run the following command: ```deactivate``` -## Running the server +### Running the server To run the server, run the following command: ```python src/main.py``` -## Running the tests +### Running the tests To run the tests, run the following command: ```python -m pytest``` ## Using the models manager -To use the models manager, run the following command: +#### To use the models manager, run the following command: ```bash ./manager.sh ``` -Manager commands: -- status - Check the status of the models +#### Manager commands: +- remote - Check the status of the models online - install - Install the models - clean - Delete all models from file system - upload - Upload the models to the server +- status - Check the status of local models with Google Drive -Model names is the optional parameter for upload and install: +#### Status and remote command marks: +- ❌ - Not installed +- ✅ - Installed +- ❓ - Version mismatch + +#### Model names is the optional parameter for upload and install: - If no model names are provided, all models will be installed/uploaded ```bash ./manager.sh upload diff --git a/nlp/docs/using_manager.md b/nlp/docs/using_manager.md index b109f54..9dd0ef2 100644 --- a/nlp/docs/using_manager.md +++ b/nlp/docs/using_manager.md @@ -1,18 +1,24 @@ # Using the models manager -To use the models manager, run the following command: +#### To use the models manager, run the following command: ```bash ./manager.sh ``` -Manager commands: -- remote - Check the status of the models +#### Manager commands: +- remote - Check the status of the models online - install - Install the models - clean - Delete all models from file system - upload - Upload the models to the server +- status - Check the status of local models with Google Drive -Model names is the optional parameter for upload and install: +#### Status and remote command marks: +- ❌ - Not installed +- ✅ - Installed +- ❓ - Version mismatch + +#### Model names is the optional parameter for upload and install: - If no model names are provided, all models will be installed/uploaded ```bash ./manager.sh upload diff --git a/nlp/manager.sh b/nlp/manager.sh index d879700..8766b0d 100755 --- a/nlp/manager.sh +++ b/nlp/manager.sh @@ -25,5 +25,5 @@ case $command in PYTHONPATH=src python -c 'import version_checker; version_checker.get_status(True)' ;; "clean") rm -r models ;; - *) echo "Usage: $0 {install|clean|status|remote}" ;; + *) echo "Usage: $0 {install|clean|status|remote|upload}" ;; esac diff --git a/nlp/requirements.txt b/nlp/requirements.txt index 8dfa2eb..6117024 100644 --- a/nlp/requirements.txt +++ b/nlp/requirements.txt @@ -9,4 +9,5 @@ google-auth-oauthlib==1.2.1 tqdm==4.66.5 myst-parser==4.0.0 sphinx-code-include==1.4.0 -httplib2==0.22.0 \ No newline at end of file +httplib2==0.22.0 +pytz==2024.2 \ No newline at end of file diff --git a/nlp/src/version_checker.py b/nlp/src/version_checker.py index 30c9c96..e0a4d19 100644 --- a/nlp/src/version_checker.py +++ b/nlp/src/version_checker.py @@ -103,27 +103,19 @@ def download_file(service: object, file_id: str, file_name: str, done = False file = io.BytesIO() + file.seek(0) downloader = MediaIoBaseDownload(file, request) progress_bar = tqdm(total=100) while done is False: - try: - status, done = downloader.next_chunk() - progress_bar.update(status.progress() * 100) - except HttpError as e: - print(f"Connection lost. Stopping download. {e}") - return False - except TimeoutError as e: - print(f"Connection timed out. Stopping download. {e}") - return False - except ServerNotFoundError as e: - print(f"Server not found. Stopping download. {e}") - return False + status, done = downloader.next_chunk() + progress_bar.update(status.progress() * 100) progress_bar.close() if not os.path.exists(models_directory): os.makedirs(models_directory) + file.seek(0) with open(os.path.join(models_directory, file_name), "wb") as f: f.write(file.read()) @@ -168,8 +160,7 @@ def is_file_up_to_date(file_id: str, file_exists: bool, file_path: str) -> bool: if last_modified_time == create_time: return bool(local_date > last_modified_time) - else: - return False + return False else: return False @@ -296,13 +287,12 @@ def get_status_information(data: dict, service: object, parent_id: str = 'root', for folder in folders: skip = False - if local_status: - if folder['name'] not in data.keys(): - try: - file_version_exist = folder['name'] == data[parent_name] - except KeyError: - file_version_exist = False - skip = not file_version_exist + if local_status and folder['name'] not in data.keys(): + try: + file_version_exist = folder['name'] == data[parent_name] + except KeyError: + file_version_exist = False + skip = not file_version_exist if skip: continue @@ -361,7 +351,8 @@ def create_file(folder_name_id: str, version_folder_id: str, file_path: str, :param file_path: The path to the file to upload. :param file_name: The name of the file to upload. - :return: The file created. Or False if the folder_name_id or version_folder_id is None. + :return: The file created. Or False if the folder_name_id or version_folder_id + is None. """ if folder_name_id is None or version_folder_id is None: return {} @@ -404,50 +395,51 @@ def upload_file(folder_name: str, version: str, file_name: str) -> bool: print(f"File {file_name} uploaded successfully. Model file updated.") return True - else: - query = (f"'{folder_id}' in parents and mimeType='application/" - f"vnd.google-apps.folder'") - response = SERVICE.files().list(q=query).execute() - folders = response.get('files', []) - - version_folder_id = None - for folder in folders: - if folder['name'] == version: - version_folder_id = folder['id'] - break - - if version_folder_id is None: - version_folder_id = create_folder(version, folder_id) - response_files = SERVICE.files().list(q=f"'{version_folder_id}' " - f"in parents").execute() - files = response_files.get('files', []) + query = (f"'{folder_id}' in parents and mimeType='application/" + f"vnd.google-apps.folder'") + response = SERVICE.files().list(q=query).execute() + folders = response.get('files', []) - proceed = False - for file in files: - if file['name'] == file_name: - while True: - answer = input(f"File {file_name} already exists Do you " - f"want to overwrite the file? (y/n): ") - if answer == 'n': - return False - elif answer == 'y': - proceed = True - break - else: - continue - if proceed: - body_value = {'trashed': True} - SERVICE.files().update(fileId=file['id'], body=body_value).execute() - break - - file_create = create_file(folder_id, version_folder_id, - file_path, file_name) - if file_create is None: - return False + version_folder_id = None + for folder in folders: + if folder['name'] == version: + version_folder_id = folder['id'] + break + + if version_folder_id is None: + version_folder_id = create_folder(version, folder_id) + + response_files = SERVICE.files().list(q=f"'{version_folder_id}' " + f"in parents").execute() + files = response_files.get('files', []) + + proceed = False + for file in files: + if file['name'] == file_name: + while True: + answer = input(f"File {file_name} already exists Do you " + f"want to overwrite the file? (y/n): ") + if answer == 'n': + print("File skipped.") + return False + elif answer == 'y': + proceed = True + break + else: + continue + if proceed: + body_value = {'trashed': True} + SERVICE.files().update(fileId=file['id'], body=body_value).execute() + break + + file_create = create_file(folder_id, version_folder_id, + file_path, file_name) + if file_create is None: + return False - print(f"File {file_name} uploaded successfully. Models file updated.") - return True + print(f"File {file_name} uploaded successfully. Models file updated.") + return True except HttpError as e: print(f"An error occurred: {e}") return False @@ -462,7 +454,8 @@ def upload_file(folder_name: str, version: str, file_name: str) -> bool: def upload_manager(folders: str = None) -> None: """ This function manages uploads to the Google Drive folder. - :param folders: The names of the folders to upload. If None, all folders are uploaded. + :param folders: The names of the folders to upload. If None, all folders are + uploaded. """ if folders is None: folders = read_model_file() @@ -479,7 +472,7 @@ def upload_manager(folders: str = None) -> None: if update_successful == {}: print(f"File {file_name} already exists. Skipping file.") elif not update_successful: - print(f"Error occurred while uploading the file {file_name}.") + print(f"Error occurred with file {file_name}.") except KeyError: print(f"Model {folder} not found in the version_models.json file. " f"Check out name of the model file.")