From b95aba4c0395ff6f5924bfe1430ff711660cbf29 Mon Sep 17 00:00:00 2001
From: Kaszanas <34846245+Kaszanas@users.noreply.github.com>
Date: Mon, 11 Nov 2024 21:56:48 +0100
Subject: [PATCH 01/92] docs: drafting doc changes, docker as main
 distribution channel

---
 README.md | 44 +++++++++++++++++++++++++++++++++++++++-----
 makefile  |  3 +--
 2 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 2416aec..a2d53d7 100644
--- a/README.md
+++ b/README.md
@@ -6,20 +6,56 @@ Tools in this repository were used to create the **[SC2ReSet: StarCraft II Espor

 ## Installation

-To install current version of the toolset as separate CLI tools run the following command:
+Our preferred way of distributing the toolset is through DockerHub. We use the Docker image to provide a fully reproducible environment for our scripts.
+
+To build the image, please run the following command:
+
+```bash
+make docker_build
+```
+
+After building the image, please refer to the **[Command Line Arguments Usage](#command-line-arguments-usage)** section for the usage of the scripts.
+
+## Command Line Arguments Usage
+
+When using Docker, you will have to pass the arguments through the `docker run` command and mount the input/output directory. Below is an example of how to run the `directory_flattener` script using Docker. For ease of use, we have prepared an example directory structure in the `processing` directory. The command below uses it to flatten the directory structure:
+
+```bash
+docker run -v "./processing:/app/processing" datasetpreparator python3 directory_flattener.py --input_path /app/processing/directory_flattener/input --output_path /app/processing/directory_flattener/output
+```
+
+Each of the scripts has its usage described in its respective `README.md` file; you can find the table of contents below.
+
+### Table of Contents

-## Dataset Preparation Steps
+#### Generic scripts
+1. [dir_packager](src/dir_packager/README.md)
+2. [directory_flattener](src/directory_flattener/README.md)
+3. [file_renamer](src/file_renamer/README.md)
+4. [json_merger](src/json_merger/README.md)
+5. [processed_mapping_copier](src/processed_mapping_copier/README.md)
+
+#### StarCraft 2 specific scripts
+1. [sc2_map_downloader](src/sc2/sc2_map_downloader/README.md)
+2. [sc2_replaypack_processor](src/sc2/sc2_replaypack_processor/README.md)
+3. [sc2reset_replaypack_downloader](src/sc2/sc2reset_replaypack_downloader/README.md)
+
+## SC2EGSet Dataset Preparation Steps

 To reproduce our experience with defining a dataset and to be able to compare your results with our work we describe how to perform the processing below.

 ### Using Docker

+We provide a release image containing all of the scripts. To see the usage of these scripts please refer to their respective ``README.md`` files.
+
 1. Build the docker image from: https://github.com/Kaszanas/SC2InfoExtractorGo
 2. Run the commands as described in the ```makefile```. But first make sure that all of the script parameters are set according to your needs.

@@ -38,9 +74,7 @@ To reproduce our experience with defining a dataset and to be able to compare yo

 In order to specify different processing flags for https://github.com/Kaszanas/SC2InfoExtractorGo please modify the ```sc2_replaypack_processor.py``` file directly

-## Command Line Arguments Usage
-
-Each of the scripts has its usage described in their respective `README.md` files.
 ## Contributing and Reporting Issues

diff --git a/makefile b/makefile
index 8c1e207..8d5e65a 100644
--- a/makefile
+++ b/makefile
@@ -11,7 +11,6 @@ TEST_COMMAND = "poetry run pytest --durations=100 --ignore-glob='test_*.py' test
 ###################
 all: ## Runs the entire processing pipeline to recreate SC2ReSet and SC2EGSet or any other dataset using our standard tooling.
 	@make flatten
-	@make json_merge
 	@make process_replaypack
 	@make rename_files
 	@make package_dataset
@@ -78,7 +77,7 @@ docker_build_dev: ## Builds the development image containing all of the tools.
 		-f ./docker/Dockerfile.dev . \
 		--tag=datasetpreparator:devcontainer

-docker_run_test:
+docker_run_test: ## Runs the test command using Docker.
 	docker run \
 		-v "${PWD}:/app" \
 		-e "TEST_WORKSPACE=/app" \

From c6a62c1b8cd60e353879278b6b8f1a943184d2a3 Mon Sep 17 00:00:00 2001
From: Kaszanas <34846245+Kaszanas@users.noreply.github.com>
Date: Mon, 11 Nov 2024 21:59:11 +0100
Subject: [PATCH 02/92] docs: adjusted readability

---
 README.md | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/README.md b/README.md
index a2d53d7..97487b2 100644
--- a/README.md
+++ b/README.md
@@ -32,21 +32,21 @@ When using Docker, you will have to pass the arguments through the `docker run`
 docker run -v "./processing:/app/processing" datasetpreparator python3 directory_flattener.py --input_path /app/processing/directory_flattener/input --output_path /app/processing/directory_flattener/output
 ```

-Each of the scripts has its usage described in its respective `README.md` file; you can find the table of contents below.
-
 ### Table of Contents

+Each of the scripts has its usage described in its respective `README.md` file; you can find the table of contents below.
+
+#### CLI Usage Generic scripts
+1. [dir_packager: README](src/dir_packager/README.md)
+2. [directory_flattener: README](src/directory_flattener/README.md)
+3. [file_renamer: README](src/file_renamer/README.md)
+4. [json_merger: README](src/json_merger/README.md)
+5. [processed_mapping_copier: README](src/processed_mapping_copier/README.md)
+
+#### CLI Usage StarCraft 2 Specific Scripts
+1. [sc2_map_downloader: README](src/sc2/sc2_map_downloader/README.md)
+2. [sc2_replaypack_processor: README](src/sc2/sc2_replaypack_processor/README.md)
+3. [sc2reset_replaypack_downloader: README](src/sc2/sc2reset_replaypack_downloader/README.md)

 ## SC2EGSet Dataset Preparation Steps

From 776de34af2260821495550ff4c9ab3d691b61f1e Mon Sep 17 00:00:00 2001
From: Kaszanas <34846245+Kaszanas@users.noreply.github.com>
Date: Mon, 11 Nov 2024 22:15:21 +0100
Subject: [PATCH 03/92] docs: adjusted the description in processed mapping
 copier

---
 .../processed_mapping_copier/processed_mapping_copier.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/datasetpreparator/processed_mapping_copier/processed_mapping_copier.py b/src/datasetpreparator/processed_mapping_copier/processed_mapping_copier.py
index 9acd701..42b44c2 100644
--- a/src/datasetpreparator/processed_mapping_copier/processed_mapping_copier.py
+++ b/src/datasetpreparator/processed_mapping_copier/processed_mapping_copier.py
@@ -43,7 +43,7 @@ def processed_mapping_copier(input_path: str, output_path: str) -> None:

 @click.command(
-    help="Tool for copying the processed_mapping.json files that are required to define the StarCraft 2 (SC2) dataset."
+    help="Tool for copying the processed_mapping.json files to the matching directory after processing the replaypack into a JSON dataset. This step is required to define the StarCraft 2 (SC2) dataset."
 )
 @click.option(
     "--input_path",

From e7486f4b1ea11110baabdc0fe9abd78a194041f0 Mon Sep 17 00:00:00 2001
From: Kaszanas <34846245+Kaszanas@users.noreply.github.com>
Date: Mon, 11 Nov 2024 22:15:49 +0100
Subject: [PATCH 04/92] docs: added full package names in README

---
 README.md | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index 97487b2..0ce806b 100644
--- a/README.md
+++ b/README.md
@@ -37,16 +37,16 @@ Each of the scripts has its usage described in their respective `README.md` file

 #### CLI Usage Generic scripts
-1. [dir_packager: README](src/dir_packager/README.md)
-2. [directory_flattener: README](src/directory_flattener/README.md)
-3. [file_renamer: README](src/file_renamer/README.md)
-4. [json_merger: README](src/json_merger/README.md)
-5. [processed_mapping_copier: README](src/processed_mapping_copier/README.md)
+1. [Directory Packager (dir_packager): README](src/dir_packager/README.md)
+2. [Directory Flattener (directory_flattener): README](src/directory_flattener/README.md)
+3. [File Renamer (file_renamer): README](src/file_renamer/README.md)
+4. [JSON Merger (json_merger): README](src/json_merger/README.md)
+5. [Processed Mapping Copier (processed_mapping_copier): README](src/processed_mapping_copier/README.md)

 #### CLI Usage StarCraft 2 Specific Scripts
-1. [sc2_map_downloader: README](src/sc2/sc2_map_downloader/README.md)
-2. [sc2_replaypack_processor: README](src/sc2/sc2_replaypack_processor/README.md)
-3. [sc2reset_replaypack_downloader: README](src/sc2/sc2reset_replaypack_downloader/README.md)
+1. [SC2 Map Downloader (sc2_map_downloader): README](src/sc2/sc2_map_downloader/README.md)
+2. [SC2 Replaypack Processor (sc2_replaypack_processor): README](src/sc2/sc2_replaypack_processor/README.md)
+3. [SC2ReSet Replaypack Downloader (sc2reset_replaypack_downloader): README](src/sc2/sc2reset_replaypack_downloader/README.md)

 ## SC2EGSet Dataset Preparation Steps

From ef5a36dae8263d6a454603ade85644a8deb4bb5c Mon Sep 17 00:00:00 2001
From: Kaszanas <34846245+Kaszanas@users.noreply.github.com>
Date: Mon, 11 Nov 2024 22:23:21 +0100
Subject: [PATCH 05/92] docs: simplified docs, sc2egset using docker

---
 README.md | 34 ++++++++++++++++------------------
 1 file changed, 16 insertions(+), 18 deletions(-)

diff --git a/README.md b/README.md
index 0ce806b..fefbedd 100644
--- a/README.md
+++ b/README.md
@@ -54,26 +54,24 @@ To reproduce our experience with defining a dataset and to be able to compare yo

 ### Using Docker

-We provide a release image containing all of the scripts. To see the usage of these scripts please refer to their respective ``README.md`` files.
+We provide a release image containing all of the scripts. To see the usage of these scripts, please refer to their respective ``README.md`` files, as described above.

+The following steps were used to prepare the SC2EGSet dataset:
 1. Build the docker image from: https://github.com/Kaszanas/SC2InfoExtractorGo
-2. Run the commands as described in the ```makefile```. But first make sure that all of the script parameters are set according to your needs.
-
-### Using Python
-
-0. Obtain replays to process. This can be a replaypack or your own replay folder.
-1. Download latest version of [SC2InfoExtractorGo](https://github.com/Kaszanas/SC2InfoExtractorGo), or build it from source.
-2. **Optional** If the replays that you have are held in nested directories it is best to use ```src/directory_flattener.py```. This will copy the directory and place all of the files to the top directory where it can be further processed. In order to preserve the old directory structure, a .json file is created. The file contains the old directory tree to a mapping: ```{"replayUniqueHash": "whereItWasInOldStructure"}```. This step is is required in order to properly use [SC2InfoExtractorGo](https://github.com/Kaszanas/SC2InfoExtractorGo) as it only lists the files immediately available on the top level of the input directory. [SC2InfoExtractorGo](https://github.com/Kaszanas/SC2InfoExtractorGo).
-3. **Optional** Use the map downloader ```src/sc2_map_downloader.py``` to download maps that were used in the replays that you obtained. This is required for the next step.
-4. **Optional** Use the [SC2MapLocaleExtractor](https://github.com/Kaszanas/SC2MapLocaleExtractor) to obtain the mapping of ```{"foreign_map_name": "english_map_name"}``` which is required for the [SC2InfoExtractorGo](https://github.com/Kaszanas/SC2InfoExtractorGo) to translate the map names in the output .json files.
-5. Perform replaypack processing using ```src/sc2_replaypack_processor.py``` with the [SC2InfoExtractorGo](https://github.com/Kaszanas/SC2InfoExtractorGo) placed in PATH, or next to the script.
-6. **Optional** Using the ```src/file_renamer.py```, rename the files that were generated in the previous step. This is not required and is done to increase the readibility of the directory structure for the output.
-7. Using the ```src/file_packager.py```, create .zip archives containing the datasets and the supplementary files. By finishing this stage, your dataset should be ready to upload.
-
-#### Customization
-
-In order to specify different processing flags for https://github.com/Kaszanas/SC2InfoExtractorGo please modify the ```sc2_replaypack_processor.py``` file directly
-
+2. Build the docker image for the DatasetPreparator using the provided ```makefile``` command: ```make docker_build```.
+3. Place the input replaypacks into the `./processing/directory_flattener/` directory.
+4. Run the command ```make all``` to process the replaypacks and create the dataset. The output will be placed in the `./processing/sc2_replaypack_processor/output` directory.
+

 ## Contributing and Reporting Issues

From b5b9c9bebae89442a270e5f6dca979f86d0bb8db Mon Sep 17 00:00:00 2001
From: Kaszanas <34846245+Kaszanas@users.noreply.github.com>
Date: Mon, 11 Nov 2024 23:05:06 +0100
Subject: [PATCH 06/92] refactor: no random uuid, using file hash in flattener

---
 .../directory_flattener/README.md             |  2 +-
 .../directory_flattener.py                    | 28 +++++++++++++++++--
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/src/datasetpreparator/directory_flattener/README.md b/src/datasetpreparator/directory_flattener/README.md
index 5e402a8..3bee53b 100644
--- a/src/datasetpreparator/directory_flattener/README.md
+++ b/src/datasetpreparator/directory_flattener/README.md
@@ -1,6 +1,6 @@
 # Directory Flattener

-Utility script for entering each of the supplied replaypacks and flattening its structure.
+Utility script for entering each of the supplied replaypacks and flattening its structure. Please note that in the process of flattening the structure, the script will also rename the files using their hash values. Filenames are hashed to avoid collisions between files that share a name but live in different directories.

 # CLI Usage

diff --git a/src/datasetpreparator/directory_flattener/directory_flattener.py b/src/datasetpreparator/directory_flattener/directory_flattener.py
index e30a9d9..08c68fd 100644
--- a/src/datasetpreparator/directory_flattener/directory_flattener.py
+++ b/src/datasetpreparator/directory_flattener/directory_flattener.py
@@ -1,7 +1,6 @@
 import os
 from pathlib import Path
 from typing import Dict, List, Tuple
-import uuid
 import json
 import shutil
 import logging

 import click

 from datasetpreparator.settings import LOGGING_FORMAT
+import hashlib


 def save_dir_mapping(output_path: str, dir_mapping: dict) -> None:
@@ -27,6 +27,30 @@ def save_dir_mapping(output_path: str, dir_mapping: dict) -> None:
         json.dump(dir_mapping, json_file)


+def calculate_file_hash(file_path: Path) -> str:
+    """
+    Calculates the file hash using the MD5 algorithm.
+
+    Parameters
+    ----------
+    file_path : Path
+        Path to the file which will be hashed.
+
+    Returns
+    -------
+    str
+        Returns the hash of the file.
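+
+    Examples
+    --------
+    A minimal illustration; ``example.txt`` is a hypothetical file created
+    here only for the example (any file with identical bytes yields the
+    same digest):
+
+    >>> _ = Path("example.txt").write_bytes(b"hello")
+    >>> calculate_file_hash(Path("example.txt"))
+    '5d41402abc4b2a76b9719d911017c592'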
+ """ + + # Open the file, read it in binary mode and calculate the hash: + with open(file_path, "rb") as file: + file_hash = hashlib.md5() + while chunk := file.read(4096): + file_hash.update(chunk) + + return file_hash.hexdigest() + + def directory_flatten( root_directory: Path, list_of_files: List[Path], @@ -54,7 +78,7 @@ def directory_flatten( dir_structure_mapping = {} for file in list_of_files: # Get unique filename: - unique_filename = uuid.uuid4().hex + unique_filename = calculate_file_hash(file) original_extension = file.suffix new_path_and_filename = Path(dir_output_path, unique_filename).with_suffix( original_extension From 3d03363441fa8d9083b8c8d4cdcf4c4434673737 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Mon, 11 Nov 2024 23:13:43 +0100 Subject: [PATCH 07/92] docs: fixing typo in PR template --- .github/PULL_REQUEST_TEMPLATE.MD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.MD b/.github/PULL_REQUEST_TEMPLATE.MD index e9abd0d..39c679c 100644 --- a/.github/PULL_REQUEST_TEMPLATE.MD +++ b/.github/PULL_REQUEST_TEMPLATE.MD @@ -2,7 +2,7 @@ ## Description -## Related IssueS +## Related Issues From b5b9c9bebae89442a270e5f6dca979f86d0bb8db Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Tue, 12 Nov 2024 14:28:05 +0100 Subject: [PATCH 08/92] refactor: multiprocessing off in sc2_replaypack_processor Currently the issue with multiprocessing is that the SC2InfoExtractorGo downloads the maps to get the localized map name into the final JSON file. Due to this there cannot be multiple processes running unless all of the required maps are download prior to executing the SC2InfoExtractorGo. This could be changed in the future if the first pass is ran with -download_maps_only --- .../sc2_replaypack_processor.py | 12 ++++++++---- .../sc2_replaypack_processor/utils/multiprocess.py | 4 ++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/datasetpreparator/sc2/sc2_replaypack_processor/sc2_replaypack_processor.py b/src/datasetpreparator/sc2/sc2_replaypack_processor/sc2_replaypack_processor.py index da68549..91fae9f 100644 --- a/src/datasetpreparator/sc2/sc2_replaypack_processor/sc2_replaypack_processor.py +++ b/src/datasetpreparator/sc2/sc2_replaypack_processor/sc2_replaypack_processor.py @@ -10,7 +10,7 @@ SC2InfoExtractorGoArguments, ) from datasetpreparator.sc2.sc2_replaypack_processor.utils.multiprocess import ( - multiprocessing_scheduler, + process_single_replaypack, ) @@ -29,7 +29,7 @@ def sc2_replaypack_processor( input_path = arguments.input_path output_path = arguments.output_path - n_processes = arguments.n_processes + # n_processes = arguments.n_processes multiprocessing_list = [] for maybe_dir in tqdm(list(input_path.iterdir())): @@ -67,11 +67,15 @@ def sc2_replaypack_processor( multiprocessing_list.append(sc2_info_extractor_go_args) - multiprocessing_scheduler(multiprocessing_list, int(n_processes)) + for args in multiprocessing_list: + logging.debug(f"Processing: {args}") + process_single_replaypack(arguments=args) + + # multiprocessing_scheduler(multiprocessing_list, int(n_processes)) @click.command( - help="Tool used for processing StarCraft 2 (SC2) datasets. with https://github.com/Kaszanas/SC2InfoExtractorGo." + help="Tool used to execute SC2InfoExtractorGo (https://github.com/Kaszanas/SC2InfoExtractorGo) on multiple replaypack directories. Assists in processing StarCraft 2 (SC2) datasets." 
 )
 @click.option(
     "--input_path",

diff --git a/src/datasetpreparator/sc2/sc2_replaypack_processor/utils/multiprocess.py b/src/datasetpreparator/sc2/sc2_replaypack_processor/utils/multiprocess.py
index a951a93..330451e 100644
--- a/src/datasetpreparator/sc2/sc2_replaypack_processor/utils/multiprocess.py
+++ b/src/datasetpreparator/sc2/sc2_replaypack_processor/utils/multiprocess.py
@@ -27,12 +27,12 @@ def multiprocessing_scheduler(
     """

     with Pool(processes=number_of_processes) as pool:
-        pool.imap_unordered(multiprocessing_client, processing_arguments)
+        pool.imap_unordered(process_single_replaypack, processing_arguments)
         pool.close()
         pool.join()


-def multiprocessing_client(arguments: SC2InfoExtractorGoArguments) -> None:
+def process_single_replaypack(arguments: SC2InfoExtractorGoArguments) -> None:
     """
     Responsible for running a single process that will extract data from
     a replaypack.

From b7c31d27f10572aabfdfcd8956fb978ad08c14f0 Mon Sep 17 00:00:00 2001
From: Kaszanas <34846245+Kaszanas@users.noreply.github.com>
Date: Tue, 12 Nov 2024 14:32:59 +0100
Subject: [PATCH 09/92] refactor: renamed sc2_replaypack_processor -> sc2egset_replaypack

---
 README.md                                                   | 2 +-
 .../Dockerfile                                              | 6 +++---
 .../README.md                                               | 0
 .../__init__.py                                             | 0
 .../sc2egset_replaypack_processor.py}                       | 4 ++--
 .../utils/__init__.py                                       | 0
 .../utils/multiprocess.py                                   | 2 +-
 .../utils/replaypack_processor_args.py                      | 0
 ...cessor_test.py => sc2egset_replaypack_processor_test.py} | 2 +-
 9 files changed, 8 insertions(+), 8 deletions(-)
 rename src/datasetpreparator/sc2/{sc2_replaypack_processor => sc2egset_replaypack_processor}/Dockerfile (75%)
 rename src/datasetpreparator/sc2/{sc2_replaypack_processor => sc2egset_replaypack_processor}/README.md (100%)
 rename src/datasetpreparator/sc2/{sc2_replaypack_processor => sc2egset_replaypack_processor}/__init__.py (100%)
 rename src/datasetpreparator/sc2/{sc2_replaypack_processor/sc2_replaypack_processor.py => sc2egset_replaypack_processor/sc2egset_replaypack_processor.py} (95%)
 rename src/datasetpreparator/sc2/{sc2_replaypack_processor => sc2egset_replaypack_processor}/utils/__init__.py (100%)
 rename src/datasetpreparator/sc2/{sc2_replaypack_processor => sc2egset_replaypack_processor}/utils/multiprocess.py (96%)
 rename src/datasetpreparator/sc2/{sc2_replaypack_processor => sc2egset_replaypack_processor}/utils/replaypack_processor_args.py (100%)
 rename tests/test_cases/{sc2_replaypack_processor_test.py => sc2egset_replaypack_processor_test.py} (96%)

diff --git a/README.md b/README.md
index fefbedd..fd4fd0d 100644
--- a/README.md
+++ b/README.md
@@ -45,7 +45,7 @@ Each of the scripts has its usage described in their respective `README.md` file

 #### CLI Usage StarCraft 2 Specific Scripts
 1. [SC2 Map Downloader (sc2_map_downloader): README](src/sc2/sc2_map_downloader/README.md)
-2. [SC2 Replaypack Processor (sc2_replaypack_processor): README](src/sc2/sc2_replaypack_processor/README.md)
+2. [SC2EGSet Replaypack Processor (sc2egset_replaypack_processor): README](src/sc2/sc2egset_replaypack_processor/README.md)
 3. [SC2ReSet Replaypack Downloader (sc2reset_replaypack_downloader): README](src/sc2/sc2reset_replaypack_downloader/README.md)

 ## SC2EGSet Dataset Preparation Steps

diff --git a/src/datasetpreparator/sc2/sc2_replaypack_processor/Dockerfile b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/Dockerfile
similarity index 75%
rename from src/datasetpreparator/sc2/sc2_replaypack_processor/Dockerfile
rename to src/datasetpreparator/sc2/sc2egset_replaypack_processor/Dockerfile
index 747e8c7..27797af 100644
--- a/src/datasetpreparator/sc2/sc2_replaypack_processor/Dockerfile
+++ b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/Dockerfile
@@ -4,7 +4,7 @@ FROM sc2-info-extractor

 FROM python:3.10-alpine

-WORKDIR /sc2-dataset-preparator
+WORKDIR /sc2_dataset_preparator

 # Copying the replay parsing tool:
 COPY --from=0 /SC2InfoExtractorGo /SC2InfoExtractorGo
@@ -15,6 +15,6 @@ RUN pip install click tqdm
 # Copying all Python scripts
 COPY . .

-WORKDIR /sc2-dataset-preparator/src
+WORKDIR /sc2_dataset_preparator/src

-CMD ["python3", "sc2_replaypack_processor.py"]
+CMD ["python3", "sc2egset_replaypack_processor.py"]

diff --git a/src/datasetpreparator/sc2/sc2_replaypack_processor/README.md b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/README.md
similarity index 100%
rename from src/datasetpreparator/sc2/sc2_replaypack_processor/README.md
rename to src/datasetpreparator/sc2/sc2egset_replaypack_processor/README.md

diff --git a/src/datasetpreparator/sc2/sc2_replaypack_processor/__init__.py b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/__init__.py
similarity index 100%
rename from src/datasetpreparator/sc2/sc2_replaypack_processor/__init__.py
rename to src/datasetpreparator/sc2/sc2egset_replaypack_processor/__init__.py

diff --git a/src/datasetpreparator/sc2/sc2_replaypack_processor/sc2_replaypack_processor.py b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py
similarity index 95%
rename from src/datasetpreparator/sc2/sc2_replaypack_processor/sc2_replaypack_processor.py
rename to src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py
index 91fae9f..3424398 100644
--- a/src/datasetpreparator/sc2/sc2_replaypack_processor/sc2_replaypack_processor.py
+++ b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py
@@ -5,11 +5,11 @@
 from tqdm import tqdm

 from datasetpreparator.settings import LOGGING_FORMAT
-from datasetpreparator.sc2.sc2_replaypack_processor.utils.replaypack_processor_args import (
+from datasetpreparator.sc2.sc2egset_replaypack_processor.utils.replaypack_processor_args import (
     ReplaypackProcessorArguments,
     SC2InfoExtractorGoArguments,
 )
-from datasetpreparator.sc2.sc2_replaypack_processor.utils.multiprocess import (
+from datasetpreparator.sc2.sc2egset_replaypack_processor.utils.multiprocess import (
     process_single_replaypack,
 )

diff --git a/src/datasetpreparator/sc2/sc2_replaypack_processor/utils/__init__.py b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/__init__.py
similarity index 100%
rename from src/datasetpreparator/sc2/sc2_replaypack_processor/utils/__init__.py
rename to src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/__init__.py

diff --git a/src/datasetpreparator/sc2/sc2_replaypack_processor/utils/multiprocess.py b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/multiprocess.py
similarity index 96%
rename from src/datasetpreparator/sc2/sc2_replaypack_processor/utils/multiprocess.py
rename to src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/multiprocess.py
index 330451e..4da3a9e 100644
--- a/src/datasetpreparator/sc2/sc2_replaypack_processor/utils/multiprocess.py
+++ b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/multiprocess.py
@@ -6,7 +6,7 @@

 from typing import List

-from datasetpreparator.sc2.sc2_replaypack_processor.utils.replaypack_processor_args import (
+from datasetpreparator.sc2.sc2egset_replaypack_processor.utils.replaypack_processor_args import (
     SC2InfoExtractorGoArguments,
 )

diff --git a/src/datasetpreparator/sc2/sc2_replaypack_processor/utils/replaypack_processor_args.py b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/replaypack_processor_args.py
similarity index 100%
rename from src/datasetpreparator/sc2/sc2_replaypack_processor/utils/replaypack_processor_args.py
rename to src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/replaypack_processor_args.py

diff --git a/tests/test_cases/sc2_replaypack_processor_test.py b/tests/test_cases/sc2egset_replaypack_processor_test.py
similarity index 96%
rename from tests/test_cases/sc2_replaypack_processor_test.py
rename to tests/test_cases/sc2egset_replaypack_processor_test.py
index 6f134f5..a2ea1d3 100644
--- a/tests/test_cases/sc2_replaypack_processor_test.py
+++ b/tests/test_cases/sc2egset_replaypack_processor_test.py
@@ -1,6 +1,6 @@
 import unittest

-from datasetpreparator.sc2.sc2_replaypack_processor.sc2_replaypack_processor import (
+from datasetpreparator.sc2.sc2egset_replaypack_processor.sc2egset_replaypack_processor import (
     sc2_replaypack_processor,
 )

From a7c84c2054b3fec3ffc3c19a9b65f67326f5b832 Mon Sep 17 00:00:00 2001
From: Kaszanas <34846245+Kaszanas@users.noreply.github.com>
Date: Tue, 12 Nov 2024 14:58:37 +0100
Subject: [PATCH 10/92] docs: added link to citation at the top

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index fd4fd0d..215cbbf 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@

 # DatasetPreparator

-Tools in this repository were used to create the **[SC2ReSet: StarCraft II Esport Replaypack Set](https://doi.org/10.5281/zenodo.5575796)**, and finally **[SC2EGSet: StarCraft II Esport Game State Dataset](https://doi.org/10.5281/zenodo.5503997)**.
+Tools in this repository were used to create the **[SC2ReSet: StarCraft II Esport Replaypack Set](https://doi.org/10.5281/zenodo.5575796)**, and finally **[SC2EGSet: StarCraft II Esport Game State Dataset](https://doi.org/10.5281/zenodo.5503997)**; for citation information see [Cite Us!](#cite-us).

 ## Installation

@@ -80,7 +80,7 @@ If you want to report a bug, request a feature, or open any other issue, please

 Please see **[CONTRIBUTING.md](https://github.com/Kaszanas/SC2DatasetPreparator/blob/main/CONTRIBUTING.md)** for detailed development instructions and contribution guidelines.

-## Citing
+## Cite Us!

 ### This Repository

From 3fd124ea75a5f59e68a77d3969731d44cff3537 Mon Sep 17 00:00:00 2001
From: Kaszanas <34846245+Kaszanas@users.noreply.github.com>
Date: Wed, 13 Nov 2024 16:11:54 +0100
Subject: [PATCH 11/92] perf: downloading maps as a pre-process step

---
 .../sc2egset_replaypack_processor.py          | 101 +++++++++++-------
 .../utils/multiprocess.py                     |  32 +++++-
 .../utils/replaypack_processor_args.py        |  13 +++
 .../sc2egset_replaypack_processor_test.py     |   4 +-
 4 files changed, 107 insertions(+), 43 deletions(-)

diff --git a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py
index 3424398..2cb10e8 100644
--- a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py
+++ b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py
@@ -10,11 +10,53 @@
     SC2InfoExtractorGoArguments,
 )
 from datasetpreparator.sc2.sc2egset_replaypack_processor.utils.multiprocess import (
-    process_single_replaypack,
+    multiprocessing_scheduler,
+    pre_process_download_maps,
 )


+def define_sc2egset_args(
+    input_path: Path,
+    output_path: Path,
+    arguments: ReplaypackProcessorArguments,
+    maybe_dir: Path,
+) -> SC2InfoExtractorGoArguments | None:
+    logging.debug(f"Processing entry: {maybe_dir}")
+    processing_input_dir = Path(input_path, maybe_dir).resolve()
+    if not processing_input_dir.is_dir():
+        logging.debug("Entry is not a directory, skipping!")
+        return None
+
+    logging.debug(f"Output dir: {output_path}")
+    # Create the main output directory:
+    if not output_path.exists():
+        output_path.mkdir()
+
+    # TODO: use pathlib:
+    path, output_directory_name = os.path.split(maybe_dir)
+    logging.debug(f"Output dir name: {output_directory_name}")
+    if output_directory_name == "input":
+        return None
+
+    output_directory_with_name = Path(output_path, output_directory_name).resolve()
+    logging.debug(f"Output filepath: {output_directory_with_name}")
+
+    # Create the output subdirectories:
+    if not output_directory_with_name.exists():
+        output_directory_with_name.mkdir()
+
+    sc2_info_extractor_go_args = (
+        SC2InfoExtractorGoArguments.get_sc2egset_processing_args(
+            processing_input=processing_input_dir,
+            output=output_directory_with_name,
+            perform_chat_anonymization=arguments.perform_chat_anonymization,
+        )
+    )
+
+    return sc2_info_extractor_go_args
+
+
+def sc2egset_replaypack_processor(
     arguments: ReplaypackProcessorArguments,
 ):
     """

     input_path = arguments.input_path
     output_path = arguments.output_path
-    # n_processes = arguments.n_processes
+    n_processes = arguments.n_processes

     multiprocessing_list = []
     for maybe_dir in tqdm(list(input_path.iterdir())):
+        sc2_info_extractor_go_args = define_sc2egset_args(
+            input_path=input_path,
+            output_path=output_path,
+            arguments=arguments,
+            maybe_dir=maybe_dir,
+        )
+        if sc2_info_extractor_go_args is not None:
+            multiprocessing_list.append(sc2_info_extractor_go_args)

-    for args in multiprocessing_list:
-        logging.debug(f"Processing: {args}")
-        process_single_replaypack(arguments=args)
+    # Pre-process, download all maps:
+    logging.info("Downloading all maps...")
+    map_download_arguments = SC2InfoExtractorGoArguments.get_download_maps_args(
+        processing_input=arguments.input_path, output=arguments.output_path
+    )
+    pre_process_download_maps(arguments=map_download_arguments)

-    # multiprocessing_scheduler(multiprocessing_list, int(n_processes))
+    # Run processing with multiple SC2InfoExtractorGo instances:
+    multiprocessing_scheduler(multiprocessing_list, int(n_processes))


 @click.command(
     help="Tool used to execute SC2InfoExtractorGo (https://github.com/Kaszanas/SC2InfoExtractorGo) on multiple replaypack directories. Assists in processing StarCraft 2 (SC2) datasets."
 )
 @click.option(
     "--input_path",

         n_processes=n_processes,
     )

-    sc2_replaypack_processor(arguments=arguments)
+    sc2egset_replaypack_processor(arguments=arguments)


 if __name__ == "__main__":

diff --git a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/multiprocess.py b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/multiprocess.py
index 4da3a9e..3c1f319 100644
--- a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/multiprocess.py
+++ b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/multiprocess.py
@@ -40,7 +40,7 @@ def process_single_replaypack(arguments: SC2InfoExtractorGoArguments) -> None:
     Parameters
     ----------
     arguments : SC2InfoExtractorGoArguments
-        Arguments tuple containing the input and output directory.
+        Specifies all of the arguments required to run SC2InfoExtractorGo.
     """

     # TODO: This will be refactored to use only the arguments object:
@@ -79,3 +79,33 @@ def process_single_replaypack(arguments: SC2InfoExtractorGoArguments) -> None:
             f"-log_dir={output_directory_filepath}/",
         ]
     )
+
+
+def pre_process_download_maps(arguments: SC2InfoExtractorGoArguments) -> None:
+    """
+    Acts as a pre-processing step that executes SC2InfoExtractorGo with the
+    -only_map_download flag. Maps are required in the later steps of
+    processing, and multiple SC2InfoExtractorGo instances will be running in
+    parallel. Because of that, the maps cannot be downloaded and processed
+    at the same time.
+
+    Parameters
+    ----------
+    arguments : SC2InfoExtractorGoArguments
+        Specifies all of the arguments required to run SC2InfoExtractorGo.
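+
+    Notes
+    -----
+    Downloading every map once in this pre-processing pass means that the
+    SC2InfoExtractorGo instances which later run in parallel do not race
+    to download the same maps.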
+ """ + + output_directory_filepath = arguments.output + + subprocess.run( + [ + # FIXME hardcoded binary name + "/SC2InfoExtractorGo", + f"-input={arguments.processing_input}/", + f"-output={arguments.output}/", + "-only_map_download=true", + f"-max_procs={arguments.max_procs}", + f"-log_level={arguments.log_level}", + f"-log_dir={output_directory_filepath}/", + ] + ) diff --git a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/replaypack_processor_args.py b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/replaypack_processor_args.py index f2037e4..cefd9ee 100644 --- a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/replaypack_processor_args.py +++ b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/replaypack_processor_args.py @@ -92,6 +92,19 @@ def __init__( ) self.perform_validity_checks = "true" if perform_validity_checks else "false" + @staticmethod + def get_download_maps_args( + processing_input: Path, output: Path + ) -> "SC2InfoExtractorGoArguments": + arguments = SC2InfoExtractorGoArguments( + processing_input=processing_input, + output=output, + only_map_download=True, + max_procs=4, + ) + + return arguments + @staticmethod def get_sc2egset_processing_args( processing_input: Path, diff --git a/tests/test_cases/sc2egset_replaypack_processor_test.py b/tests/test_cases/sc2egset_replaypack_processor_test.py index a2ea1d3..24fc97f 100644 --- a/tests/test_cases/sc2egset_replaypack_processor_test.py +++ b/tests/test_cases/sc2egset_replaypack_processor_test.py @@ -1,7 +1,7 @@ import unittest from datasetpreparator.sc2.sc2egset_replaypack_processor.sc2egset_replaypack_processor import ( - sc2_replaypack_processor, + sc2egset_replaypack_processor, ) from tests.test_settings import ( @@ -44,7 +44,7 @@ def test_sc2_replaypack_processor(self): # Maybe a pytest marker to skip this test? # Should this even be tested given that the SC2InfoExtractorGo will have its own tests? # This script is only providing a multiprocessing wrapper for the SC2InfoExtractorGo. - sc2_replaypack_processor( + sc2egset_replaypack_processor( input_path=self.input_path, output_path=self.output_path, n_processes=1, From b172e40c9b7cbe24433a3437a812d0da808ca4f3 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Sun, 17 Nov 2024 16:30:20 +0100 Subject: [PATCH 12/92] docs: added more README documentation, added TOC --- README.md | 76 +++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 54 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index 2416aec..215cbbf 100644 --- a/README.md +++ b/README.md @@ -2,45 +2,77 @@ # DatasetPreparator -Tools in this repository were used to create the **[SC2ReSet: StarCraft II Esport Replaypack Set](https://doi.org/10.5281/zenodo.5575796)**, and finally **[SC2EGSet: StarCraft II Esport Game State Dataset](https://doi.org/10.5281/zenodo.5503997)**. +Tools in this repository were used to create the **[SC2ReSet: StarCraft II Esport Replaypack Set](https://doi.org/10.5281/zenodo.5575796)**, and finally **[SC2EGSet: StarCraft II Esport Game State Dataset](https://doi.org/10.5281/zenodo.5503997)**, citation information [Cite Us!](#cite-us). ## Installation -To install current version of the toolset as separate CLI tools run the following command: +Our prefered way of distributing the toolset is through DockerHub. We Use the Docker Image to provide a fully reproducible environment for our scripts. 
+
+To build the image, please run the following command:
+
+```bash
+make docker_build
+```
+
+After building the image, please refer to the **[Command Line Arguments Usage](#command-line-arguments-usage)** section for the usage of the scripts.
+
+## Command Line Arguments Usage
+
+When using Docker, you will have to pass the arguments through the `docker run` command and mount the input/output directory. Below is an example of how to run the `directory_flattener` script using Docker. For ease of use, we have prepared an example directory structure in the `processing` directory. The command below uses it to flatten the directory structure:
+
+```bash
+docker run -v "./processing:/app/processing" datasetpreparator python3 directory_flattener.py --input_path /app/processing/directory_flattener/input --output_path /app/processing/directory_flattener/output
+```
+
+### Table of Contents
+
+Each of the scripts has its usage described in its respective `README.md` file; you can find the table of contents below.
+
+#### CLI Usage Generic scripts
+1. [Directory Packager (dir_packager): README](src/dir_packager/README.md)
+2. [Directory Flattener (directory_flattener): README](src/directory_flattener/README.md)
+3. [File Renamer (file_renamer): README](src/file_renamer/README.md)
+4. [JSON Merger (json_merger): README](src/json_merger/README.md)
+5. [Processed Mapping Copier (processed_mapping_copier): README](src/processed_mapping_copier/README.md)
+
+#### CLI Usage StarCraft 2 Specific Scripts
+1. [SC2 Map Downloader (sc2_map_downloader): README](src/sc2/sc2_map_downloader/README.md)
+2. [SC2EGSet Replaypack Processor (sc2egset_replaypack_processor): README](src/sc2/sc2egset_replaypack_processor/README.md)
+3. [SC2ReSet Replaypack Downloader (sc2reset_replaypack_downloader): README](src/sc2/sc2reset_replaypack_downloader/README.md)

-## Dataset Preparation Steps
+## SC2EGSet Dataset Preparation Steps

 To reproduce our experience with defining a dataset and to be able to compare your results with our work we describe how to perform the processing below.

 ### Using Docker

+We provide a release image containing all of the scripts. To see the usage of these scripts, please refer to their respective ``README.md`` files, as described above.
+
+The following steps were used to prepare the SC2EGSet dataset:
 1. Build the docker image from: https://github.com/Kaszanas/SC2InfoExtractorGo
-2. Run the commands as described in the ```makefile```. But first make sure that all of the script parameters are set according to your needs.
-
-### Using Python
-
-0. Obtain replays to process. This can be a replaypack or your own replay folder.
-1. Download latest version of [SC2InfoExtractorGo](https://github.com/Kaszanas/SC2InfoExtractorGo), or build it from source.
-2. **Optional** If the replays that you have are held in nested directories it is best to use ```src/directory_flattener.py```. This will copy the directory and place all of the files to the top directory where it can be further processed. In order to preserve the old directory structure, a .json file is created. The file contains the old directory tree to a mapping: ```{"replayUniqueHash": "whereItWasInOldStructure"}```. This step is is required in order to properly use [SC2InfoExtractorGo](https://github.com/Kaszanas/SC2InfoExtractorGo) as it only lists the files immediately available on the top level of the input directory. [SC2InfoExtractorGo](https://github.com/Kaszanas/SC2InfoExtractorGo).
-3. **Optional** Use the map downloader ```src/sc2_map_downloader.py``` to download maps that were used in the replays that you obtained. This is required for the next step.
-4. **Optional** Use the [SC2MapLocaleExtractor](https://github.com/Kaszanas/SC2MapLocaleExtractor) to obtain the mapping of ```{"foreign_map_name": "english_map_name"}``` which is required for the [SC2InfoExtractorGo](https://github.com/Kaszanas/SC2InfoExtractorGo) to translate the map names in the output .json files.
-5. Perform replaypack processing using ```src/sc2_replaypack_processor.py``` with the [SC2InfoExtractorGo](https://github.com/Kaszanas/SC2InfoExtractorGo) placed in PATH, or next to the script.
-6. **Optional** Using the ```src/file_renamer.py```, rename the files that were generated in the previous step. This is not required and is done to increase the readibility of the directory structure for the output.
-7. Using the ```src/file_packager.py```, create .zip archives containing the datasets and the supplementary files. By finishing this stage, your dataset should be ready to upload.
-
-#### Customization
-
-In order to specify different processing flags for https://github.com/Kaszanas/SC2InfoExtractorGo please modify the ```sc2_replaypack_processor.py``` file directly
+2. Build the docker image for the DatasetPreparator using the provided ```makefile``` command: ```make docker_build```.
+3. Place the input replaypacks into the `./processing/directory_flattener/` directory.
+4. Run the command ```make all``` to process the replaypacks and create the dataset. The output will be placed in the `./processing/sc2_replaypack_processor/output` directory.

-## Command Line Arguments Usage
-
-Each of the scripts has its usage described in their respective `README.md` files.

 ## Contributing and Reporting Issues

From 82282ca0b070c889e7be99182e91e2de81606c89 Mon Sep 17 00:00:00 2001
From: Kaszanas <34846245+Kaszanas@users.noreply.github.com>
Date: Sun, 17 Nov 2024 16:30:49 +0100
Subject: [PATCH 13/92] docs: formatting CONTRIBUTING

---
 CONTRIBUTING.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 6a02b3b..6ae5d75 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -61,15 +61,15 @@ Ready to contribute? Here's how to set up `datasetpreparator` for local developm

 1. Download a copy of `datasetpreparator` locally.
 2. Install `datasetpreparator` using `poetry`:

-    ```console
-    poetry install
-    ```
+```console
+    poetry install
+```

 3. Use `git` (or similar) to create a branch for local development and make your changes:

-    ```console
-    git checkout -b name-of-your-bugfix-or-feature
-    ```
+```console
+    git checkout -b name-of-your-bugfix-or-feature
+```
From 4aa2092f151c4317cf664506bb544ea968309e7c Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Sun, 17 Nov 2024 16:31:33 +0100 Subject: [PATCH 14/92] refactor: capitalized "AS" in docker --- docker/Dockerfile | 2 +- docker/Dockerfile.dev | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index bfd459d..7fa99aa 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -3,7 +3,7 @@ ARG PYTHON_VERSION=3.11 -FROM kaszanas/sc2infoextractorgo:latest as extractor +FROM kaszanas/sc2infoextractorgo:latest AS extractor FROM python:${PYTHON_VERSION}-alpine diff --git a/docker/Dockerfile.dev b/docker/Dockerfile.dev index 74add80..67f98ab 100644 --- a/docker/Dockerfile.dev +++ b/docker/Dockerfile.dev @@ -3,7 +3,7 @@ ARG PYTHON_VERSION=3.10.11 -FROM python:${PYTHON_VERSION} +FROM python:${PYTHON_VERSION} AS base # Install other dependencies such as git, etc. This is needed for # pre-commit hooks to work and other QOL stuff. From ec5cbe8f51afac1555f0909dcccede163b7b1bcf Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Sun, 17 Nov 2024 17:49:30 +0100 Subject: [PATCH 15/92] docs: drafted script README files with Docker --- src/datasetpreparator/dir_packager/README.md | 25 ++++++++++++++++- .../directory_flattener/README.md | 28 +++++++++++++++++++ src/datasetpreparator/file_renamer/README.md | 23 +++++++++++++++ src/datasetpreparator/json_merger/README.md | 26 +++++++++++++++++ .../processed_mapping_copier/README.md | 27 ++++++++++++++++++ .../sc2/sc2_map_downloader/README.md | 27 ++++++++++++++++++ .../sc2reset_replaypack_downloader/README.md | 24 ++++++++++++++++ 7 files changed, 179 insertions(+), 1 deletion(-) diff --git a/src/datasetpreparator/dir_packager/README.md b/src/datasetpreparator/dir_packager/README.md index 56d8b74..8501c73 100644 --- a/src/datasetpreparator/dir_packager/README.md +++ b/src/datasetpreparator/dir_packager/README.md @@ -1,6 +1,6 @@ # File Packager -Utility script for compressing files into a `.zip` archive. +Utility script for compressing a directory into a `.zip` archive. # CLI Usage @@ -15,3 +15,26 @@ options: --input_dir INPUT_DIR (default = ../../processing/sc2_replaypack_processor/output) Please provide input path to the directory containing the dataset that is going to be processed by packaging into .zip archives. ``` + +# Execute With Docker + +> [!NOTE] +> There are two ways of executing this script with Docker. One is to use the main repository Dockerfile (available in `docker` directory) and the other is to use the Dockerfile contained in this directory. + +## Repository Docker Image + +Please refer to the main [README](../../README.md) for the instructions. + +## Script Docker Image + +Buil the docker image: +```bash +docker build --tag=datasetpreparator:file_packager . +``` + +Run the docker image (please replace ``): +```bash +docker run -v "<./input>:/app/input" \ + datasetpreparator:file_packager \ + python3 dir_packager.py --input_dir /app/input +``` diff --git a/src/datasetpreparator/directory_flattener/README.md b/src/datasetpreparator/directory_flattener/README.md index 5e402a8..2088a60 100644 --- a/src/datasetpreparator/directory_flattener/README.md +++ b/src/datasetpreparator/directory_flattener/README.md @@ -22,3 +22,31 @@ options: --file_extension FILE_EXTENSION (default = .SC2Replay) Please provide a file extension for files that will be moved and renamed. 
 ```
+
+# Execute With Docker
+
+> [!NOTE]
+> There are two ways of executing this script with Docker. One is to use the main repository Dockerfile (available in the `docker` directory) and the other is to use the Dockerfile contained in this directory.
+
+## Repository Docker Image
+
+Please refer to the main [README](../../README.md) for the instructions.
+
+## Script Docker Image
+
+Build the docker image:
+```bash
+docker build --tag=datasetpreparator:directory_flattener .
+```
+
+Run the docker image (please replace ``):
+```bash
+docker run \
+    -v "<./input>:/app/input" \
+    -v "<./output>:/app/output" \
+    datasetpreparator:file_packager \
+    python3 directory_flattener.py \
+    --input_dir /app/input \
+    --output_dir /app/output \
+    --file_extension .SC2Replay
+```

diff --git a/src/datasetpreparator/file_renamer/README.md b/src/datasetpreparator/file_renamer/README.md
index 4b83047..4fbac15 100644
--- a/src/datasetpreparator/file_renamer/README.md
+++ b/src/datasetpreparator/file_renamer/README.md
@@ -13,3 +13,26 @@ options:
   --input_dir INPUT_DIR (default = ../../processing/sc2_replaypack_processor/output)
     Please provide input path to the directory containing the dataset that is going to be processed.
 ```
+
+# Execute With Docker
+
+> [!NOTE]
+> There are two ways of executing this script with Docker. One is to use the main repository Dockerfile (available in the `docker` directory) and the other is to use the Dockerfile contained in this directory.
+
+## Repository Docker Image
+
+Please refer to the main [README](../../README.md) for the instructions.
+
+## Script Docker Image
+
+Build the docker image:
+```bash
+docker build --tag=datasetpreparator:file_renamer .
+```
+
+Run the docker image (please replace ``):
+```bash
+docker run -v "<./input>:/app/input" \
+    datasetpreparator:file_renamer \
+    python3 file_renamer.py --input_dir /app/input
+```

diff --git a/src/datasetpreparator/json_merger/README.md b/src/datasetpreparator/json_merger/README.md
index d136f49..b61ed4a 100644
--- a/src/datasetpreparator/json_merger/README.md
+++ b/src/datasetpreparator/json_merger/README.md
@@ -20,3 +20,29 @@ options:
   --output_filepath OUTPUT_FILEPATH (default = ../../processing/json_merger/merged.json)
     Please provide output path where sc2 map files will be downloaded.
 ```
+
+# Execute With Docker
+
+> [!NOTE]
+> There are two ways of executing this script with Docker. One is to use the main repository Dockerfile (available in the `docker` directory) and the other is to use the Dockerfile contained in this directory.
+
+## Repository Docker Image
+
+Please refer to the main [README](../../README.md) for the instructions.
+
+## Script Docker Image
+
+Build the docker image:
+```bash
+docker build --tag=datasetpreparator:json_merger .
+```
+
+Run the docker image (please replace ``):
+```bash
+docker run -v "<./input>:/app/input" \
+    datasetpreparator:json_merger \
+    python3 json_merger.py \
+    --json_one /app/input/json1.json \
+    --json_two /app/input/json2.json \
+    --output_filepath /app/input/merged.json
+```

diff --git a/src/datasetpreparator/processed_mapping_copier/README.md b/src/datasetpreparator/processed_mapping_copier/README.md
index 95bf222..bb17838 100644
--- a/src/datasetpreparator/processed_mapping_copier/README.md
+++ b/src/datasetpreparator/processed_mapping_copier/README.md
@@ -18,3 +18,30 @@ options:
   --output_path OUTPUT_PATH (default = ../../processing/sc2_replaypack_processor/output)
     Please provide output path where processed_mapping.json will be copied.
 ```
+
+# Execute With Docker
+
+> [!NOTE]
+> There are two ways of executing this script with Docker. One is to use the main repository Dockerfile (available in the `docker` directory) and the other is to use the Dockerfile contained in this directory.
+
+## Repository Docker Image
+
+Please refer to the main [README](../../README.md) for the instructions.
+
+## Script Docker Image
+
+Build the docker image:
+```bash
+docker build --tag=datasetpreparator:processed_mapping_copier .
+```
+
+Run the docker image (please replace ``):
+```bash
+docker run \
+    -v "<./input>:/app/input" \
+    -v "<./output>:/app/output" \
+    datasetpreparator:processed_mapping_copier \
+    python3 processed_mapping_copier.py \
+    --input_dir /app/input \
+    --output_dir /app/output
+```

diff --git a/src/datasetpreparator/sc2/sc2_map_downloader/README.md b/src/datasetpreparator/sc2/sc2_map_downloader/README.md
index 6f6d62e..c826a00 100644
--- a/src/datasetpreparator/sc2/sc2_map_downloader/README.md
+++ b/src/datasetpreparator/sc2/sc2_map_downloader/README.md
@@ -17,3 +17,30 @@ options:
   --output_path OUTPUT_PATH (default = ../../processing/sc2_map_downloader/output)
     Please provide output path where sc2 map files will be downloaded.
 ```
+
+# Execute With Docker
+
+> [!NOTE]
+> There are two ways of executing this script with Docker. One is to use the main repository Dockerfile (available in the `docker` directory) and the other is to use the Dockerfile contained in this directory.
+
+## Repository Docker Image
+
+Please refer to the main [README](../../README.md) for the instructions.
+
+## Script Docker Image
+
+Build the docker image:
+```bash
+docker build --tag=datasetpreparator:sc2_map_downloader .
+```
+
+Run the docker image (please replace ``):
+```bash
+docker run \
+    -v "<./input>:/app/input" \
+    -v "<./output>:/app/output" \
+    datasetpreparator:sc2_map_downloader \
+    python3 sc2_map_downloader.py \
+    --input_dir /app/input \
+    --output_dir /app/output
+```

diff --git a/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/README.md b/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/README.md
index 3fb995d..fb705f6 100644
--- a/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/README.md
+++ b/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/README.md
@@ -9,6 +9,30 @@ Please keep in mind that the ```src/sc2reset_replaypack_downloader.py``` contai

 ```
+
+# Execute With Docker
+
+> [!NOTE]
+> There are two ways of executing this script with Docker. One is to use the main repository Dockerfile (available in the `docker` directory) and the other is to use the Dockerfile contained in this directory.
+
+## Repository Docker Image
+
+Please refer to the main [README](../../README.md) for the instructions.
+
+## Script Docker Image
+
+Build the docker image:
+```bash
+docker build --tag=datasetpreparator:sc2reset_replaypack_downloader .
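+
+# Optionally verify that the image was built and is available locally:
+docker image ls datasetpreparator:sc2reset_replaypack_downloader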
+```
+
+Run the docker image (please replace ``):
+```bash
+docker run -v "<./input>:/app/input" \
+    datasetpreparator:sc2reset_replaypack_downloader \
+    python3 sc2reset_replaypack_downloader.py --input_dir /app/input
+```
+

 ## Citation

 SC2ReSet replaypack collection was formally introduced in:

From 7a968cec6933d2f4133862db4043f656187b72e3 Mon Sep 17 00:00:00 2001
From: Kaszanas <34846245+Kaszanas@users.noreply.github.com>
Date: Sun, 17 Nov 2024 18:07:35 +0100
Subject: [PATCH 16/92] docs: updated all CLI Usage for scripts

---
 src/datasetpreparator/dir_packager/README.md | 24 ++++----
 .../directory_flattener/README.md            | 31 +++++-----
 src/datasetpreparator/file_renamer/README.md | 20 ++++---
 src/datasetpreparator/json_merger/README.md  | 28 +++++----
 .../processed_mapping_copier/README.md       | 25 +++++----
 .../sc2/sc2_map_downloader/README.md         | 25 +++++----
 .../sc2egset_replaypack_processor/README.md  | 56 ++++++++++++++-----
 .../sc2reset_replaypack_downloader/README.md | 14 +++++
 8 files changed, 142 insertions(+), 81 deletions(-)

diff --git a/src/datasetpreparator/dir_packager/README.md b/src/datasetpreparator/dir_packager/README.md
index 8501c73..b26d935 100644
--- a/src/datasetpreparator/dir_packager/README.md
+++ b/src/datasetpreparator/dir_packager/README.md
@@ -6,14 +6,18 @@ Utility script for compressing a directory into a `.zip` archive.

 Please keep in mind that the ```src/file_packager.py``` contains default flag values and can be customized with the following command line flags:
 ```
-usage: file_packager.py [-h] [--input_dir INPUT_DIR]
-
-Tool used for processing StarCraft 2 (SC2) datasets. with https://github.com/Kaszanas/SC2InfoExtractorGo
-
-options:
-  -h, --help show this help message and exit
-  --input_dir INPUT_DIR (default = ../../processing/sc2_replaypack_processor/output)
-    Please provide input path to the directory containing the dataset that is going to be processed by packaging into .zip archives.
+Usage: dir_packager.py [OPTIONS]
+
+Tool used for processing StarCraft 2 (SC2) datasets
+with https://github.com/Kaszanas/SC2InfoExtractorGo
+
+Options:
+  --input_path DIRECTORY    Please provide input path to the directory
+                            containing the dataset that is going to be
+                            processed by packaging into .zip archives.
+                            [required]
+  --log [INFO|DEBUG|ERROR]  Log level (INFO, DEBUG, ERROR)
+  --help                    Show this message and exit.
 ```

 # Execute With Docker

@@ -29,12 +33,12 @@ Please refer to the main [README](../../README.md) for the instructions.

 Build the docker image:
 ```bash
-docker build --tag=datasetpreparator:file_packager .
+docker build --tag=datasetpreparator:dir_packager .
 ```
 
 Run the docker image (please replace `<./input>` with your path):
 ```bash
 docker run -v "<./input>:/app/input" \
-            datasetpreparator:file_packager \
+            datasetpreparator:dir_packager \
             python3 dir_packager.py --input_path /app/input
 ```
diff --git a/src/datasetpreparator/directory_flattener/README.md b/src/datasetpreparator/directory_flattener/README.md
index 8d24222..9b54481 100644
--- a/src/datasetpreparator/directory_flattener/README.md
+++ b/src/datasetpreparator/directory_flattener/README.md
@@ -7,20 +7,21 @@ Utility script for entering each of the supplied replaypacks and flattening its
 Please keep in mind that ```src/directory_flattener.py``` does not contain default flag values and can be customized with the following command line flags:
 
 ```
-usage: directory_flattener.py [-h] [--input_path INPUT_PATH] [--output_path OUTPUT_PATH]
-                              [--file_extension FILE_EXTENSION]
-
-Directory restructuring tool used in order to flatten the structure, map the old structure to a separate
-file, and for later processing with other tools. Created primarily to define StarCraft 2 (SC2) datasets.
-
-options:
-  -h, --help            show this help message and exit
-  --input_path INPUT_PATH (default = ../../processing/directory_flattener/input)
-                        Please provide input path to the dataset that is going to be processed.
-  --output_path OUTPUT_PATH (default = ../../processing/directory_flattener/output)
-                        Please provide output path where sc2 map files will be downloaded.
-  --file_extension FILE_EXTENSION (default = .SC2Replay)
-                        Please provide a file extension for files that will be moved and renamed.
+Usage: directory_flattener.py [OPTIONS]
+
+Directory restructuring tool used in order to flatten the structure, map the
+old structure to a separate file, and for later processing with other tools.
+Created primarily to define StarCraft 2 (SC2) datasets.
+
+Options:
+  --input_path DIRECTORY    Please provide input path to the dataset that is
+                            going to be processed.  [required]
+  --output_path DIRECTORY   Please provide output path where the tool will put
+                            files after processing.  [required]
+  --file_extension TEXT     Specify file extension for the files that will be
+                            put to the top level directory.  [required]
+  --log [INFO|DEBUG|ERROR]  Log level (INFO, DEBUG, ERROR)
+  --help                    Show this message and exit.
 ```
 
 # Execute With Docker
@@ -44,7 +45,7 @@ Run the docker image (please replace ``):
 docker run -v "<./input>:/app/input" \
             -v "<./output>:/app/output" \
-            datasetpreparator:file_packager \
+            datasetpreparator:directory_flattener \
             python3 directory_flattener.py \
             --input_path /app/input \
             --output_path /app/output \
diff --git a/src/datasetpreparator/file_renamer/README.md b/src/datasetpreparator/file_renamer/README.md
index 4fbac15..3a3024f 100644
--- a/src/datasetpreparator/file_renamer/README.md
+++ b/src/datasetpreparator/file_renamer/README.md
@@ -4,14 +4,18 @@
 Please keep in mind that the ```src/file_renamer.py``` contains default flag values and can be customized with the following command line flags:
 
 ```
-usage: file_renamer.py [-h] [--input_dir INPUT_DIR]
-
-Tool used for processing StarCraft 2 (SC2) datasets with https://github.com/Kaszanas/SC2InfoExtractorGo
-
-options:
-  -h, --help            show this help message and exit
-  --input_dir INPUT_DIR (default = ../../processing/sc2_replaypack_processor/output)
-                        Please provide input path to the directory containing the dataset that is going to be processed.
+Usage: file_renamer.py [OPTIONS]
+
+Tool used for processing StarCraft 2 (SC2) datasets.
with
+https://github.com/Kaszanas/SC2InfoExtractorGo
+
+Options:
+  --input_path DIRECTORY    Please provide input path to the directory
+                            containing the dataset that is going to be
+                            processed.  [required]
+  --log [INFO|DEBUG|ERROR]  Log level (INFO, DEBUG, ERROR)
+  --help                    Show this message and exit.
 ```
 
 # Execute With Docker
diff --git a/src/datasetpreparator/json_merger/README.md b/src/datasetpreparator/json_merger/README.md
index b61ed4a..d11f1fe 100644
--- a/src/datasetpreparator/json_merger/README.md
+++ b/src/datasetpreparator/json_merger/README.md
@@ -6,19 +6,21 @@ Utility script that is merging two JSON files.
 Please keep in mind that the ```src/json_merger.py``` contains default flag values and can be customized with the following command line flags:
 
 ```
-usage: json_merger.py [-h] [--json_one JSON_ONE] [--json_two JSON_TWO] [--output_filepath OUTPUT_FILEPATH]
-
-Tool used for merging two .json files. Created in order to merge two mappings created by
-https://github.com/Kaszanas/SC2MapLocaleExtractor
-
-options:
-  -h, --help            show this help message and exit
-  --json_one JSON_ONE (default = ../../processing/json_merger/json1.json)
-                        Please provide the path to the first .json file that is going to be merged.
-  --json_two JSON_TWO (default = ../../processing/json_merger/json2.json)
-                        Please provide the path to the second .json file that is going to be merged.
-  --output_filepath OUTPUT_FILEPATH (default = ../../processing/json_merger/merged.json)
-                        Please provide output path where sc2 map files will be downloaded.
+Usage: json_merger.py [OPTIONS]
+
+Tool used for merging two .json files. Created in order to merge two
+mappings created by https://github.com/Kaszanas/SC2MapLocaleExtractor
+
+Options:
+  --json_one FILE           Please provide the path to the first .json file
+                            that is going to be merged.  [required]
+  --json_two FILE           Please provide the path to the second .json file
+                            that is going to be merged.  [required]
+  --output_filepath FILE    Please provide a filepath to which the result JSON
+                            file will be saved, note that any existing file of
+                            the same name will be overwritten.  [required]
+  --log [INFO|DEBUG|ERROR]  Log level (INFO, DEBUG, ERROR)
+  --help                    Show this message and exit.
 ```
 
 # Execute With Docker
diff --git a/src/datasetpreparator/processed_mapping_copier/README.md b/src/datasetpreparator/processed_mapping_copier/README.md
index bb17838..8440f6e 100644
--- a/src/datasetpreparator/processed_mapping_copier/README.md
+++ b/src/datasetpreparator/processed_mapping_copier/README.md
@@ -6,17 +6,20 @@ Utility script that enters each of the processed replaypack directories and copi
 Please keep in mind that the ```src/processed_mapping_copier.py``` contains default flag values and can be customized with the following command line flags:
 
 ```
-usage: processed_mapping_copier.py [-h] [--input_path INPUT_PATH] [--output_path OUTPUT_PATH]
-
-Tool for copying the processed_mapping.json files that are required to define the StarCraft 2 (SC2) dataset.
-
-options:
-  -h, --help            show this help message and exit
-  --input_path INPUT_PATH (default = ../../processing/directory_flattener/output)
-                        Please provide input path to the flattened replaypacks that contain
-                        procesed_mapping.json files.
-  --output_path OUTPUT_PATH (default = ../../processing/sc2_replaypack_processor/output)
-                        Please provide output path where processed_mapping.json will be copied.
+Usage: processed_mapping_copier.py [OPTIONS]
+
+Tool for copying the processed_mapping.json files to the matching directory
+after processing the replaypack into a JSON dataset. This step is required
+to define the StarCraft 2 (SC2) dataset.
+
+Options:
+  --input_path DIRECTORY    Please provide input path to the flattened
+                            replaypacks that contain processed_mapping.json
+                            files.  [required]
+  --output_path DIRECTORY   Please provide output path where
+                            processed_mapping.json will be copied.  [required]
+  --log [INFO|DEBUG|ERROR]  Log level (INFO, DEBUG, ERROR)
+  --help                    Show this message and exit.
 ```
 
 # Execute With Docker
diff --git a/src/datasetpreparator/sc2/sc2_map_downloader/README.md b/src/datasetpreparator/sc2/sc2_map_downloader/README.md
index c826a00..e631767 100644
--- a/src/datasetpreparator/sc2/sc2_map_downloader/README.md
+++ b/src/datasetpreparator/sc2/sc2_map_downloader/README.md
@@ -6,16 +6,19 @@ Utility script that opens each of the provided replays and downloads the map fro
 Please keep in mind that the ```src/sc2_map_downloader.py``` does not contain default flag values and can be customized with the following command line flags:
 
 ```
-usage: sc2_map_downloader.py [-h] [--input_path INPUT_PATH] [--output_path OUTPUT_PATH]
-
-Tool for downloading StarCraft 2 (SC2) maps based on the data that is available within .SC2Replay file.
-
-options:
-  -h, --help            show this help message and exit
-  --input_path INPUT_PATH (default = ../../processing/directory_flattener/output)
-                        Please provide input path to the dataset that is going to be processed.
-  --output_path OUTPUT_PATH (default = ../../processing/sc2_map_downloader/output)
-                        Please provide output path where sc2 map files will be downloaded.
+Usage: sc2_map_downloader.py [OPTIONS]
+
+Tool for downloading StarCraft 2 (SC2) maps based on
+the data that is available within .SC2Replay file.
+
+Options:
+  --input_path DIRECTORY         Please provide input path to the dataset that
+                                 is going to be processed.  [required]
+  --output_path DIRECTORY        Please provide output path where StarCraft 2
+                                 (SC2) map files will be downloaded.
+                                 [required]
+  --log [INFO|DEBUG|ERROR|WARN]  Log level (INFO, DEBUG, ERROR)
+  --help                         Show this message and exit.
 ```
 
 # Execute With Docker
@@ -40,7 +43,7 @@ docker run \
     -v "<./input>:/app/input" \
     -v "<./output>:/app/output" \
     datasetpreparator:sc2_map_downloader \
-    python3 dir_packager.py \
+    python3 sc2_map_downloader.py \
     --input_path /app/input \
     --output_path /app/output
 ```
diff --git a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/README.md b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/README.md
index 9f3977f..bdfad89 100644
--- a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/README.md
+++ b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/README.md
@@ -6,17 +6,47 @@ Utility script that leverages the [SC2InfoExtractorGo](https://github.com/Kaszan
 Please keep in mind that the ```src/sc2egset_replaypack_processor.py``` contains default flag values and can be customized with the following command line flags:
 
 ```
-usage: sc2_replaypack_processor.py [-h] [--input_dir INPUT_DIR] [--output_dir OUTPUT_DIR]
-                                   [--n_processes N_PROCESSES]
-
-Tool used for processing StarCraft 2 (SC2) datasets. with https://github.com/Kaszanas/SC2InfoExtractorGo
-
-options:
-  -h, --help            show this help message and exit
-  --input_dir INPUT_DIR (default = ../../processing/directory_flattener/output)
-                        Please provide input path to the directory containing the dataset that is going to be processed.
-  --output_dir OUTPUT_DIR (default = ../../processing/sc2_replaypack_processor/output)
-                        Please provide an output directory for the resulting files.
-  --n_processes N_PROCESSES (default = 4)
-                        Please provide the number of processes to be spawned for the dataset processing.
+Usage: sc2egset_replaypack_processor.py [OPTIONS]
+
+Tool used to execute SC2InfoExtractorGo
+(https://github.com/Kaszanas/SC2InfoExtractorGo) on multiple replaypack
+directories. Assists in processing StarCraft 2 (SC2) datasets.
+
+Options:
+  --input_path DIRECTORY          Please provide input path to the directory
+                                  containing the dataset that is going to be
+                                  processed.  [required]
+  --output_path DIRECTORY         Please provide an output directory for the
+                                  resulting files.  [required]
+  --perform_chat_anonymization BOOLEAN
+                                  Provide 'True' if chat should be anonymized,
+                                  otherwise 'False'.  [required]
+  --n_processes INTEGER           Please provide the number of processes to be
+                                  spawned for the dataset processing.
+                                  [required]
+  --log [INFO|DEBUG|ERROR]        Log level (INFO, DEBUG, ERROR)
+  --help                          Show this message and exit.
+```
+
+# Execute With Docker
+
+> [!NOTE]
+> There are two ways of executing this script with Docker. One is to use the main repository Dockerfile (available in `docker` directory) and the other is to use the Dockerfile contained in this directory.
+
+## Repository Docker Image
+
+Please refer to the main [README](../../README.md) for the instructions.
+
+## Script Docker Image
+
+Build the docker image:
+```bash
+docker build --tag=datasetpreparator:sc2egset_replaypack_processor .
+```
+
+Run the docker image (please replace `<./input>` and `<./output>` with your paths):
+```bash
+docker run -v "<./input>:/app/input" \
+            -v "<./output>:/app/output" \
+            datasetpreparator:sc2egset_replaypack_processor \
+            python3 sc2egset_replaypack_processor.py \
+            --input_path /app/input \
+            --output_path /app/output
 ```
diff --git a/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/README.md b/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/README.md
index fb705f6..9c41ae2 100644
--- a/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/README.md
+++ b/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/README.md
@@ -6,7 +6,21 @@ Utility script, downloads the contents of SC2ReSet replaypack from a Zenodo repo
 Please keep in mind that the ```src/sc2reset_replaypack_downloader.py``` contains default flag values and can be customized with the following command line flags:
 
 ```
+Usage: sc2reset_replaypack_downloader.py [OPTIONS]
+
+Tool used for downloading
+SC2ReSet: StarCraft II Esport Replaypack Set
+(https://zenodo.org/doi/10.5281/zenodo.5575796).
+
+Options:
+  --download_path DIRECTORY      Please provide a path to which the archives
+                                 will be downloaded.  [required]
+  --unpack_path DIRECTORY        Please provide a path to which the archives
+                                 will be unpacked.  [required]
+  --n_workers INTEGER            Number of workers used for extracting the
+                                 .zip archives.  [required]
+  --log [INFO|DEBUG|ERROR|WARN]  Log level (INFO, DEBUG, ERROR)
+  --help                         Show this message and exit.
``` # Execute With Docker From fd8ad2a174b4c1116104ef76c4cf5c9293bcbf99 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Sun, 17 Nov 2024 22:06:26 +0100 Subject: [PATCH 17/92] fix: fixed log level, fixing path initialization --- .../directory_flattener/directory_flattener.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/datasetpreparator/directory_flattener/directory_flattener.py b/src/datasetpreparator/directory_flattener/directory_flattener.py index 08c68fd..6e17322 100644 --- a/src/datasetpreparator/directory_flattener/directory_flattener.py +++ b/src/datasetpreparator/directory_flattener/directory_flattener.py @@ -205,7 +205,7 @@ def multiple_directory_flattener( ) @click.option( "--log", - type=click.Choice(["INFO", "DEBUG", "ERROR"], case_sensitive=False), + type=click.Choice(["INFO", "DEBUG", "ERROR", "WARN"], case_sensitive=False), default="WARN", help="Log level (INFO, DEBUG, ERROR)", ) @@ -215,6 +215,9 @@ def main(input_path: str, output_path: str, file_extension: str, log: str) -> No raise ValueError(f"Invalid log level: {numeric_level}") logging.basicConfig(format=LOGGING_FORMAT, level=numeric_level) + input_path = Path(input_path) + output_path = Path(output_path) + multiple_directory_flattener( input_path=input_path, output_path=output_path, file_extension=file_extension ) From 8b10ac86a9a714adb2e63e94210a810491606916 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Mon, 18 Nov 2024 02:46:53 +0100 Subject: [PATCH 18/92] fix: fixing glob issues, testing directory flattener --- README.md | 37 +++++++++++-------- .../directory_flattener.py | 13 ++++++- src/datasetpreparator/settings.py | 3 +- 3 files changed, 33 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 215cbbf..5c16ccd 100644 --- a/README.md +++ b/README.md @@ -32,24 +32,11 @@ When using Docker, you will have to pass the arguments through the `docker run` docker run -v "./processing:/app/processing" datasetpreparator python3 directory_flattener.py --input_path /app/processing/directory_flattener/input --output_path /app/processing/directory_flattener/output ``` -### Table of Contents - -Each of the scripts has its usage described in their respective `README.md` files, you can find the table of contents below. - -#### CLI Usage Generic scripts -1. [Directory Packager (dir_packager): README](src/dir_packager/README.md) -2. [Directory Flattener (directory_flattener): README](src/directory_flattener/README.md) -3. [File Renamer (file_renamer): README](src/file_renamer/README.md) -4. [JSON Merger (json_merger): README](src/json_merger/README.md) -5. [Processed Mapping Copier (processed_mapping_copier): README](src/processed_mapping_copier/README.md) - -#### CLI Usage StarCraft 2 Specific Scripts -1. [SC2 Map Downloader (sc2_map_downloader): README](src/sc2/sc2_map_downloader/README.md) -2. [SC2EGSet Replaypack Processor (sc2egset_replaypack_processor): README](src/sc2/sc2egset_replaypack_processor/README.md) -3. [SC2ReSet Replaypack Downloader (sc2reset_replaypack_downloader): README](src/sc2/sc2reset_replaypack_downloader/README.md) - ## SC2EGSet Dataset Preparation Steps +> [!NOTE] +> Instructions below are for reproducing the result of the SC2EGSet dataset. If you wish to use the tools in this repository separately for your own dataset, please refer to the **[Table of Contents](#detailed-tools-description)**. 
+ To reproduce our experience with defining a dataset and to be able to compare your results with our work we describe how to perform the processing below. ### Using Docker @@ -62,6 +49,24 @@ The following steps were used to prepare the SC2EGSet dataset: 3. Place the input replaypacks into `./processing/directory_flattener/` directory. 4. Run the command ```make all``` to process the replaypacks and create the dataset. The output will be placed in `./processing/sc2_replaypack_processor/output` directory. + +### Detailed Tools Description + +Each of the scripts has its usage described in their respective `README.md` files, you can find the table of contents below. + +#### CLI Usage Generic scripts +1. [Directory Packager (dir_packager): README](src/datasetpreparator/dir_packager/README.md) +2. [Directory Flattener (directory_flattener): README](src/datasetpreparator/directory_flattener/README.md) +3. [File Renamer (file_renamer): README](src/datasetpreparator/file_renamer/README.md) +4. [JSON Merger (json_merger): README](src/datasetpreparator/json_merger/README.md) +5. [Processed Mapping Copier (processed_mapping_copier): README](src/datasetpreparator/processed_mapping_copier/README.md) + +#### CLI Usage StarCraft 2 Specific Scripts +1. [SC2 Map Downloader (sc2_map_downloader): README](src/datasetpreparator/sc2/sc2_map_downloader/README.md) +2. [SC2EGSet Replaypack Processor (sc2egset_replaypack_processor): README](src/datasetpreparator/sc2/sc2egset_replaypack_processor/README.md) +3. [SC2ReSet Replaypack Downloader (sc2reset_replaypack_downloader): README](src/datasetpreparator/sc2/sc2reset_replaypack_downloader/README.md) + + +After building the image, please refer to the **[Command Line Arguments Usage](#command-line-arguments-usage)** section for the usage of the scripts and for a full description for each of the scripts refer to **[Detailed Tools Description](#detailed-tools-description)**. + ## Command Line Arguments Usage When using Docker, you will have to pass the arguments through the `docker run` command and mount the input/output directory. Below is an example of how to run the `directory_flattener` script using Docker. For ease of use we have prepared example directory structure in the `processing` directory. The command below uses that to issue a command to flatten the directory structure: ```bash -docker run -v "./processing:/app/processing" datasetpreparator python3 directory_flattener.py --input_path /app/processing/directory_flattener/input --output_path /app/processing/directory_flattener/output +docker run \ + -v "./processing:/app/processing" \ + datasetpreparator \ + python3 ./src/datasetpreparator/directory_flattener/directory_flattener.py \ + --input_path /app/processing/directory_flattener/input \ + --output_path /app/processing/directory_flattener/output ``` -## SC2EGSet Dataset Preparation Steps +## SC2EGSet Dataset Reproduction Steps > [!NOTE] > Instructions below are for reproducing the result of the SC2EGSet dataset. If you wish to use the tools in this repository separately for your own dataset, please refer to the **[Table of Contents](#detailed-tools-description)**. @@ -41,13 +51,12 @@ To reproduce our experience with defining a dataset and to be able to compare yo ### Using Docker -We provide a release image containing all of the scripts. To see the usage of these scripts please refer to their respective ``README.md`` files as described above. +We provide a release image containing all of the scripts. 
To see the usage of these scripts please refer to their respective ``README.md`` files as described in [Detailed Tools Description](#detailed-tools-description). The following steps were used to prepare the SC2EGSet dataset: -1. Build the docker image from: https://github.com/Kaszanas/SC2InfoExtractorGo -2. Build the docker image for the DatasetPreparator using the provided ```makefile``` command: ```make docker_build```. -3. Place the input replaypacks into `./processing/directory_flattener/` directory. -4. Run the command ```make all``` to process the replaypacks and create the dataset. The output will be placed in `./processing/sc2_replaypack_processor/output` directory. +1. Build the docker image for the DatasetPreparator using the provided ```makefile``` command: ```make docker_build```. This will load all of the dependencies such as the [SC2InfoExtractorGo](https://github.com/Kaszanas/SC2InfoExtractorGo). +2. Place the input replaypacks into `./processing/directory_flattener/input` directory. +3. Run the command ```make all``` to process the replaypacks and create the dataset. The output will be placed in `./processing/sc2egset_replaypack_processor/output` directory. ### Detailed Tools Description From 708837521925a6a64a628c4721acf15db6f4c463 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Mon, 18 Nov 2024 03:05:09 +0100 Subject: [PATCH 20/92] docs: removed redundant information from README --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index 103c133..2f49d93 100644 --- a/README.md +++ b/README.md @@ -47,8 +47,6 @@ docker run \ > [!NOTE] > Instructions below are for reproducing the result of the SC2EGSet dataset. If you wish to use the tools in this repository separately for your own dataset, please refer to the **[Table of Contents](#detailed-tools-description)**. -To reproduce our experience with defining a dataset and to be able to compare your results with our work we describe how to perform the processing below. - ### Using Docker We provide a release image containing all of the scripts. To see the usage of these scripts please refer to their respective ``README.md`` files as described in [Detailed Tools Description](#detailed-tools-description). From 02870fc60ee76639028a29912770406a2cc5f956 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Mon, 18 Nov 2024 17:09:53 +0100 Subject: [PATCH 21/92] docs: added generic information in README, editing --- README.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 2f49d93..29260e0 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,9 @@ # DatasetPreparator -Tools in this repository were used to create the **[SC2ReSet: StarCraft II Esport Replaypack Set](https://doi.org/10.5281/zenodo.5575796)**, and finally **[SC2EGSet: StarCraft II Esport Game State Dataset](https://doi.org/10.5281/zenodo.5503997)**, citation information [Cite Us!](#cite-us). +This project contains various scripts that can assist in the process of preparing datasets. To have a broad overview of the tools please refer to the **[Detailed Tools Description](#detailed-tools-description)**. +Tools in this repository were used to create the **[SC2ReSet: StarCraft II Esport Replaypack Set](https://doi.org/10.5281/zenodo.5575796)**, and finally **[SC2EGSet: StarCraft II Esport Game State Dataset](https://doi.org/10.5281/zenodo.5503997)**, citation information **[Cite Us!](#cite-us)**. 
## Installation @@ -45,7 +46,7 @@ docker run \ ## SC2EGSet Dataset Reproduction Steps > [!NOTE] -> Instructions below are for reproducing the result of the SC2EGSet dataset. If you wish to use the tools in this repository separately for your own dataset, please refer to the **[Table of Contents](#detailed-tools-description)**. +> Instructions below are for reproducing the result of the SC2EGSet dataset. If you wish to use the tools in this repository separately for your own dataset, please refer to the **[Detailed Tools Description](#detailed-tools-description)**. ### Using Docker @@ -59,16 +60,16 @@ The following steps were used to prepare the SC2EGSet dataset: ### Detailed Tools Description -Each of the scripts has its usage described in their respective `README.md` files, you can find the table of contents below. +Each of the scripts has its usage described in their respective `README.md` files, you can find the detailed description of the available tools below. -#### CLI Usage Generic scripts +#### CLI Usage; Generic scripts 1. [Directory Packager (dir_packager): README](src/datasetpreparator/dir_packager/README.md) 2. [Directory Flattener (directory_flattener): README](src/datasetpreparator/directory_flattener/README.md) 3. [File Renamer (file_renamer): README](src/datasetpreparator/file_renamer/README.md) 4. [JSON Merger (json_merger): README](src/datasetpreparator/json_merger/README.md) 5. [Processed Mapping Copier (processed_mapping_copier): README](src/datasetpreparator/processed_mapping_copier/README.md) -#### CLI Usage StarCraft 2 Specific Scripts +#### CLI Usage; StarCraft 2 Specific Scripts 1. [SC2 Map Downloader (sc2_map_downloader): README](src/datasetpreparator/sc2/sc2_map_downloader/README.md) 2. [SC2EGSet Replaypack Processor (sc2egset_replaypack_processor): README](src/datasetpreparator/sc2/sc2egset_replaypack_processor/README.md) 3. [SC2ReSet Replaypack Downloader (sc2reset_replaypack_downloader): README](src/datasetpreparator/sc2/sc2reset_replaypack_downloader/README.md) From 2b633c322516e3b37210727c1a4194e4a9e503f7 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Mon, 18 Nov 2024 17:50:15 +0100 Subject: [PATCH 22/92] perf: directory_flattener, hash from filepath, added tqdm --- .../directory_flattener/Dockerfile | 2 +- .../directory_flattener.py | 29 ++++++++++++------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/src/datasetpreparator/directory_flattener/Dockerfile b/src/datasetpreparator/directory_flattener/Dockerfile index 8ee59b3..f317832 100644 --- a/src/datasetpreparator/directory_flattener/Dockerfile +++ b/src/datasetpreparator/directory_flattener/Dockerfile @@ -1,6 +1,6 @@ FROM python:latest -RUN pip install click +RUN pip install click tqdm WORKDIR /app diff --git a/src/datasetpreparator/directory_flattener/directory_flattener.py b/src/datasetpreparator/directory_flattener/directory_flattener.py index ba3a4bb..3b3d0e0 100644 --- a/src/datasetpreparator/directory_flattener/directory_flattener.py +++ b/src/datasetpreparator/directory_flattener/directory_flattener.py @@ -4,11 +4,13 @@ import json import shutil import logging +import hashlib + import click +from tqdm import tqdm from datasetpreparator.settings import LOGGING_FORMAT -import hashlib def save_dir_mapping(output_path: str, dir_mapping: dict) -> None: @@ -27,7 +29,6 @@ def save_dir_mapping(output_path: str, dir_mapping: dict) -> None: json.dump(dir_mapping, json_file) -# REVIEW: This function takes too long. 
I should be hashing the filepath from the replapack directory:
 def calculate_file_hash(file_path: Path) -> str:
     """
     Calculates the hash of the filepath using the selected algorithm.
@@ -44,12 +45,11 @@ def calculate_file_hash(file_path: Path) -> str:
     """
 
     # Encode the filepath and calculate its hash:
-    with open(file_path, "rb") as file:
-        file_hash = hashlib.md5()
-        while chunk := file.read(4096):
-            file_hash.update(chunk)
+    path_str = file_path.as_posix().encode("utf-8")
 
-    return file_hash.hexdigest()
+    path_hash = hashlib.md5(path_str).hexdigest()
+
+    return path_hash
 
 
 def directory_flatten(
@@ -77,9 +77,17 @@ def directory_flatten(
 
     # Walk over the directory
     dir_structure_mapping = {}
-    for file in list_of_files:
+    for file in tqdm(
+        list_of_files,
+        desc=f"Flattening directory {root_directory.name}",
+        unit="files",
+    ):
+        # Getting the ReplayPack/directory/structure/file.SC2Replay path,
+        # this is needed to calculate the hash of the filepath:
+        root_dir_name_and_file = root_directory.name / file.relative_to(root_directory)
+
         # Get unique filename:
-        unique_filename = calculate_file_hash(file)
+        unique_filename = calculate_file_hash(root_dir_name_and_file)
         original_extension = file.suffix
         new_path_and_filename = Path(dir_output_path, unique_filename).with_suffix(
             original_extension
@@ -98,8 +106,7 @@ def directory_flatten(
         logging.debug(f"File copied to {new_path_and_filename.as_posix()}")
 
         # Finding the relative path from the root directory to the file:
-        relative_file = os.path.relpath(current_file.as_posix(), root_directory)
-        dir_structure_mapping[new_path_and_filename.name] = relative_file
+        dir_structure_mapping[new_path_and_filename.name] = root_dir_name_and_file.as_posix()
 
     return dir_structure_mapping
 
From a9e2bd414cb3f0b5151bcc98f7852ca3aff5e694 Mon Sep 17 00:00:00 2001
From: Kaszanas <34846245+Kaszanas@users.noreply.github.com>
Date: Mon, 18 Nov 2024 17:57:13 +0100
Subject: [PATCH 23/92] fix: converting paths with click, changed target name

---
 README.md                                     |  2 +-
 makefile                                      |  2 +-
 .../dir_packager/dir_packager.py              |  2 +-
 .../directory_flattener.py                    | 21 +++++++++++++------
 .../file_renamer/file_renamer.py              |  2 +-
 .../json_merger/json_merger.py                |  2 +-
 .../processed_mapping_copier.py               | 18 +++++++++++++---
 .../sc2_map_downloader/sc2_map_downloader.py  | 16 ++++++++++++--
 .../sc2egset_replaypack_processor.py          | 18 +++++++++++++---
 .../sc2reset_replaypack_downloader.py         | 12 +++++++++--
 10 files changed, 74 insertions(+), 21 deletions(-)

diff --git a/README.md b/README.md
index 29260e0..3056823 100644
--- a/README.md
+++ b/README.md
@@ -55,7 +55,7 @@ We provide a release image containing all of the scripts. To see the usage of th
 The following steps were used to prepare the SC2EGSet dataset:
 1. Build the docker image for the DatasetPreparator using the provided ```makefile``` command: ```make docker_build```. This will load all of the dependencies such as the [SC2InfoExtractorGo](https://github.com/Kaszanas/SC2InfoExtractorGo).
 2. Place the input replaypacks into `./processing/directory_flattener/input` directory.
-3. Run the command ```make all``` to process the replaypacks and create the dataset. The output will be placed in `./processing/sc2_replaypack_processor/output` directory.
+3. Run the command ```make sc2reset_sc2egset``` to process the replaypacks and create the dataset. The output will be placed in `./processing/sc2egset_replaypack_processor/output` directory. The full command sequence is sketched below.
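+
+For reference, the whole reproduction sequence could look like the following (a minimal sketch; it assumes the default `processing` paths used in the steps above, and `<your_replaypacks>` is a placeholder for your own data):
+
+```bash
+# Build the DatasetPreparator image (pulls in dependencies such as SC2InfoExtractorGo):
+make docker_build
+# Place the input replaypacks where the pipeline expects them:
+cp -r <your_replaypacks> ./processing/directory_flattener/input
+# Run the full pipeline; the output lands in ./processing/sc2egset_replaypack_processor/output:
+make sc2reset_sc2egset
+```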
### Detailed Tools Description diff --git a/makefile b/makefile index 8d5e65a..be384ef 100644 --- a/makefile +++ b/makefile @@ -9,7 +9,7 @@ TEST_COMMAND = "poetry run pytest --durations=100 --ignore-glob='test_*.py' test ################### #### PIPELINE ##### ################### -all: ## Runs the entire processing pipeline to recreate SC2ReSet and SC2EGSet or any other dataset using our standard tooling. +sc2reset_sc2egset: ## Runs the entire processing pipeline to recreate SC2ReSet and SC2EGSet or any other dataset using our standard tooling. @make flatten @make process_replaypack @make rename_files diff --git a/src/datasetpreparator/dir_packager/dir_packager.py b/src/datasetpreparator/dir_packager/dir_packager.py index 1da19ff..6bbd35b 100644 --- a/src/datasetpreparator/dir_packager/dir_packager.py +++ b/src/datasetpreparator/dir_packager/dir_packager.py @@ -72,7 +72,7 @@ def dir_packager(directory_path: Path) -> Path: ) @click.option( "--log", - type=click.Choice(["INFO", "DEBUG", "ERROR"], case_sensitive=False), + type=click.Choice(["INFO", "DEBUG", "ERROR", "WARN"], case_sensitive=False), default="WARN", help="Log level (INFO, DEBUG, ERROR)", ) diff --git a/src/datasetpreparator/directory_flattener/directory_flattener.py b/src/datasetpreparator/directory_flattener/directory_flattener.py index 3b3d0e0..ea8945c 100644 --- a/src/datasetpreparator/directory_flattener/directory_flattener.py +++ b/src/datasetpreparator/directory_flattener/directory_flattener.py @@ -202,13 +202,25 @@ def multiple_directory_flattener( ) @click.option( "--input_path", - type=click.Path(exists=True, dir_okay=True, file_okay=False, resolve_path=True), + type=click.Path( + exists=True, + dir_okay=True, + file_okay=False, + resolve_path=True, + path_type=Path, + ), required=True, help="Please provide input path to the dataset that is going to be processed.", ) @click.option( "--output_path", - type=click.Path(exists=True, dir_okay=True, file_okay=False, resolve_path=True), + type=click.Path( + exists=True, + dir_okay=True, + file_okay=False, + resolve_path=True, + path_type=Path, + ), required=True, help="Please provide output path where the tool will put files after processing.", ) @@ -225,15 +237,12 @@ def multiple_directory_flattener( default="WARN", help="Log level (INFO, DEBUG, ERROR)", ) -def main(input_path: str, output_path: str, file_extension: str, log: str) -> None: +def main(input_path: Path, output_path: Path, file_extension: str, log: str) -> None: numeric_level = getattr(logging, log.upper(), None) if not isinstance(numeric_level, int): raise ValueError(f"Invalid log level: {numeric_level}") logging.basicConfig(format=LOGGING_FORMAT, level=numeric_level) - input_path = Path(input_path) - output_path = Path(output_path) - multiple_directory_flattener( input_path=input_path, output_path=output_path, file_extension=file_extension ) diff --git a/src/datasetpreparator/file_renamer/file_renamer.py b/src/datasetpreparator/file_renamer/file_renamer.py index cc1d3d6..c1fb41c 100644 --- a/src/datasetpreparator/file_renamer/file_renamer.py +++ b/src/datasetpreparator/file_renamer/file_renamer.py @@ -86,7 +86,7 @@ def file_renamer(input_path: Path) -> None: ) @click.option( "--log", - type=click.Choice(["INFO", "DEBUG", "ERROR"], case_sensitive=False), + type=click.Choice(["INFO", "DEBUG", "ERROR", "WARN"], case_sensitive=False), default="WARN", help="Log level (INFO, DEBUG, ERROR)", ) diff --git a/src/datasetpreparator/json_merger/json_merger.py b/src/datasetpreparator/json_merger/json_merger.py index 
d64de14..3b9864f 100644
--- a/src/datasetpreparator/json_merger/json_merger.py
+++ b/src/datasetpreparator/json_merger/json_merger.py
@@ -119,7 +119,7 @@ def json_merger(
 )
 @click.option(
     "--log",
-    type=click.Choice(["INFO", "DEBUG", "ERROR"], case_sensitive=False),
+    type=click.Choice(["INFO", "DEBUG", "ERROR", "WARN"], case_sensitive=False),
     default="WARN",
     help="Log level (INFO, DEBUG, ERROR)",
 )
diff --git a/src/datasetpreparator/processed_mapping_copier/processed_mapping_copier.py b/src/datasetpreparator/processed_mapping_copier/processed_mapping_copier.py
index 42b44c2..ef1fcd3 100644
--- a/src/datasetpreparator/processed_mapping_copier/processed_mapping_copier.py
+++ b/src/datasetpreparator/processed_mapping_copier/processed_mapping_copier.py
@@ -47,19 +47,31 @@ def processed_mapping_copier(input_path: str, output_path: str) -> None:
 )
 @click.option(
     "--input_path",
-    type=click.Path(exists=True, dir_okay=True, file_okay=False, resolve_path=True),
+    type=click.Path(
+        exists=True,
+        dir_okay=True,
+        file_okay=False,
+        resolve_path=True,
+        path_type=Path,
+    ),
     required=True,
     help="Please provide input path to the flattened replaypacks that contain processed_mapping.json files.",
 )
 @click.option(
     "--output_path",
-    type=click.Path(exists=True, dir_okay=True, file_okay=False, resolve_path=True),
+    type=click.Path(
+        exists=True,
+        dir_okay=True,
+        file_okay=False,
+        resolve_path=True,
+        path_type=Path,
+    ),
     required=True,
     help="Please provide output path where processed_mapping.json will be copied.",
 )
 @click.option(
     "--log",
-    type=click.Choice(["INFO", "DEBUG", "ERROR"], case_sensitive=False),
+    type=click.Choice(["INFO", "DEBUG", "ERROR", "WARN"], case_sensitive=False),
     default="WARN",
     help="Log level (INFO, DEBUG, ERROR)",
 )
diff --git a/src/datasetpreparator/sc2/sc2_map_downloader/sc2_map_downloader.py b/src/datasetpreparator/sc2/sc2_map_downloader/sc2_map_downloader.py
index 9c6bf78..c7ed5b7 100644
--- a/src/datasetpreparator/sc2/sc2_map_downloader/sc2_map_downloader.py
+++ b/src/datasetpreparator/sc2/sc2_map_downloader/sc2_map_downloader.py
@@ -108,13 +108,25 @@ def sc2_map_downloader(input_path: Path, output_path: Path) -> Path:
 )
 @click.option(
     "--input_path",
-    type=click.Path(exists=True, dir_okay=True, file_okay=False, resolve_path=True),
+    type=click.Path(
+        exists=True,
+        dir_okay=True,
+        file_okay=False,
+        resolve_path=True,
+        path_type=Path,
+    ),
     required=True,
     help="Please provide input path to the dataset that is going to be processed.",
 )
 @click.option(
     "--output_path",
-    type=click.Path(exists=True, dir_okay=True, file_okay=False, resolve_path=True),
+    type=click.Path(
+        exists=True,
+        dir_okay=True,
+        file_okay=False,
+        resolve_path=True,
+        path_type=Path,
+    ),
     required=True,
     help="Please provide output path where StarCraft 2 (SC2) map files will be downloaded.",
 )
diff --git a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py
index 2cb10e8..f58a58d 100644
--- a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py
+++ b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py
@@ -100,13 +100,25 @@ def sc2egset_replaypack_processor(
 )
 @click.option(
     "--input_path",
-    type=click.Path(exists=True, dir_okay=True, file_okay=False, resolve_path=True),
+    type=click.Path(
+        exists=True,
+        dir_okay=True,
+        file_okay=False,
+        resolve_path=True,
+        path_type=Path,
+    ),
     required=True,
     help="Please provide input path to the directory containing the dataset that is going to be processed.",
 )
 @click.option(
     "--output_path",
-    type=click.Path(exists=True, dir_okay=True, file_okay=False, resolve_path=True),
+    type=click.Path(
+        exists=True,
+        dir_okay=True,
+        file_okay=False,
+        resolve_path=True,
+        path_type=Path,
+    ),
     required=True,
     help="Please provide an output directory for the resulting files.",
 )
@@ -126,7 +138,7 @@
 )
 @click.option(
     "--log",
-    type=click.Choice(["INFO", "DEBUG", "ERROR"], case_sensitive=False),
+    type=click.Choice(["INFO", "DEBUG", "ERROR", "WARN"], case_sensitive=False),
     default="WARN",
     help="Log level (INFO, DEBUG, ERROR)",
 )
diff --git a/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/sc2reset_replaypack_downloader.py b/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/sc2reset_replaypack_downloader.py
index 34c7ce3..8140302 100644
--- a/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/sc2reset_replaypack_downloader.py
+++ b/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/sc2reset_replaypack_downloader.py
@@ -82,7 +82,11 @@ def sc2reset_replaypack_downloader(
 @click.option(
     "--download_path",
     type=click.Path(
-        exists=False, dir_okay=True, file_okay=False, resolve_path=True, path_type=Path
+        exists=False,
+        dir_okay=True,
+        file_okay=False,
+        resolve_path=True,
+        path_type=Path,
     ),
     required=True,
     help="Please provide a path to which the archives will be downloaded.",
@@ -90,7 +94,11 @@ def sc2reset_replaypack_downloader(
 @click.option(
     "--unpack_path",
     type=click.Path(
-        exists=False, dir_okay=True, file_okay=False, resolve_path=True, path_type=Path
+        exists=False,
+        dir_okay=True,
+        file_okay=False,
+        resolve_path=True,
+        path_type=Path,
     ),
     required=True,
     help="Please provide a path to which the archives will be unpacked.",
From 2b633c322516e3b37210727c1a4194e4a9e503f7 Mon Sep 17 00:00:00 2001
From: Kaszanas <34846245+Kaszanas@users.noreply.github.com>
Date: Mon, 18 Nov 2024 22:20:04 +0100
Subject: [PATCH 24/92] docs: fixed READMEs after review

---
 src/datasetpreparator/dir_packager/README.md | 6 +++---
 src/datasetpreparator/json_merger/README.md  | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/datasetpreparator/dir_packager/README.md b/src/datasetpreparator/dir_packager/README.md
index b26d935..be04547 100644
--- a/src/datasetpreparator/dir_packager/README.md
+++ b/src/datasetpreparator/dir_packager/README.md
@@ -1,10 +1,10 @@
-# File Packager
+# Directory Packager
 
-Utility script for compressing a directory into a `.zip` archive.
+Utility script for compressing a directory into a `.zip` archive. This script iterates over all of the directories in the input directory and compresses them into `.zip` archives.
 
 # CLI Usage
 
-Please keep in mind that the ```src/file_packager.py``` contains default flag values and can be customized with the following command line flags:
+Please keep in mind that the ```src/dir_packager.py``` contains default flag values and can be customized with the following command line flags:
 
 ```
 Usage: dir_packager.py [OPTIONS]
diff --git a/src/datasetpreparator/json_merger/README.md b/src/datasetpreparator/json_merger/README.md
index d11f1fe..5524cda 100644
--- a/src/datasetpreparator/json_merger/README.md
+++ b/src/datasetpreparator/json_merger/README.md
@@ -1,6 +1,6 @@
-# File Renamer
+# JSON Merger
 
-Utility script that is merging two JSON files.
+Utility script that is merging two JSON files into an output JSON file.
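+
+For reference, a local (non-Docker) invocation could look like the following (a minimal sketch; the file paths are hypothetical and the flags follow the CLI usage described below):
+
+```bash
+python3 json_merger.py \
+    --json_one ./processing/json_merger/json1.json \
+    --json_two ./processing/json_merger/json2.json \
+    --output_filepath ./processing/json_merger/merged.json
+```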
# CLI Usage From 302a8efc1c2c9f714cf9afac924e7049aab26327 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Wed, 20 Nov 2024 01:23:20 +0100 Subject: [PATCH 25/92] build: bumped dependency versions --- poetry.lock | 232 ++++++++++++++++++++++++++----------------------- pyproject.toml | 14 +-- 2 files changed, 129 insertions(+), 117 deletions(-) diff --git a/poetry.lock b/poetry.lock index 4385dc6..a169519 100644 --- a/poetry.lock +++ b/poetry.lock @@ -211,63 +211,73 @@ tomlkit = ">=0.5.3,<1.0.0" [[package]] name = "coverage" -version = "7.4.0" +version = "7.6.7" description = "Code coverage measurement for Python" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "coverage-7.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:36b0ea8ab20d6a7564e89cb6135920bc9188fb5f1f7152e94e8300b7b189441a"}, - {file = "coverage-7.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0676cd0ba581e514b7f726495ea75aba3eb20899d824636c6f59b0ed2f88c471"}, - {file = "coverage-7.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0ca5c71a5a1765a0f8f88022c52b6b8be740e512980362f7fdbb03725a0d6b9"}, - {file = "coverage-7.4.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a7c97726520f784239f6c62506bc70e48d01ae71e9da128259d61ca5e9788516"}, - {file = "coverage-7.4.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:815ac2d0f3398a14286dc2cea223a6f338109f9ecf39a71160cd1628786bc6f5"}, - {file = "coverage-7.4.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:80b5ee39b7f0131ebec7968baa9b2309eddb35b8403d1869e08f024efd883566"}, - {file = "coverage-7.4.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:5b2ccb7548a0b65974860a78c9ffe1173cfb5877460e5a229238d985565574ae"}, - {file = "coverage-7.4.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:995ea5c48c4ebfd898eacb098164b3cc826ba273b3049e4a889658548e321b43"}, - {file = "coverage-7.4.0-cp310-cp310-win32.whl", hash = "sha256:79287fd95585ed36e83182794a57a46aeae0b64ca53929d1176db56aacc83451"}, - {file = "coverage-7.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:5b14b4f8760006bfdb6e08667af7bc2d8d9bfdb648351915315ea17645347137"}, - {file = "coverage-7.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:04387a4a6ecb330c1878907ce0dc04078ea72a869263e53c72a1ba5bbdf380ca"}, - {file = "coverage-7.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ea81d8f9691bb53f4fb4db603203029643caffc82bf998ab5b59ca05560f4c06"}, - {file = "coverage-7.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:74775198b702868ec2d058cb92720a3c5a9177296f75bd97317c787daf711505"}, - {file = "coverage-7.4.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:76f03940f9973bfaee8cfba70ac991825611b9aac047e5c80d499a44079ec0bc"}, - {file = "coverage-7.4.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:485e9f897cf4856a65a57c7f6ea3dc0d4e6c076c87311d4bc003f82cfe199d25"}, - {file = "coverage-7.4.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6ae8c9d301207e6856865867d762a4b6fd379c714fcc0607a84b92ee63feff70"}, - {file = "coverage-7.4.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:bf477c355274a72435ceb140dc42de0dc1e1e0bf6e97195be30487d8eaaf1a09"}, - {file = 
"coverage-7.4.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:83c2dda2666fe32332f8e87481eed056c8b4d163fe18ecc690b02802d36a4d26"}, - {file = "coverage-7.4.0-cp311-cp311-win32.whl", hash = "sha256:697d1317e5290a313ef0d369650cfee1a114abb6021fa239ca12b4849ebbd614"}, - {file = "coverage-7.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:26776ff6c711d9d835557ee453082025d871e30b3fd6c27fcef14733f67f0590"}, - {file = "coverage-7.4.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:13eaf476ec3e883fe3e5fe3707caeb88268a06284484a3daf8250259ef1ba143"}, - {file = "coverage-7.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:846f52f46e212affb5bcf131c952fb4075b55aae6b61adc9856222df89cbe3e2"}, - {file = "coverage-7.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26f66da8695719ccf90e794ed567a1549bb2644a706b41e9f6eae6816b398c4a"}, - {file = "coverage-7.4.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:164fdcc3246c69a6526a59b744b62e303039a81e42cfbbdc171c91a8cc2f9446"}, - {file = "coverage-7.4.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:316543f71025a6565677d84bc4df2114e9b6a615aa39fb165d697dba06a54af9"}, - {file = "coverage-7.4.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:bb1de682da0b824411e00a0d4da5a784ec6496b6850fdf8c865c1d68c0e318dd"}, - {file = "coverage-7.4.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:0e8d06778e8fbffccfe96331a3946237f87b1e1d359d7fbe8b06b96c95a5407a"}, - {file = "coverage-7.4.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a56de34db7b7ff77056a37aedded01b2b98b508227d2d0979d373a9b5d353daa"}, - {file = "coverage-7.4.0-cp312-cp312-win32.whl", hash = "sha256:51456e6fa099a8d9d91497202d9563a320513fcf59f33991b0661a4a6f2ad450"}, - {file = "coverage-7.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:cd3c1e4cb2ff0083758f09be0f77402e1bdf704adb7f89108007300a6da587d0"}, - {file = "coverage-7.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e9d1bf53c4c8de58d22e0e956a79a5b37f754ed1ffdbf1a260d9dcfa2d8a325e"}, - {file = "coverage-7.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:109f5985182b6b81fe33323ab4707011875198c41964f014579cf82cebf2bb85"}, - {file = "coverage-7.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3cc9d4bc55de8003663ec94c2f215d12d42ceea128da8f0f4036235a119c88ac"}, - {file = "coverage-7.4.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cc6d65b21c219ec2072c1293c505cf36e4e913a3f936d80028993dd73c7906b1"}, - {file = "coverage-7.4.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a10a4920def78bbfff4eff8a05c51be03e42f1c3735be42d851f199144897ba"}, - {file = "coverage-7.4.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:b8e99f06160602bc64da35158bb76c73522a4010f0649be44a4e167ff8555952"}, - {file = "coverage-7.4.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:7d360587e64d006402b7116623cebf9d48893329ef035278969fa3bbf75b697e"}, - {file = "coverage-7.4.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:29f3abe810930311c0b5d1a7140f6395369c3db1be68345638c33eec07535105"}, - {file = "coverage-7.4.0-cp38-cp38-win32.whl", hash = "sha256:5040148f4ec43644702e7b16ca864c5314ccb8ee0751ef617d49aa0e2d6bf4f2"}, - {file = "coverage-7.4.0-cp38-cp38-win_amd64.whl", hash = 
"sha256:9864463c1c2f9cb3b5db2cf1ff475eed2f0b4285c2aaf4d357b69959941aa555"}, - {file = "coverage-7.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:936d38794044b26c99d3dd004d8af0035ac535b92090f7f2bb5aa9c8e2f5cd42"}, - {file = "coverage-7.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:799c8f873794a08cdf216aa5d0531c6a3747793b70c53f70e98259720a6fe2d7"}, - {file = "coverage-7.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e7defbb9737274023e2d7af02cac77043c86ce88a907c58f42b580a97d5bcca9"}, - {file = "coverage-7.4.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a1526d265743fb49363974b7aa8d5899ff64ee07df47dd8d3e37dcc0818f09ed"}, - {file = "coverage-7.4.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf635a52fc1ea401baf88843ae8708591aa4adff875e5c23220de43b1ccf575c"}, - {file = "coverage-7.4.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:756ded44f47f330666843b5781be126ab57bb57c22adbb07d83f6b519783b870"}, - {file = "coverage-7.4.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:0eb3c2f32dabe3a4aaf6441dde94f35687224dfd7eb2a7f47f3fd9428e421058"}, - {file = "coverage-7.4.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:bfd5db349d15c08311702611f3dccbef4b4e2ec148fcc636cf8739519b4a5c0f"}, - {file = "coverage-7.4.0-cp39-cp39-win32.whl", hash = "sha256:53d7d9158ee03956e0eadac38dfa1ec8068431ef8058fe6447043db1fb40d932"}, - {file = "coverage-7.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:cfd2a8b6b0d8e66e944d47cdec2f47c48fef2ba2f2dff5a9a75757f64172857e"}, - {file = "coverage-7.4.0-pp38.pp39.pp310-none-any.whl", hash = "sha256:c530833afc4707fe48524a44844493f36d8727f04dcce91fb978c414a8556cc6"}, - {file = "coverage-7.4.0.tar.gz", hash = "sha256:707c0f58cb1712b8809ece32b68996ee1e609f71bd14615bd8f87a1293cb610e"}, + {file = "coverage-7.6.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:108bb458827765d538abcbf8288599fee07d2743357bdd9b9dad456c287e121e"}, + {file = "coverage-7.6.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c973b2fe4dc445cb865ab369df7521df9c27bf40715c837a113edaa2aa9faf45"}, + {file = "coverage-7.6.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c6b24007c4bcd0b19fac25763a7cac5035c735ae017e9a349b927cfc88f31c1"}, + {file = "coverage-7.6.7-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:acbb8af78f8f91b3b51f58f288c0994ba63c646bc1a8a22ad072e4e7e0a49f1c"}, + {file = "coverage-7.6.7-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad32a981bcdedb8d2ace03b05e4fd8dace8901eec64a532b00b15217d3677dd2"}, + {file = "coverage-7.6.7-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:34d23e28ccb26236718a3a78ba72744212aa383141961dd6825f6595005c8b06"}, + {file = "coverage-7.6.7-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e25bacb53a8c7325e34d45dddd2f2fbae0dbc230d0e2642e264a64e17322a777"}, + {file = "coverage-7.6.7-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:af05bbba896c4472a29408455fe31b3797b4d8648ed0a2ccac03e074a77e2314"}, + {file = "coverage-7.6.7-cp310-cp310-win32.whl", hash = "sha256:796c9b107d11d2d69e1849b2dfe41730134b526a49d3acb98ca02f4985eeff7a"}, + {file = "coverage-7.6.7-cp310-cp310-win_amd64.whl", hash = "sha256:987a8e3da7da4eed10a20491cf790589a8e5e07656b6dc22d3814c4d88faf163"}, + {file = "coverage-7.6.7-cp311-cp311-macosx_10_9_x86_64.whl", hash 
= "sha256:7e61b0e77ff4dddebb35a0e8bb5a68bf0f8b872407d8d9f0c726b65dfabe2469"}, + {file = "coverage-7.6.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1a5407a75ca4abc20d6252efeb238377a71ce7bda849c26c7a9bece8680a5d99"}, + {file = "coverage-7.6.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df002e59f2d29e889c37abd0b9ee0d0e6e38c24f5f55d71ff0e09e3412a340ec"}, + {file = "coverage-7.6.7-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:673184b3156cba06154825f25af33baa2671ddae6343f23175764e65a8c4c30b"}, + {file = "coverage-7.6.7-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e69ad502f1a2243f739f5bd60565d14a278be58be4c137d90799f2c263e7049a"}, + {file = "coverage-7.6.7-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:60dcf7605c50ea72a14490d0756daffef77a5be15ed1b9fea468b1c7bda1bc3b"}, + {file = "coverage-7.6.7-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:9c2eb378bebb2c8f65befcb5147877fc1c9fbc640fc0aad3add759b5df79d55d"}, + {file = "coverage-7.6.7-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3c0317288f032221d35fa4cbc35d9f4923ff0dfd176c79c9b356e8ef8ef2dff4"}, + {file = "coverage-7.6.7-cp311-cp311-win32.whl", hash = "sha256:951aade8297358f3618a6e0660dc74f6b52233c42089d28525749fc8267dccd2"}, + {file = "coverage-7.6.7-cp311-cp311-win_amd64.whl", hash = "sha256:5e444b8e88339a2a67ce07d41faabb1d60d1004820cee5a2c2b54e2d8e429a0f"}, + {file = "coverage-7.6.7-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f07ff574986bc3edb80e2c36391678a271d555f91fd1d332a1e0f4b5ea4b6ea9"}, + {file = "coverage-7.6.7-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:49ed5ee4109258973630c1f9d099c7e72c5c36605029f3a91fe9982c6076c82b"}, + {file = "coverage-7.6.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3e8796434a8106b3ac025fd15417315d7a58ee3e600ad4dbcfddc3f4b14342c"}, + {file = "coverage-7.6.7-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a3b925300484a3294d1c70f6b2b810d6526f2929de954e5b6be2bf8caa1f12c1"}, + {file = "coverage-7.6.7-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c42ec2c522e3ddd683dec5cdce8e62817afb648caedad9da725001fa530d354"}, + {file = "coverage-7.6.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0266b62cbea568bd5e93a4da364d05de422110cbed5056d69339bd5af5685433"}, + {file = "coverage-7.6.7-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:e5f2a0f161d126ccc7038f1f3029184dbdf8f018230af17ef6fd6a707a5b881f"}, + {file = "coverage-7.6.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c132b5a22821f9b143f87446805e13580b67c670a548b96da945a8f6b4f2efbb"}, + {file = "coverage-7.6.7-cp312-cp312-win32.whl", hash = "sha256:7c07de0d2a110f02af30883cd7dddbe704887617d5c27cf373362667445a4c76"}, + {file = "coverage-7.6.7-cp312-cp312-win_amd64.whl", hash = "sha256:fd49c01e5057a451c30c9b892948976f5d38f2cbd04dc556a82743ba8e27ed8c"}, + {file = "coverage-7.6.7-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:46f21663e358beae6b368429ffadf14ed0a329996248a847a4322fb2e35d64d3"}, + {file = "coverage-7.6.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:40cca284c7c310d622a1677f105e8507441d1bb7c226f41978ba7c86979609ab"}, + {file = "coverage-7.6.7-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:77256ad2345c29fe59ae861aa11cfc74579c88d4e8dbf121cbe46b8e32aec808"}, + {file = "coverage-7.6.7-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:87ea64b9fa52bf395272e54020537990a28078478167ade6c61da7ac04dc14bc"}, + {file = "coverage-7.6.7-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d608a7808793e3615e54e9267519351c3ae204a6d85764d8337bd95993581a8"}, + {file = "coverage-7.6.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cdd94501d65adc5c24f8a1a0eda110452ba62b3f4aeaba01e021c1ed9cb8f34a"}, + {file = "coverage-7.6.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:82c809a62e953867cf57e0548c2b8464207f5f3a6ff0e1e961683e79b89f2c55"}, + {file = "coverage-7.6.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:bb684694e99d0b791a43e9fc0fa58efc15ec357ac48d25b619f207c41f2fd384"}, + {file = "coverage-7.6.7-cp313-cp313-win32.whl", hash = "sha256:963e4a08cbb0af6623e61492c0ec4c0ec5c5cf74db5f6564f98248d27ee57d30"}, + {file = "coverage-7.6.7-cp313-cp313-win_amd64.whl", hash = "sha256:14045b8bfd5909196a90da145a37f9d335a5d988a83db34e80f41e965fb7cb42"}, + {file = "coverage-7.6.7-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:f2c7a045eef561e9544359a0bf5784b44e55cefc7261a20e730baa9220c83413"}, + {file = "coverage-7.6.7-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5dd4e4a49d9c72a38d18d641135d2fb0bdf7b726ca60a103836b3d00a1182acd"}, + {file = "coverage-7.6.7-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c95e0fa3d1547cb6f021ab72f5c23402da2358beec0a8e6d19a368bd7b0fb37"}, + {file = "coverage-7.6.7-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f63e21ed474edd23f7501f89b53280014436e383a14b9bd77a648366c81dce7b"}, + {file = "coverage-7.6.7-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ead9b9605c54d15be228687552916c89c9683c215370c4a44f1f217d2adcc34d"}, + {file = "coverage-7.6.7-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:0573f5cbf39114270842d01872952d301027d2d6e2d84013f30966313cadb529"}, + {file = "coverage-7.6.7-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:e2c8e3384c12dfa19fa9a52f23eb091a8fad93b5b81a41b14c17c78e23dd1d8b"}, + {file = "coverage-7.6.7-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:70a56a2ec1869e6e9fa69ef6b76b1a8a7ef709972b9cc473f9ce9d26b5997ce3"}, + {file = "coverage-7.6.7-cp313-cp313t-win32.whl", hash = "sha256:dbba8210f5067398b2c4d96b4e64d8fb943644d5eb70be0d989067c8ca40c0f8"}, + {file = "coverage-7.6.7-cp313-cp313t-win_amd64.whl", hash = "sha256:dfd14bcae0c94004baba5184d1c935ae0d1231b8409eb6c103a5fd75e8ecdc56"}, + {file = "coverage-7.6.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:37a15573f988b67f7348916077c6d8ad43adb75e478d0910957394df397d2874"}, + {file = "coverage-7.6.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b6cce5c76985f81da3769c52203ee94722cd5d5889731cd70d31fee939b74bf0"}, + {file = "coverage-7.6.7-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1ab9763d291a17b527ac6fd11d1a9a9c358280adb320e9c2672a97af346ac2c"}, + {file = "coverage-7.6.7-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6cf96ceaa275f071f1bea3067f8fd43bec184a25a962c754024c973af871e1b7"}, + {file = 
"coverage-7.6.7-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aee9cf6b0134d6f932d219ce253ef0e624f4fa588ee64830fcba193269e4daa3"}, + {file = "coverage-7.6.7-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:2bc3e45c16564cc72de09e37413262b9f99167803e5e48c6156bccdfb22c8327"}, + {file = "coverage-7.6.7-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:623e6965dcf4e28a3debaa6fcf4b99ee06d27218f46d43befe4db1c70841551c"}, + {file = "coverage-7.6.7-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:850cfd2d6fc26f8346f422920ac204e1d28814e32e3a58c19c91980fa74d8289"}, + {file = "coverage-7.6.7-cp39-cp39-win32.whl", hash = "sha256:c296263093f099da4f51b3dff1eff5d4959b527d4f2f419e16508c5da9e15e8c"}, + {file = "coverage-7.6.7-cp39-cp39-win_amd64.whl", hash = "sha256:90746521206c88bdb305a4bf3342b1b7316ab80f804d40c536fc7d329301ee13"}, + {file = "coverage-7.6.7-pp39.pp310-none-any.whl", hash = "sha256:0ddcb70b3a3a57581b450571b31cb774f23eb9519c2aaa6176d3a84c9fc57671"}, + {file = "coverage-7.6.7.tar.gz", hash = "sha256:d79d4826e41441c9a118ff045e4bccb9fdbdcb1d02413e7ea6eb5c87b5439d24"}, ] [package.dependencies] @@ -433,13 +443,13 @@ i18n = ["Babel (>=2.7)"] [[package]] name = "markdown" -version = "3.5.1" +version = "3.7" description = "Python implementation of John Gruber's Markdown." optional = false python-versions = ">=3.8" files = [ - {file = "Markdown-3.5.1-py3-none-any.whl", hash = "sha256:5874b47d4ee3f0b14d764324d2c94c03ea66bee56f2d929da9f2508d65e722dc"}, - {file = "Markdown-3.5.1.tar.gz", hash = "sha256:b65d7beb248dc22f2e8a31fb706d93798093c308dc1aba295aedeb9d41a813bd"}, + {file = "Markdown-3.7-py3-none-any.whl", hash = "sha256:7eb6df5690b81a1d7942992c97fad2938e956e79df20cbc6186e9c3a77b1c803"}, + {file = "markdown-3.7.tar.gz", hash = "sha256:2ae2471477cfd02dbbf038d5d9bc226d40def84b4fe2986e49b59b6b472bbed2"}, ] [package.extras] @@ -558,17 +568,18 @@ min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4)", "ghp-imp [[package]] name = "mkdocs-autorefs" -version = "0.5.0" +version = "1.2.0" description = "Automatically link across pages in MkDocs." optional = false python-versions = ">=3.8" files = [ - {file = "mkdocs_autorefs-0.5.0-py3-none-any.whl", hash = "sha256:7930fcb8ac1249f10e683967aeaddc0af49d90702af111a5e390e8b20b3d97ff"}, - {file = "mkdocs_autorefs-0.5.0.tar.gz", hash = "sha256:9a5054a94c08d28855cfab967ada10ed5be76e2bfad642302a610b252c3274c0"}, + {file = "mkdocs_autorefs-1.2.0-py3-none-any.whl", hash = "sha256:d588754ae89bd0ced0c70c06f58566a4ee43471eeeee5202427da7de9ef85a2f"}, + {file = "mkdocs_autorefs-1.2.0.tar.gz", hash = "sha256:a86b93abff653521bda71cf3fc5596342b7a23982093915cb74273f67522190f"}, ] [package.dependencies] Markdown = ">=3.3" +markupsafe = ">=2.0.1" mkdocs = ">=1.1" [[package]] @@ -587,18 +598,18 @@ mkdocs = ">=1.0.3" [[package]] name = "mkdocs-include-markdown-plugin" -version = "6.0.4" +version = "7.0.1" description = "Mkdocs Markdown includer plugin." 
optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "mkdocs_include_markdown_plugin-6.0.4-py3-none-any.whl", hash = "sha256:e7b8b5ecc41d6a3e16969cff3725ec3a391b68e9dfe1a4b4e36a8508becda835"}, - {file = "mkdocs_include_markdown_plugin-6.0.4.tar.gz", hash = "sha256:523c9c3a1d6a517386dc11bf60b0c0c564af1071bb6de8d213106d54f752dcc1"}, + {file = "mkdocs_include_markdown_plugin-7.0.1-py3-none-any.whl", hash = "sha256:4abd341cb1c5eac60ddd1a21540fdff714f1acc99e3b26f37641db60cd175a8d"}, + {file = "mkdocs_include_markdown_plugin-7.0.1.tar.gz", hash = "sha256:d619c206109dab4bab281e2d29b645838d55b0576c761b1fbb17e6bff1170206"}, ] [package.dependencies] mkdocs = ">=1.4" -wcmatch = ">=8,<9" +wcmatch = "*" [package.extras] cache = ["platformdirs"] @@ -673,23 +684,23 @@ mkdocs = ">=1.2" [[package]] name = "mkdocstrings" -version = "0.24.0" +version = "0.27.0" description = "Automatic documentation from sources, for MkDocs." optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "mkdocstrings-0.24.0-py3-none-any.whl", hash = "sha256:f4908560c10f587326d8f5165d1908817b2e280bbf707607f601c996366a2264"}, - {file = "mkdocstrings-0.24.0.tar.gz", hash = "sha256:222b1165be41257b494a9d29b14135d2b7ca43f38161d5b10caae03b87bd4f7e"}, + {file = "mkdocstrings-0.27.0-py3-none-any.whl", hash = "sha256:6ceaa7ea830770959b55a16203ac63da24badd71325b96af950e59fd37366332"}, + {file = "mkdocstrings-0.27.0.tar.gz", hash = "sha256:16adca6d6b0a1f9e0c07ff0b02ced8e16f228a9d65a37c063ec4c14d7b76a657"}, ] [package.dependencies] click = ">=7.0" Jinja2 = ">=2.11.1" -Markdown = ">=3.3" +Markdown = ">=3.6" MarkupSafe = ">=1.1" mkdocs = ">=1.4" -mkdocs-autorefs = ">=0.3.1" -platformdirs = ">=2.2.0" +mkdocs-autorefs = ">=1.2" +platformdirs = ">=2.2" pymdown-extensions = ">=6.3" [package.extras] @@ -785,13 +796,13 @@ test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-co [[package]] name = "pluggy" -version = "1.3.0" +version = "1.5.0" description = "plugin and hook calling mechanisms for python" optional = false python-versions = ">=3.8" files = [ - {file = "pluggy-1.3.0-py3-none-any.whl", hash = "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7"}, - {file = "pluggy-1.3.0.tar.gz", hash = "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12"}, + {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, + {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, ] [package.extras] @@ -800,13 +811,13 @@ testing = ["pytest", "pytest-benchmark"] [[package]] name = "pre-commit" -version = "3.6.0" +version = "4.0.1" description = "A framework for managing and maintaining multi-language pre-commit hooks." 
optional = false python-versions = ">=3.9" files = [ - {file = "pre_commit-3.6.0-py2.py3-none-any.whl", hash = "sha256:c255039ef399049a5544b6ce13d135caba8f2c28c3b4033277a788f434308376"}, - {file = "pre_commit-3.6.0.tar.gz", hash = "sha256:d30bad9abf165f7785c15a21a1f46da7d0677cb00ee7ff4c579fd38922efe15d"}, + {file = "pre_commit-4.0.1-py2.py3-none-any.whl", hash = "sha256:efde913840816312445dc98787724647c65473daefe420785f885e8ed9a06878"}, + {file = "pre_commit-4.0.1.tar.gz", hash = "sha256:80905ac375958c0444c65e9cebebd948b3cdb518f335a091a670a89d652139d2"}, ] [package.dependencies] @@ -865,13 +876,13 @@ extra = ["pygments (>=2.12)"] [[package]] name = "pytest" -version = "7.4.4" +version = "8.3.3" description = "pytest: simple powerful testing with Python" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"}, - {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"}, + {file = "pytest-8.3.3-py3-none-any.whl", hash = "sha256:a6853c7375b2663155079443d2e45de913a911a11d669df02a50814944db57b2"}, + {file = "pytest-8.3.3.tar.gz", hash = "sha256:70b98107bd648308a7952b06e6ca9a50bc660be218d53c257cc1fc94fda10181"}, ] [package.dependencies] @@ -879,29 +890,29 @@ colorama = {version = "*", markers = "sys_platform == \"win32\""} exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} iniconfig = "*" packaging = "*" -pluggy = ">=0.12,<2.0" -tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} +pluggy = ">=1.5,<2" +tomli = {version = ">=1", markers = "python_version < \"3.11\""} [package.extras] -testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] [[package]] name = "pytest-cov" -version = "4.1.0" +version = "6.0.0" description = "Pytest plugin for measuring coverage." optional = false -python-versions = ">=3.7" +python-versions = ">=3.9" files = [ - {file = "pytest-cov-4.1.0.tar.gz", hash = "sha256:3904b13dfbfec47f003b8e77fd5b589cd11904a21ddf1ab38a64f204d6a10ef6"}, - {file = "pytest_cov-4.1.0-py3-none-any.whl", hash = "sha256:6ba70b9e97e69fcc3fb45bfeab2d0a138fb65c4d0d6a41ef33983ad114be8c3a"}, + {file = "pytest-cov-6.0.0.tar.gz", hash = "sha256:fde0b595ca248bb8e2d76f020b465f3b107c9632e6a1d1705f17834c89dcadc0"}, + {file = "pytest_cov-6.0.0-py3-none-any.whl", hash = "sha256:eee6f1b9e61008bd34975a4d5bab25801eb31898b032dd55addc93e96fcaaa35"}, ] [package.dependencies] -coverage = {version = ">=5.2.1", extras = ["toml"]} +coverage = {version = ">=7.5", extras = ["toml"]} pytest = ">=4.6" [package.extras] -testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtualenv"] +testing = ["fields", "hunter", "process-tests", "pytest-xdist", "virtualenv"] [[package]] name = "python-dateutil" @@ -1130,28 +1141,29 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] [[package]] name = "ruff" -version = "0.1.11" +version = "0.7.4" description = "An extremely fast Python linter and code formatter, written in Rust." 
optional = false python-versions = ">=3.7" files = [ - {file = "ruff-0.1.11-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:a7f772696b4cdc0a3b2e527fc3c7ccc41cdcb98f5c80fdd4f2b8c50eb1458196"}, - {file = "ruff-0.1.11-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:934832f6ed9b34a7d5feea58972635c2039c7a3b434fe5ba2ce015064cb6e955"}, - {file = "ruff-0.1.11-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea0d3e950e394c4b332bcdd112aa566010a9f9c95814844a7468325290aabfd9"}, - {file = "ruff-0.1.11-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9bd4025b9c5b429a48280785a2b71d479798a69f5c2919e7d274c5f4b32c3607"}, - {file = "ruff-0.1.11-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e1ad00662305dcb1e987f5ec214d31f7d6a062cae3e74c1cbccef15afd96611d"}, - {file = "ruff-0.1.11-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:4b077ce83f47dd6bea1991af08b140e8b8339f0ba8cb9b7a484c30ebab18a23f"}, - {file = "ruff-0.1.11-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4a88efecec23c37b11076fe676e15c6cdb1271a38f2b415e381e87fe4517f18"}, - {file = "ruff-0.1.11-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5b25093dad3b055667730a9b491129c42d45e11cdb7043b702e97125bcec48a1"}, - {file = "ruff-0.1.11-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:231d8fb11b2cc7c0366a326a66dafc6ad449d7fcdbc268497ee47e1334f66f77"}, - {file = "ruff-0.1.11-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:09c415716884950080921dd6237767e52e227e397e2008e2bed410117679975b"}, - {file = "ruff-0.1.11-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:0f58948c6d212a6b8d41cd59e349751018797ce1727f961c2fa755ad6208ba45"}, - {file = "ruff-0.1.11-py3-none-musllinux_1_2_i686.whl", hash = "sha256:190a566c8f766c37074d99640cd9ca3da11d8deae2deae7c9505e68a4a30f740"}, - {file = "ruff-0.1.11-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:6464289bd67b2344d2a5d9158d5eb81025258f169e69a46b741b396ffb0cda95"}, - {file = "ruff-0.1.11-py3-none-win32.whl", hash = "sha256:9b8f397902f92bc2e70fb6bebfa2139008dc72ae5177e66c383fa5426cb0bf2c"}, - {file = "ruff-0.1.11-py3-none-win_amd64.whl", hash = "sha256:eb85ee287b11f901037a6683b2374bb0ec82928c5cbc984f575d0437979c521a"}, - {file = "ruff-0.1.11-py3-none-win_arm64.whl", hash = "sha256:97ce4d752f964ba559c7023a86e5f8e97f026d511e48013987623915431c7ea9"}, - {file = "ruff-0.1.11.tar.gz", hash = "sha256:f9d4d88cb6eeb4dfe20f9f0519bd2eaba8119bde87c3d5065c541dbae2b5a2cb"}, + {file = "ruff-0.7.4-py3-none-linux_armv6l.whl", hash = "sha256:a4919925e7684a3f18e18243cd6bea7cfb8e968a6eaa8437971f681b7ec51478"}, + {file = "ruff-0.7.4-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:cfb365c135b830778dda8c04fb7d4280ed0b984e1aec27f574445231e20d6c63"}, + {file = "ruff-0.7.4-py3-none-macosx_11_0_arm64.whl", hash = "sha256:63a569b36bc66fbadec5beaa539dd81e0527cb258b94e29e0531ce41bacc1f20"}, + {file = "ruff-0.7.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d06218747d361d06fd2fdac734e7fa92df36df93035db3dc2ad7aa9852cb109"}, + {file = "ruff-0.7.4-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e0cea28d0944f74ebc33e9f934238f15c758841f9f5edd180b5315c203293452"}, + {file = "ruff-0.7.4-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:80094ecd4793c68b2571b128f91754d60f692d64bc0d7272ec9197fdd09bf9ea"}, + {file = 
"ruff-0.7.4-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:997512325c6620d1c4c2b15db49ef59543ef9cd0f4aa8065ec2ae5103cedc7e7"}, + {file = "ruff-0.7.4-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:00b4cf3a6b5fad6d1a66e7574d78956bbd09abfd6c8a997798f01f5da3d46a05"}, + {file = "ruff-0.7.4-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7dbdc7d8274e1422722933d1edddfdc65b4336abf0b16dfcb9dedd6e6a517d06"}, + {file = "ruff-0.7.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e92dfb5f00eaedb1501b2f906ccabfd67b2355bdf117fea9719fc99ac2145bc"}, + {file = "ruff-0.7.4-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:3bd726099f277d735dc38900b6a8d6cf070f80828877941983a57bca1cd92172"}, + {file = "ruff-0.7.4-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:2e32829c429dd081ee5ba39aef436603e5b22335c3d3fff013cd585806a6486a"}, + {file = "ruff-0.7.4-py3-none-musllinux_1_2_i686.whl", hash = "sha256:662a63b4971807623f6f90c1fb664613f67cc182dc4d991471c23c541fee62dd"}, + {file = "ruff-0.7.4-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:876f5e09eaae3eb76814c1d3b68879891d6fde4824c015d48e7a7da4cf066a3a"}, + {file = "ruff-0.7.4-py3-none-win32.whl", hash = "sha256:75c53f54904be42dd52a548728a5b572344b50d9b2873d13a3f8c5e3b91f5cac"}, + {file = "ruff-0.7.4-py3-none-win_amd64.whl", hash = "sha256:745775c7b39f914238ed1f1b0bebed0b9155a17cd8bc0b08d3c87e4703b990d6"}, + {file = "ruff-0.7.4-py3-none-win_arm64.whl", hash = "sha256:11bff065102c3ae9d3ea4dc9ecdfe5a5171349cdd0787c1fc64761212fc9cf1f"}, + {file = "ruff-0.7.4.tar.gz", hash = "sha256:cd12e35031f5af6b9b93715d8c4f40360070b2041f81273d0527683d5708fce2"}, ] [[package]] @@ -1372,4 +1384,4 @@ all = ["requests", "sc2reader", "tqdm"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "aa8822a603b9e87c7c77471121ccaf87aadbbee211cf8f177054fe1f783d8bf5" +content-hash = "9b4595f0e3dac1958b3d9b2f868c96ac762ab691f3f250cfe5332b7f00207690" diff --git a/pyproject.toml b/pyproject.toml index 05cdccc..2e86a4b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,17 +20,17 @@ requests = { version = "^2.31.0", optional = true } [tool.poetry.group.dev.dependencies] -pytest = "^7.4.4" -pytest-cov = "^4.1.0" -ruff = "^0.1.7" -pre-commit = "^3.6.0" +pytest = "^8.3.3" +pytest-cov = "^6.0.0" +ruff = "^0.7.4" +pre-commit = "^4.0.1" mkdocs = "^1.5.3" -mkdocstrings = "^0.24.0" +mkdocstrings = "^0.27.0" mkdocstrings-python = "^1.7.5" mkdocs-material = "^9.5.3" -mkdocs-include-markdown-plugin = "^6.0.4" +mkdocs-include-markdown-plugin = "^7.0.1" mkdocs-material-extensions = "^1.3.1" -mkdocs-autorefs = "^0.5.0" +mkdocs-autorefs = "^1.2.0" mkdocs-gen-files = "^0.5.0" mkdocs-literate-nav = "^0.6.1" mkdocs-section-index = "^0.3.8" From 67bbba05aa7ae64a8289131c6c1df4ace82ff8ce Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Wed, 20 Nov 2024 01:27:17 +0100 Subject: [PATCH 26/92] refactor: renamed dir_packager to directory_packager BREAKING CHANGE: --- .../{dir_packager => directory_packager}/Dockerfile | 2 +- .../{dir_packager => directory_packager}/README.md | 2 +- .../{dir_packager => directory_packager}/__init__.py | 0 .../directory_packager.py} | 0 .../{dir_packager_test.py => directory_packager_test.py} | 4 +++- 5 files changed, 5 insertions(+), 3 deletions(-) rename src/datasetpreparator/{dir_packager => directory_packager}/Dockerfile (62%) rename src/datasetpreparator/{dir_packager => directory_packager}/README.md (97%) rename 
src/datasetpreparator/{dir_packager => directory_packager}/__init__.py (100%) rename src/datasetpreparator/{dir_packager/dir_packager.py => directory_packager/directory_packager.py} (100%) rename tests/test_cases/{dir_packager_test.py => directory_packager_test.py} (95%) diff --git a/src/datasetpreparator/dir_packager/Dockerfile b/src/datasetpreparator/directory_packager/Dockerfile similarity index 62% rename from src/datasetpreparator/dir_packager/Dockerfile rename to src/datasetpreparator/directory_packager/Dockerfile index ef4b3ca..a470732 100644 --- a/src/datasetpreparator/dir_packager/Dockerfile +++ b/src/datasetpreparator/directory_packager/Dockerfile @@ -6,4 +6,4 @@ WORKDIR /app COPY . . -CMD ["python3", "file_packager.py"] +CMD ["python3", "directory_packager.py"] diff --git a/src/datasetpreparator/dir_packager/README.md b/src/datasetpreparator/directory_packager/README.md similarity index 97% rename from src/datasetpreparator/dir_packager/README.md rename to src/datasetpreparator/directory_packager/README.md index be04547..03f14b8 100644 --- a/src/datasetpreparator/dir_packager/README.md +++ b/src/datasetpreparator/directory_packager/README.md @@ -6,7 +6,7 @@ Utility script for compressing a directory into a `.zip` archive. This script it Please keep in mind that the ```src/dir_packager.py``` contains default flag values and can be customized with the following command line flags: ``` -Usage: dir_packager.py [OPTIONS] +Usage: directory_packager.py [OPTIONS] Tool used for processing StarCraft 2 (SC2) datasets. with https://github.com/Kaszanas/SC2InfoExtractorGo diff --git a/src/datasetpreparator/dir_packager/__init__.py b/src/datasetpreparator/directory_packager/__init__.py similarity index 100% rename from src/datasetpreparator/dir_packager/__init__.py rename to src/datasetpreparator/directory_packager/__init__.py diff --git a/src/datasetpreparator/dir_packager/dir_packager.py b/src/datasetpreparator/directory_packager/directory_packager.py similarity index 100% rename from src/datasetpreparator/dir_packager/dir_packager.py rename to src/datasetpreparator/directory_packager/directory_packager.py diff --git a/tests/test_cases/dir_packager_test.py b/tests/test_cases/directory_packager_test.py similarity index 95% rename from tests/test_cases/dir_packager_test.py rename to tests/test_cases/directory_packager_test.py index 1fbb8e4..52a470a 100644 --- a/tests/test_cases/dir_packager_test.py +++ b/tests/test_cases/directory_packager_test.py @@ -1,7 +1,9 @@ import unittest import zipfile -from datasetpreparator.dir_packager.dir_packager import multiple_dir_packager +from datasetpreparator.directory_packager.directory_packager import ( + multiple_dir_packager, +) from tests.test_settings import ( DELETE_SCRIPT_TEST_DIR, From fe4a9149812dc5fb9b74fe9008ba28972d778ebb Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Wed, 20 Nov 2024 01:31:37 +0100 Subject: [PATCH 27/92] fix: fixing paths in Dockerfile --- README.md | 2 +- docker/Dockerfile | 8 ++++---- docker/Dockerfile.dev | 4 ++-- makefile | 30 ++++++++++++++++-------------- 4 files changed, 23 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 3056823..8f5d151 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ When using Docker, you will have to pass the arguments through the `docker run` ```bash docker run \ -v "./processing:/app/processing" \ - datasetpreparator \ + datasetpreparator:latest \ python3 ./src/datasetpreparator/directory_flattener/directory_flattener.py \ 
--input_path /app/processing/directory_flattener/input \ --output_path /app/processing/directory_flattener/output diff --git a/docker/Dockerfile b/docker/Dockerfile index 7fa99aa..1a8adaf 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,16 +1,16 @@ -# Built .exe replay parsing tool is required to run sc2_replaypack_processor -# https://github.com/Kaszanas/SC2InfoExtractorGo ARG PYTHON_VERSION=3.11 +# Built .exe replay parsing tool is required to run sc2_replaypack_processor +# https://github.com/Kaszanas/SC2InfoExtractorGo FROM kaszanas/sc2infoextractorgo:latest AS extractor -FROM python:${PYTHON_VERSION}-alpine +FROM python:${PYTHON_VERSION}-alpine AS build WORKDIR /app # Copying the replay parsing tool: -COPY --from=extractor /SC2InfoExtractorGo /SC2InfoExtractorGo +COPY --from=extractor /app/SC2InfoExtractorGo /app/SC2InfoExtractorGo # Copy only what is required to install the project: COPY pyproject.toml poetry.lock ci/install_poetry.py /app/ diff --git a/docker/Dockerfile.dev b/docker/Dockerfile.dev index 67f98ab..31090ee 100644 --- a/docker/Dockerfile.dev +++ b/docker/Dockerfile.dev @@ -1,8 +1,8 @@ -# Built .exe replay parsing tool is required to run sc2_replaypack_processor -# https://github.com/Kaszanas/SC2InfoExtractorGo ARG PYTHON_VERSION=3.10.11 +# Built .exe replay parsing tool is required to run sc2_replaypack_processor +# https://github.com/Kaszanas/SC2InfoExtractorGo FROM python:${PYTHON_VERSION} AS base # Install other dependencies such as git, etc. This is needed for diff --git a/makefile b/makefile index be384ef..a44270b 100644 --- a/makefile +++ b/makefile @@ -11,20 +11,20 @@ TEST_COMMAND = "poetry run pytest --durations=100 --ignore-glob='test_*.py' test ################### sc2reset_sc2egset: ## Runs the entire processing pipeline to recreate SC2ReSet and SC2EGSet or any other dataset using our standard tooling. @make flatten - @make process_replaypack + @make process_replaypacks @make rename_files - @make package_dataset + @make package_sc2egset_dataset flatten: ## Flattens the directory if the files are held in nested directories. This helps with streamlining the processing. docker run \ -v "${PWD}/processing:/app/processing" \ - datasetpreparator \ + datasetpreparator:latest \ python3 directory_flattener.py json_merge: ## Merges two JSON files. docker run \ -v "${PWD}/processing:/app/processing" \ - datasetpreparator \ + datasetpreparator:latest \ python3 json_merger.py \ --json_one=../processing/json_merger/map_translation.json \ --json_two=../processing/json_merger/new_maps_processed.json @@ -32,13 +32,13 @@ json_merge: ## Merges two JSON files. download_maps: ## Runs over directories with .SC2Replay files and downloads maps that were used in the games. docker run \ -v "${PWD}/processing:/app/processing" \ - datasetpreparator \ + datasetpreparator:latest \ python3 sc2_map_downloader.py -process_replaypack: ## Parses the raw (.SC2Replay) data into JSON files. +process_replaypacks: ## Parses the raw (.SC2Replay) data into JSON files. docker run \ -v "${PWD}/processing:/app/processing" \ - datasetpreparator \ + datasetpreparator:latest \ python3 sc2_replaypack_processor.py \ --n_processes 8 \ --perform_chat_anonymization "true" @@ -46,21 +46,23 @@ process_replaypack: ## Parses the raw (.SC2Replay) data into JSON files. rename_files: ## Renames the files after processing with SC2InfoExtractorGo. 
docker run \ -v "${PWD}/processing:/app/processing" \ - datasetpreparator \ + datasetpreparator:latest \ python3 file_renamer.py \ --input_dir ../processing/sc2_replaypack_processor/output -package_reset_dataset: ## Packages the raw data. Used to prepare SC2ReSet Replaypack set. +package_sc2reset_dataset: ## Packages the raw data. Used to prepare SC2ReSet Replaypack set. docker run \ -v "${PWD}/processing:/app/processing" \ - datasetpreparator \ - python3 file_packager.py --input_dir ../processing/directory_flattener/output + datasetpreparator:latest \ + python3 src/datasetpreparator/ file_packager.py \ + --input_dir ../processing/directory_flattener/output -package_dataset: ## Packages the pre-processed dataset from the output of datasetpreparator. Used to prepare SC2EGSet Dataset. +package_sc2egset_dataset: ## Packages the pre-processed dataset from the output of datasetpreparator. Used to prepare SC2EGSet Dataset. docker run \ -v "${PWD}/processing:/app/processing" \ - datasetpreparator \ - python3 file_packager.py --input_dir ../processing/sc2_replaypack_processor/output + datasetpreparator:latest \ + python3 file_packager.py \ + --input_dir ../processing/sc2_replaypack_processor/output ################### #### DOCKER ####### From a93e82dcd09cd0308e2ae94282aa7887d8667d16 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Wed, 20 Nov 2024 01:52:31 +0100 Subject: [PATCH 28/92] fix: mounting curdir as a dot --- makefile | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/makefile b/makefile index a44270b..ebf248e 100644 --- a/makefile +++ b/makefile @@ -2,7 +2,6 @@ DOCKER_DIR = ./docker TEST_COMPOSE = $(DOCKER_DIR)/docker-test-compose.yml PYTHON_VERSION = 3.11 -PWD := ${CURDIR} TEST_COMMAND = "poetry run pytest --durations=100 --ignore-glob='test_*.py' tests --cov=datasetpreparator --cov-report term-missing --cov-report html 2>&1 | tee /app/logs/test_output.log" @@ -17,13 +16,13 @@ sc2reset_sc2egset: ## Runs the entire processing pipeline to recreate SC2ReSet a flatten: ## Flattens the directory if the files are held in nested directories. This helps with streamlining the processing. docker run \ - -v "${PWD}/processing:/app/processing" \ + -v "./processing:/app/processing" \ datasetpreparator:latest \ python3 directory_flattener.py json_merge: ## Merges two JSON files. docker run \ - -v "${PWD}/processing:/app/processing" \ + -v "./processing:/app/processing" \ datasetpreparator:latest \ python3 json_merger.py \ --json_one=../processing/json_merger/map_translation.json \ @@ -31,13 +30,13 @@ json_merge: ## Merges two JSON files. download_maps: ## Runs over directories with .SC2Replay files and downloads maps that were used in the games. docker run \ - -v "${PWD}/processing:/app/processing" \ + -v "./processing:/app/processing" \ datasetpreparator:latest \ python3 sc2_map_downloader.py process_replaypacks: ## Parses the raw (.SC2Replay) data into JSON files. docker run \ - -v "${PWD}/processing:/app/processing" \ + -v "./processing:/app/processing" \ datasetpreparator:latest \ python3 sc2_replaypack_processor.py \ --n_processes 8 \ @@ -45,21 +44,21 @@ process_replaypacks: ## Parses the raw (.SC2Replay) data into JSON files. rename_files: ## Renames the files after processing with SC2InfoExtractorGo. 
docker run \ - -v "${PWD}/processing:/app/processing" \ + -v "./processing:/app/processing" \ datasetpreparator:latest \ python3 file_renamer.py \ --input_dir ../processing/sc2_replaypack_processor/output package_sc2reset_dataset: ## Packages the raw data. Used to prepare SC2ReSet Replaypack set. docker run \ - -v "${PWD}/processing:/app/processing" \ + -v "./processing:/app/processing" \ datasetpreparator:latest \ python3 src/datasetpreparator/ file_packager.py \ --input_dir ../processing/directory_flattener/output package_sc2egset_dataset: ## Packages the pre-processed dataset from the output of datasetpreparator. Used to prepare SC2EGSet Dataset. docker run \ - -v "${PWD}/processing:/app/processing" \ + -v "./processing:/app/processing" \ datasetpreparator:latest \ python3 file_packager.py \ --input_dir ../processing/sc2_replaypack_processor/output @@ -81,7 +80,7 @@ docker_build_dev: ## Builds the development image containing all of the tools. docker_run_test: ## Runs the test command using Docker. docker run \ - -v "${PWD}:/app" \ + -v ".:/app" \ -e "TEST_WORKSPACE=/app" \ datasetpreparator:devcontainer \ sh -c \ @@ -89,7 +88,7 @@ docker_run_test: ## Runs the test command using Docker. docker_run_dev: ## Runs the development image containing all of the tools. docker run \ - -v "${PWD}:/app" \ + -v ".:/app" \ -it \ -e "TEST_WORKSPACE=/app" \ datasetpreparator:devcontainer \ @@ -106,13 +105,13 @@ doc_build: ## Builds the Mkdocs documentation. docker_doc_build: ## Builds the Mkdocs documentation using Docker. docker run \ - -v "${PWD}/docs:/docs" \ + -v "./docs:/docs" \ datasetpreparator:devcontainer \ poetry run mkdocs build docker_doc_build_action: ## Builds the Mkdocs documentation using Docker. docker run \ - -v "${PWD}/docs:/docs" \ + -v "./docs:/docs" \ datasetpreparator:devcontainer \ poetry run mkdocs build @@ -121,7 +120,7 @@ docker_doc_build_action: ## Builds the Mkdocs documentation using Docker. ################### docker_pre_commit: ## Runs pre-commit hooks using Docker. 
docker run \ - -v "${PWD}:/app" \ + -v ".:/app" \ datasetpreparator:devcontainer \ pre-commit run --all-files From c00b38e95113faf58ca205f5c0098e247c36dec1 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Wed, 20 Nov 2024 17:27:35 +0100 Subject: [PATCH 29/92] test: added dotenv to set TEST_WORKSPACE --- .env.template | 1 + .gitignore | 2 ++ poetry.lock | 16 +++++++++++++++- pyproject.toml | 1 + tests/test_main.py | 8 +++++++- 5 files changed, 26 insertions(+), 2 deletions(-) create mode 100644 .env.template diff --git a/.env.template b/.env.template new file mode 100644 index 0000000..5346740 --- /dev/null +++ b/.env.template @@ -0,0 +1 @@ +TEST_WORKSPACE= diff --git a/.gitignore b/.gitignore index 9aa1056..fbada7e 100644 --- a/.gitignore +++ b/.gitignore @@ -34,3 +34,5 @@ ruff_cache/ # PyCharm /.idea + +.env diff --git a/poetry.lock b/poetry.lock index a169519..de4d3d8 100644 --- a/poetry.lock +++ b/poetry.lock @@ -928,6 +928,20 @@ files = [ [package.dependencies] six = ">=1.5" +[[package]] +name = "python-dotenv" +version = "1.0.1" +description = "Read key-value pairs from a .env file and set them as environment variables" +optional = false +python-versions = ">=3.8" +files = [ + {file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"}, + {file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"}, +] + +[package.extras] +cli = ["click (>=5.0)"] + [[package]] name = "pyyaml" version = "6.0.1" @@ -1384,4 +1398,4 @@ all = ["requests", "sc2reader", "tqdm"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "9b4595f0e3dac1958b3d9b2f868c96ac762ab691f3f250cfe5332b7f00207690" +content-hash = "f563a3f794c497d3da2d5094f42cb142b15d1fa6b34c2bce997921e7de459cdf" diff --git a/pyproject.toml b/pyproject.toml index 2e86a4b..8878a7a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,6 +35,7 @@ mkdocs-gen-files = "^0.5.0" mkdocs-literate-nav = "^0.6.1" mkdocs-section-index = "^0.3.8" commitizen = "^3.13.0" +python-dotenv = "^1.0.1" [tool.poetry.extras] all = ["requests", "tqdm", "sc2reader"] diff --git a/tests/test_main.py b/tests/test_main.py index 31d417a..a46355f 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -4,8 +4,14 @@ import os import logging +from dotenv import load_dotenv + sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) +load_dotenv() + +WORKSPACE_DIRECTORY = os.getenv("TEST_WORKSPACE") + from tests.test_utils import get_workspace_dir # noqa: E402 @@ -15,7 +21,7 @@ def suite(): start_dir = os.path.join(get_workspace_dir(), "tests") suite = unittest.TestLoader().discover( start_dir=start_dir, - pattern="*_test.py", # *_test.py + pattern="sc2reset_replaypack_downloader_test.py", # *_test.py ) return suite From 046ff3119bfde625a90c11618b104d1567662a15 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Wed, 20 Nov 2024 17:28:14 +0100 Subject: [PATCH 30/92] refactor: refreshed ci installing poetry --- ci/install_poetry.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/install_poetry.py b/ci/install_poetry.py index 4a96007..3ff49aa 100644 --- a/ci/install_poetry.py +++ b/ci/install_poetry.py @@ -23,6 +23,7 @@ For full documentation, visit https://python-poetry.org/docs/#installation. 
""" # noqa: E501 + import sys From af14698863f546b7973993b662f663a1517f51af Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Wed, 20 Nov 2024 17:28:47 +0100 Subject: [PATCH 31/92] build: bumped poetry version in Dockerfile --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 1a8adaf..b4ff01f 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -18,7 +18,7 @@ COPY pyproject.toml poetry.lock ci/install_poetry.py /app/ # Install poetry # TODO: this is rather ugly, we are installing poetry into the release Docker build. Use multi-stage builds instead. ENV POETRY_HOME=/opt/poetry -RUN python3 install_poetry.py --version 1.8.2 && \ +RUN python3 install_poetry.py --version 1.8.4 && \ $POETRY_HOME/bin/poetry --version # Install only dependencies without installing current project: From e1b1349500de3fffbcd3607f232f60087f79b1e1 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Wed, 20 Nov 2024 17:29:51 +0100 Subject: [PATCH 32/92] test: commented out test, file_renamer_test not ready --- tests/test_cases/file_renamer_test.py | 80 +++++++++++++-------------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/tests/test_cases/file_renamer_test.py b/tests/test_cases/file_renamer_test.py index d9ba4b1..23484d8 100644 --- a/tests/test_cases/file_renamer_test.py +++ b/tests/test_cases/file_renamer_test.py @@ -1,40 +1,40 @@ -import unittest - -from datasetpreparator.file_renamer.file_renamer import file_renamer - -from tests.test_settings import ( - DELETE_SCRIPT_TEST_DIR, - DELETE_SCRIPT_TEST_OUTPUT_DIR, - DELETE_SCRIPT_TEST_INPUT_DIR, -) - -from tests.test_utils import ( - create_script_test_input_dir, - create_script_test_output_dir, - dir_test_cleanup, -) - - -class FileRenamerTest(unittest.TestCase): - @classmethod - def setUpClass(cls) -> None: - cls.SCRIPT_NAME = "file_renamer" - # Create and get test input and output directories: - cls.input_path = create_script_test_input_dir(script_name=cls.SCRIPT_NAME) - cls.output_path = create_script_test_output_dir(script_name=cls.SCRIPT_NAME) - - # TODO: Mock the directory structure: - - def test_file_renamer(self) -> None: - file_renamer(input_path=self.input_path) - # TODO: Assert that the files with old name do not exist. - # TODO: Assert that files with new names exist. 
- - @classmethod - def tearDownClass(cls) -> None: - dir_test_cleanup( - script_name=cls.SCRIPT_NAME, - delete_script_test_dir_bool=DELETE_SCRIPT_TEST_DIR, - delete_script_test_input_bool=DELETE_SCRIPT_TEST_INPUT_DIR, - delete_script_test_output_bool=DELETE_SCRIPT_TEST_OUTPUT_DIR, - ) +# import unittest + +# from datasetpreparator.file_renamer.file_renamer import file_renamer + +# from tests.test_settings import ( +# DELETE_SCRIPT_TEST_DIR, +# DELETE_SCRIPT_TEST_OUTPUT_DIR, +# DELETE_SCRIPT_TEST_INPUT_DIR, +# ) + +# from tests.test_utils import ( +# create_script_test_input_dir, +# create_script_test_output_dir, +# dir_test_cleanup, +# ) + + +# class FileRenamerTest(unittest.TestCase): +# @classmethod +# def setUpClass(cls) -> None: +# cls.SCRIPT_NAME = "file_renamer" +# # Create and get test input and output directories: +# cls.input_path = create_script_test_input_dir(script_name=cls.SCRIPT_NAME) +# cls.output_path = create_script_test_output_dir(script_name=cls.SCRIPT_NAME) + +# # TODO: Mock the directory structure: + +# def test_file_renamer(self) -> None: +# file_renamer(input_path=self.input_path) +# # TODO: Assert that the files with old name do not exist. +# # TODO: Assert that files with new names exist. + +# @classmethod +# def tearDownClass(cls) -> None: +# dir_test_cleanup( +# script_name=cls.SCRIPT_NAME, +# delete_script_test_dir_bool=DELETE_SCRIPT_TEST_DIR, +# delete_script_test_input_bool=DELETE_SCRIPT_TEST_INPUT_DIR, +# delete_script_test_output_bool=DELETE_SCRIPT_TEST_OUTPUT_DIR, +# ) From b500fd6635e781abd35e40d1bf930ce9270137a5 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Wed, 20 Nov 2024 20:18:56 +0100 Subject: [PATCH 33/92] feat: added default flag values for golang --- .../utils/replaypack_processor_args.py | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/replaypack_processor_args.py b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/replaypack_processor_args.py index cefd9ee..1575fca 100644 --- a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/replaypack_processor_args.py +++ b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/replaypack_processor_args.py @@ -58,19 +58,20 @@ class SC2InfoExtractorGoArguments: def __init__( self, - game_mode_filter: int, - processing_input: Path, - log_dir: Path, - log_level: int, - number_of_packages: int, - only_map_download: bool, - output: Path, - perform_chat_anonymization: bool, - perform_cleanup: bool, - perform_filtering: bool, - perform_integrity_checks: bool, - perform_player_anonymization: bool, - perform_validity_checks: bool, + processing_input: Path = Path("./replays/input"), + output: Path = Path("./replays/output"), + game_mode_filter: int = 0b11111111, + log_dir: Path = Path("./logs/"), + log_level: int = 4, + number_of_packages: int = 1, + only_map_download: bool = False, + maps_directory: Path = Path("./maps/"), + perform_chat_anonymization: bool = False, + perform_cleanup: bool = False, + perform_filtering: bool = False, + perform_integrity_checks: bool = False, + perform_player_anonymization: bool = False, + perform_validity_checks: bool = False, max_procs: int = os.cpu_count(), ): self.game_mode_filter = game_mode_filter @@ -80,6 +81,7 @@ def __init__( self.max_procs = max_procs self.number_of_packages = number_of_packages self.only_map_download = "true" if only_map_download else "false" + self.maps_directory = maps_directory 
self.output = output self.perform_chat_anonymization = ( "true" if perform_chat_anonymization else "false" From 28ee7461648e5ff65d6dbd717983406240d7cc56 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Wed, 20 Nov 2024 20:42:28 +0100 Subject: [PATCH 34/92] fix: fixing imports in sc2reset --- tests/test_cases/sc2reset_replaypack_downloader_test.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/test_cases/sc2reset_replaypack_downloader_test.py b/tests/test_cases/sc2reset_replaypack_downloader_test.py index e2fbb0b..5b3972f 100644 --- a/tests/test_cases/sc2reset_replaypack_downloader_test.py +++ b/tests/test_cases/sc2reset_replaypack_downloader_test.py @@ -1,6 +1,10 @@ from pathlib import Path import unittest -from datasetpreparator.sc2.sc2reset_replaypack_downloader.get_md5 import get_md5 + +from datasetpreparator.sc2.sc2reset_replaypack_downloader.utils.get_md5 import ( + get_md5, +) + from datasetpreparator.sc2.sc2reset_replaypack_downloader.sc2reset_replaypack_downloader import ( sc2reset_replaypack_downloader, ) From c6e5c496d8078733da8e78a2d9c3335b2c18632d Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Wed, 20 Nov 2024 20:43:21 +0100 Subject: [PATCH 35/92] test: added extractor arguments in test --- .../test_cases/sc2egset_replaypack_processor_test.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/test_cases/sc2egset_replaypack_processor_test.py b/tests/test_cases/sc2egset_replaypack_processor_test.py index 24fc97f..40a0dac 100644 --- a/tests/test_cases/sc2egset_replaypack_processor_test.py +++ b/tests/test_cases/sc2egset_replaypack_processor_test.py @@ -4,6 +4,9 @@ sc2egset_replaypack_processor, ) +from datasetpreparator.sc2.sc2egset_replaypack_processor.utils.replaypack_processor_args import ( + ReplaypackProcessorArguments, +) from tests.test_settings import ( DELETE_SCRIPT_TEST_DIR, DELETE_SCRIPT_TEST_OUTPUT_DIR, @@ -25,7 +28,7 @@ class SC2ReplaypackProcessorTest(unittest.TestCase): @classmethod def setUpClass(cls) -> None: - cls.SCRIPT_NAME = "sc2_replaypack_processor" + cls.SCRIPT_NAME = "sc2egset_replaypack_processor" # Create and get test input and output directories: cls.input_path = create_script_test_input_dir(script_name=cls.SCRIPT_NAME) cls.output_path = create_script_test_output_dir(script_name=cls.SCRIPT_NAME) @@ -44,12 +47,14 @@ def test_sc2_replaypack_processor(self): # Maybe a pytest marker to skip this test? # Should this even be tested given that the SC2InfoExtractorGo will have its own tests? # This script is only providing a multiprocessing wrapper for the SC2InfoExtractorGo. - sc2egset_replaypack_processor( + + arguments = ReplaypackProcessorArguments( input_path=self.input_path, output_path=self.output_path, n_processes=1, - perform_chat_anonymization=False, ) + + sc2egset_replaypack_processor(arguments=arguments) # TODO: Check if output contains the same directories as for input. # TODO: Check if outputs contain extracted JSON files with valid fields. 
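For orientation, a minimal sketch of how the refactored arguments API above is meant to be driven from Python. The import paths follow the file layout visible in these diffs, while the concrete processing paths and process count are illustrative assumptions, not fixed defaults:

```python
from pathlib import Path

from datasetpreparator.sc2.sc2egset_replaypack_processor.sc2egset_replaypack_processor import (
    sc2egset_replaypack_processor,
)
from datasetpreparator.sc2.sc2egset_replaypack_processor.utils.replaypack_processor_args import (
    ReplaypackProcessorArguments,
)

# Hypothetical paths; point these at directories of flattened replaypacks:
arguments = ReplaypackProcessorArguments(
    input_path=Path("./processing/directory_flattener/output"),
    output_path=Path("./processing/sc2egset_replaypack_processor/output"),
    n_processes=4,
)

# Delegates each replaypack to SC2InfoExtractorGo via the multiprocessing wrapper:
sc2egset_replaypack_processor(arguments=arguments)
```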
From a3a31c7190c144ed65076a164ee96bd5c429d975 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Wed, 20 Nov 2024 20:45:02 +0100 Subject: [PATCH 36/92] fix: fixing opening and writing to file --- .../directory_flattener/directory_flattener.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/datasetpreparator/directory_flattener/directory_flattener.py b/src/datasetpreparator/directory_flattener/directory_flattener.py index ea8945c..7de18b6 100644 --- a/src/datasetpreparator/directory_flattener/directory_flattener.py +++ b/src/datasetpreparator/directory_flattener/directory_flattener.py @@ -13,19 +13,22 @@ from datasetpreparator.settings import LOGGING_FORMAT -def save_dir_mapping(output_path: str, dir_mapping: dict) -> None: +def save_dir_mapping(output_path: Path, dir_mapping: dict) -> None: """ Saves a JSON file containing the mapping of the directory structure before it was "flattened". Parameters ---------- - output_path : str + output_path : Path Specifies the path where the mapping will be saved. dir_mapping : dict Specifies the directory mapping dict. """ - with open(os.path.join(output_path, "processed_mapping.json"), "w") as json_file: + + path_to_mapping = Path(output_path, "processed_mapping.json").resolve() + + with path_to_mapping.open("w") as json_file: json.dump(dir_mapping, json_file) @@ -106,7 +109,9 @@ def directory_flatten( logging.debug(f"File copied to {new_path_and_filename.as_posix()}") # Finding the relative path from the root directory to the file: - dir_structure_mapping[new_path_and_filename.name] = root_dir_name_and_file + dir_structure_mapping[ + new_path_and_filename.name + ] = root_dir_name_and_file.as_posix() return dir_structure_mapping @@ -244,7 +249,9 @@ def main(input_path: Path, output_path: Path, file_extension: str, log: str) -> logging.basicConfig(format=LOGGING_FORMAT, level=numeric_level) multiple_directory_flattener( - input_path=input_path, output_path=output_path, file_extension=file_extension + input_path=input_path, + output_path=output_path, + file_extension=file_extension, ) From 9203288fcb79337f1e32c36d9d2e50a62bfb7aef Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Wed, 20 Nov 2024 20:46:23 +0100 Subject: [PATCH 37/92] feat: sc2infoextractorgo executable path in settings --- .../sc2egset_replaypack_processor/utils/multiprocess.py | 7 ++++--- src/datasetpreparator/settings.py | 6 ++++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/multiprocess.py b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/multiprocess.py index 3c1f319..7eaa6c0 100644 --- a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/multiprocess.py +++ b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/multiprocess.py @@ -10,6 +10,8 @@ SC2InfoExtractorGoArguments, ) +from datasetpreparator.settings import PATH_TO_SC2INFOEXTRACTORGO + def multiprocessing_scheduler( processing_arguments: List[SC2InfoExtractorGoArguments], number_of_processes: int @@ -66,7 +68,7 @@ def process_single_replaypack(arguments: SC2InfoExtractorGoArguments) -> None: subprocess.run( [ # FIXME hardcoded binary name - "/SC2InfoExtractorGo", + str(PATH_TO_SC2INFOEXTRACTORGO), f"-input={arguments.processing_input}/", f"-output={arguments.output}/", f"-perform_integrity_checks={arguments.perform_integrity_checks}", @@ -99,8 +101,7 @@ def pre_process_download_maps(arguments: 
SC2InfoExtractorGoArguments) -> None:
 
     subprocess.run(
         [
-            # FIXME hardcoded binary name
-            "/SC2InfoExtractorGo",
+            str(PATH_TO_SC2INFOEXTRACTORGO),
             f"-input={arguments.processing_input}/",
             f"-output={arguments.output}/",
             "-only_map_download=true",
diff --git a/src/datasetpreparator/settings.py b/src/datasetpreparator/settings.py
index 1490f54..07a1290 100644
--- a/src/datasetpreparator/settings.py
+++ b/src/datasetpreparator/settings.py
@@ -1 +1,7 @@
+import os
+from pathlib import Path
+
+
 LOGGING_FORMAT = "[%(asctime)s][%(process)d/%(thread)d][%(levelname)s][%(filename)s:%(lineno)s] - %(message)s"
+
+PATH_TO_SC2INFOEXTRACTORGO = Path(os.getcwd(), "SC2InfoExtractorGo").resolve()

From 6d7447d693f52ec129194de4b0e317169561ac7f Mon Sep 17 00:00:00 2001
From: Kaszanas <34846245+Kaszanas@users.noreply.github.com>
Date: Wed, 20 Nov 2024 20:47:21 +0100
Subject: [PATCH 38/92] fix: fixing return value, removed range loop

---
 .../sc2reset_replaypack_downloader.py         | 27 +++++++++----------
 .../utils/download_replaypack.py              |  2 +-
 2 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/sc2reset_replaypack_downloader.py b/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/sc2reset_replaypack_downloader.py
index 8140302..988e6cf 100644
--- a/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/sc2reset_replaypack_downloader.py
+++ b/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/sc2reset_replaypack_downloader.py
@@ -50,20 +50,19 @@ def sc2reset_replaypack_downloader(
     # Download replaypacks:
     downloaded_paths: List[Tuple[str, str]] = []
     for replaypack_name, replaypack_url, file_md5 in replaypack_list:
-        for _ in range(2):
-            downloaded_replaypack_path, ok = download_replaypack(
-                destination_dir=download_path,
-                replaypack_name=replaypack_name,
-                replaypack_url=replaypack_url,
-                replaypack_md5=file_md5,
-            )
-            # If the download was succesful, break out of the inner loop:
-            if ok:
-                downloaded_paths.append((replaypack_name, downloaded_replaypack_path))
-                break
-            logging.error(
-                f"Replaypack {replaypack_name} could not be downloaded. Adding to retry list..."
-            )
+        downloaded_replaypack_path, ok = download_replaypack(
+            destination_dir=download_path,
+            replaypack_name=replaypack_name,
+            replaypack_url=replaypack_url,
+            replaypack_md5=file_md5,
+        )
+        # If the download was successful, proceed to the next replaypack:
+        if ok:
+            downloaded_paths.append((replaypack_name, downloaded_replaypack_path))
+            continue
+        logging.error(
+            f"Replaypack {replaypack_name} could not be downloaded. Skipping..."
+ ) # Unpack replaypacks: for replaypack_name, downloaded_replaypack_path in downloaded_paths: diff --git a/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/utils/download_replaypack.py b/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/utils/download_replaypack.py index e11f013..e7f0eb8 100644 --- a/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/utils/download_replaypack.py +++ b/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/utils/download_replaypack.py @@ -76,7 +76,7 @@ def download_replaypack( if download_filepath.exists(): md5_checksum = get_md5(file=download_filepath) if md5_checksum == replaypack_md5: - return download_filepath + return download_filepath, True if not destination_dir.exists(): destination_dir.mkdir() From 06d5513a32733662c03b7c1132da3c7d8a1eb786 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Wed, 20 Nov 2024 20:47:51 +0100 Subject: [PATCH 39/92] build: adjusted dockerfiles, copying files separately --- docker/Dockerfile | 7 ++++++- docker/Dockerfile.dev | 31 ++++++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index b4ff01f..07596a0 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -10,7 +10,10 @@ FROM python:${PYTHON_VERSION}-alpine AS build WORKDIR /app # Copying the replay parsing tool: +# sc2egset_replaypack_processor requires the .exe file to be in the same directory as the script: COPY --from=extractor /app/SC2InfoExtractorGo /app/SC2InfoExtractorGo +# Ensure the executable has the right permissions +RUN chmod +x /app/SC2InfoExtractorGo # Copy only what is required to install the project: COPY pyproject.toml poetry.lock ci/install_poetry.py /app/ @@ -22,7 +25,9 @@ RUN python3 install_poetry.py --version 1.8.4 && \ $POETRY_HOME/bin/poetry --version # Install only dependencies without installing current project: -RUN $POETRY_HOME/bin/poetry config virtualenvs.create false && $POETRY_HOME/bin/poetry install --no-root +RUN $POETRY_HOME/bin/poetry \ + config virtualenvs.create false \ + && $POETRY_HOME/bin/poetry install --no-root # Copy entire repository contents COPY . . diff --git a/docker/Dockerfile.dev b/docker/Dockerfile.dev index 31090ee..8a71d0c 100644 --- a/docker/Dockerfile.dev +++ b/docker/Dockerfile.dev @@ -3,8 +3,12 @@ ARG PYTHON_VERSION=3.10.11 # Built .exe replay parsing tool is required to run sc2_replaypack_processor # https://github.com/Kaszanas/SC2InfoExtractorGo +FROM kaszanas/sc2infoextractorgo:latest AS extractor + FROM python:${PYTHON_VERSION} AS base +ENV TEST_WORKSPACE=/app + # Install other dependencies such as git, etc. This is needed for # pre-commit hooks to work and other QOL stuff. RUN apt-get update && apt-get install -y --no-install-recommends \ @@ -12,6 +16,12 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ WORKDIR /app +# Copying the replay parsing tool: +# sc2egset_replaypack_processor requires the .exe file to be in the same directory as the script: +COPY --from=extractor /app/SC2InfoExtractorGo /app/SC2InfoExtractorGo +# Ensure the executable has the right permissions: +RUN chmod +x /app/SC2InfoExtractorGo + # Install poetry RUN pip install poetry @@ -21,8 +31,27 @@ COPY pyproject.toml poetry.lock /app/ # Install only dependencies without installing current project: RUN poetry config virtualenvs.create false && poetry install --no-root +# REVIEW: This is not the best way to build... 
This copies some old test files: # Copy entire repository contents -COPY . . +# COPY . . + +# Required to install pre-commit in future steps: +COPY .git/ /app/.git/ + +# Copy test files: +COPY /src/ /app/src/ +COPY /tests/__init__.py /app/tests/__init__.py +COPY /tests/conftest.py /app/tests/conftest.py +COPY /tests/test_utils.py /app/tests/test_utils.py +COPY /tests/test_settings.py /app/tests/test_settings.py +COPY /tests/test_main.py /app/tests/test_main.py +COPY /tests/test_cases/ /app/tests/test_cases/ +RUN mkdir -p /tests/test_files/ + +# Copy docs files: +COPY /docs/ /app/docs/ +COPY mkdocs.yml /app/mkdocs.yml +COPY README.md /app/README.md RUN poetry run pre-commit install From 748f8406f24a111743b14082fe4ba8f929bd1ccc Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Wed, 20 Nov 2024 20:48:28 +0100 Subject: [PATCH 40/92] feat: test workspace in .env --- .env.template | 1 + 1 file changed, 1 insertion(+) diff --git a/.env.template b/.env.template index 5346740..ba96581 100644 --- a/.env.template +++ b/.env.template @@ -1 +1,2 @@ +# To have imports resolve correctly this should be the path to the root of the project: TEST_WORKSPACE= From 74236e09474bdd0349e201ce2055a804498ff27e Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Wed, 20 Nov 2024 20:48:51 +0100 Subject: [PATCH 41/92] test: adjusted test target in make --- makefile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/makefile b/makefile index ebf248e..9d99c23 100644 --- a/makefile +++ b/makefile @@ -79,9 +79,7 @@ docker_build_dev: ## Builds the development image containing all of the tools. --tag=datasetpreparator:devcontainer docker_run_test: ## Runs the test command using Docker. - docker run \ - -v ".:/app" \ - -e "TEST_WORKSPACE=/app" \ + docker run --rm \ datasetpreparator:devcontainer \ sh -c \ $(TEST_COMMAND) From 9ce61935c0a747f70d8ed0c9d6ca2fceba7a8785 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Wed, 20 Nov 2024 21:02:15 +0100 Subject: [PATCH 42/92] fix: fixing pre-commit in dev docker --- docker/Dockerfile.dev | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docker/Dockerfile.dev b/docker/Dockerfile.dev index 8a71d0c..0896c45 100644 --- a/docker/Dockerfile.dev +++ b/docker/Dockerfile.dev @@ -35,6 +35,9 @@ RUN poetry config virtualenvs.create false && poetry install --no-root # Copy entire repository contents # COPY . . 
+# Required for CI and pre-commit locally: +COPY .pre-commit-config.yaml /app/.pre-commit-config.yaml + # Required to install pre-commit in future steps: COPY .git/ /app/.git/ From e3ba2b2c65991de6f831a4130133e4bc67a688dd Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Wed, 20 Nov 2024 21:08:16 +0100 Subject: [PATCH 43/92] ci: removing volume from docker-test-compose --- docker/docker-test-compose.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/docker/docker-test-compose.yml b/docker/docker-test-compose.yml index 987a5f0..d93f726 100644 --- a/docker/docker-test-compose.yml +++ b/docker/docker-test-compose.yml @@ -4,8 +4,6 @@ services: build: context: '../' dockerfile: './docker/Dockerfile.dev' - volumes: - - ../:/app environment: - TEST_WORKSPACE=/app volumes: From e39f5c1a71886ce2d5668f2343002430727a6404 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Wed, 20 Nov 2024 21:08:43 +0100 Subject: [PATCH 44/92] build: copying CONTRIBUTING to dev docker image --- docker/Dockerfile.dev | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/Dockerfile.dev b/docker/Dockerfile.dev index 0896c45..74364f4 100644 --- a/docker/Dockerfile.dev +++ b/docker/Dockerfile.dev @@ -55,6 +55,7 @@ RUN mkdir -p /tests/test_files/ COPY /docs/ /app/docs/ COPY mkdocs.yml /app/mkdocs.yml COPY README.md /app/README.md +COPY CONTRIBUTING.md /app/CONTRIBUTING.md RUN poetry run pre-commit install From 7f67860139574bbca6c09327f2f973d004d40972 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Wed, 20 Nov 2024 21:39:25 +0100 Subject: [PATCH 45/92] ci: adjusted TEST_COMMAND, not writing logs --- makefile | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/makefile b/makefile index 9d99c23..722e524 100644 --- a/makefile +++ b/makefile @@ -3,7 +3,9 @@ TEST_COMPOSE = $(DOCKER_DIR)/docker-test-compose.yml PYTHON_VERSION = 3.11 -TEST_COMMAND = "poetry run pytest --durations=100 --ignore-glob='test_*.py' tests --cov=datasetpreparator --cov-report term-missing --cov-report html 2>&1 | tee /app/logs/test_output.log" +TEST_COMMAND = "poetry run pytest --durations=100 --ignore-glob='test_*.py' tests --cov=datasetpreparator --cov-report term-missing --cov-report html 2>&1" + +TEST_COMMAND_LOG = "poetry run pytest --durations=100 --ignore-glob='test_*.py' tests --cov=datasetpreparator --cov-report term-missing --cov-report html 2>&1 | tee /app/logs/test_output.log" ################### #### PIPELINE ##### @@ -117,12 +119,14 @@ docker_doc_build_action: ## Builds the Mkdocs documentation using Docker. #### PRE-COMMIT ### ################### docker_pre_commit: ## Runs pre-commit hooks using Docker. + @make docker_build_dev docker run \ -v ".:/app" \ datasetpreparator:devcontainer \ pre-commit run --all-files docker_pre_commit_action: ## Runs pre-commit hooks using Docker. + @make docker_build_dev docker run \ datasetpreparator:devcontainer \ pre-commit run --all-files @@ -135,7 +139,7 @@ compose_build: ## Builds the Docker Image with docker-compose. action_compose_test: ## Runs the tests using Docker. docker compose -f $(TEST_COMPOSE) run --rm lib \ - bash -c $(TEST_COMMAND) + bash -c $(TEST_COMMAND) --exit-code-from lib compose_remove: ## Stops and removes the testing containers, images, volumes. 
docker-compose -f $(TEST_COMPOSE) down --volumes --remove-orphans From 391fe5070e66f57112349c23745ef13b3fe296b0 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Wed, 20 Nov 2024 22:13:09 +0100 Subject: [PATCH 46/92] build: copying scripts to top in docker images --- docker/Dockerfile | 30 ++++++++++++++++++++++++++++++ docker/Dockerfile.dev | 18 +++++++++++++++--- 2 files changed, 45 insertions(+), 3 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 07596a0..7e4b75a 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -32,5 +32,35 @@ RUN $POETRY_HOME/bin/poetry \ # Copy entire repository contents COPY . . +# Copy test files: +COPY /src/ /app/src/ +COPY /tests/__init__.py /app/tests/__init__.py +COPY /tests/conftest.py /app/tests/conftest.py +COPY /tests/test_utils.py /app/tests/test_utils.py +COPY /tests/test_settings.py /app/tests/test_settings.py +COPY /tests/test_main.py /app/tests/test_main.py +COPY /tests/test_cases/ /app/tests/test_cases/ + +# Copy docs files: +COPY /docs/ /app/docs/ +COPY mkdocs.yml /app/mkdocs.yml +COPY README.md /app/README.md +COPY CONTRIBUTING.md /app/CONTRIBUTING.md + +# Bring the scripts to the top level. +# They import parts of the project but as long as the project is installed +# in the same environment, they can run from anywhere as long as the environment +# is activated. +COPY /src/datasetpreparator/directory_flattener/directory_flattener.py \ + /src/datasetpreparator/directory_packager/directory_packager.py \ + /src/datasetpreparator/file_renamer/file_renamer.py \ + /src/datasetpreparator/json_merger/json_merger.py \ + /src/datasetpreparator/processed_mapping_copier/processed_mapping_copier.py \ + /src/datasetpreparator/sc2/sc2_map_downloader/sc2_map_downloader.py \ + /src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py \ + /src/datasetpreparator/sc2/sc2reset_replaypack_downloader/sc2reset_replaypack_downloader.py \ + /app/ + + # Install current project: RUN $POETRY_HOME/bin/poetry install --all-extras diff --git a/docker/Dockerfile.dev b/docker/Dockerfile.dev index 74364f4..4257ed9 100644 --- a/docker/Dockerfile.dev +++ b/docker/Dockerfile.dev @@ -31,9 +31,6 @@ COPY pyproject.toml poetry.lock /app/ # Install only dependencies without installing current project: RUN poetry config virtualenvs.create false && poetry install --no-root -# REVIEW: This is not the best way to build... This copies some old test files: -# Copy entire repository contents -# COPY . . # Required for CI and pre-commit locally: COPY .pre-commit-config.yaml /app/.pre-commit-config.yaml @@ -57,6 +54,21 @@ COPY mkdocs.yml /app/mkdocs.yml COPY README.md /app/README.md COPY CONTRIBUTING.md /app/CONTRIBUTING.md +# Bring the scripts to the top level. +# They import parts of the project but as long as the project is installed +# in the same environment, they can run from anywhere as long as the environment +# is activated. 
+COPY /src/datasetpreparator/directory_flattener/directory_flattener.py \ + /src/datasetpreparator/directory_packager/directory_packager.py \ + /src/datasetpreparator/file_renamer/file_renamer.py \ + /src/datasetpreparator/json_merger/json_merger.py \ + /src/datasetpreparator/processed_mapping_copier/processed_mapping_copier.py \ + /src/datasetpreparator/sc2/sc2_map_downloader/sc2_map_downloader.py \ + /src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py \ + /src/datasetpreparator/sc2/sc2reset_replaypack_downloader/sc2reset_replaypack_downloader.py \ + /app/ + + RUN poetry run pre-commit install # Install current project: From 41caf7a41159ec7b67988779b3475ec87e2f25d3 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Wed, 20 Nov 2024 22:52:58 +0100 Subject: [PATCH 47/92] docs: added info on pre-commit and commitizen, #34 --- CONTRIBUTING.md | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 6ae5d75..ae6a568 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -65,15 +65,27 @@ Ready to contribute? Here's how to set up `datasetpreparator` for local developm poetry install ``` -3. Use `git` (or similar) to create a branch for local development and make your changes: +3. Install the pre-commit hooks: + +```console + poetry run pre-commit install +``` + +4. Use `git` (or similar) to create a branch for local development and make your changes: ```console git checkout -b name-of-your-bugfix-or-feature ``` -4. When you're done making changes, check that your changes conform to any code formatting requirements and pass any tests. +5. When you're done making changes, check that your changes conform to any code formatting requirements and pass any tests. + +6. Format your commit with `commitizen`: + +```console + poetry run cz commit +``` -5. Commit your changes and open a pull request. +7. Commit your changes (we are using commitizen to check commit messages) and open a pull request. ## Pull Request Guidelines From 985c6c256d63812b6ecd71fdd8910e6affe801a3 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Wed, 20 Nov 2024 22:54:00 +0100 Subject: [PATCH 48/92] docs: added information on code standards, #34 --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ae6a568..ef27549 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -56,7 +56,7 @@ docker run -it -v .:/app datasetpreparator:devcontainer ### Local Development -Ready to contribute? Here's how to set up `datasetpreparator` for local development. +Ready to contribute? Here's how to set up `datasetpreparator` for local development. The code style standards that we use are defined in the `.pre-commit-config.yaml` file. 1. Download a copy of `datasetpreparator` locally. 2. 
Install `datasetpreparator` using `poetry`: From e0d82da2d099021264887b38eda6a42c1b807382 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Wed, 20 Nov 2024 23:03:13 +0100 Subject: [PATCH 49/92] docs: updated all README files for scripts --- README.md | 2 +- .../directory_flattener/README.md | 17 +++++++++-------- .../directory_flattener.py | 2 +- .../directory_packager/README.md | 16 ++++++++-------- .../directory_packager/directory_packager.py | 2 +- src/datasetpreparator/file_renamer/README.md | 12 ++++++------ .../file_renamer/file_renamer.py | 2 +- src/datasetpreparator/json_merger/README.md | 19 ++++++++++--------- .../json_merger/json_merger.py | 2 +- .../processed_mapping_copier/README.md | 15 ++++++++------- .../processed_mapping_copier.py | 2 +- .../sc2/sc2_map_downloader/README.md | 6 +++--- .../sc2_map_downloader/sc2_map_downloader.py | 2 +- .../sc2egset_replaypack_processor/README.md | 2 +- .../sc2egset_replaypack_processor.py | 2 +- .../sc2reset_replaypack_downloader/README.md | 5 ++--- .../sc2reset_replaypack_downloader.py | 2 +- 17 files changed, 56 insertions(+), 54 deletions(-) diff --git a/README.md b/README.md index 8f5d151..b54d685 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ When using Docker, you will have to pass the arguments through the `docker run` docker run \ -v "./processing:/app/processing" \ datasetpreparator:latest \ - python3 ./src/datasetpreparator/directory_flattener/directory_flattener.py \ + python3 directory_flattener.py \ --input_path /app/processing/directory_flattener/input \ --output_path /app/processing/directory_flattener/output ``` diff --git a/src/datasetpreparator/directory_flattener/README.md b/src/datasetpreparator/directory_flattener/README.md index 9b54481..b6c8984 100644 --- a/src/datasetpreparator/directory_flattener/README.md +++ b/src/datasetpreparator/directory_flattener/README.md @@ -14,14 +14,15 @@ old structure to a separate file, and for later processing with other tools. Created primarily to define StarCraft 2 (SC2) datasets. Options: - --input_path DIRECTORY Please provide input path to the dataset that is - going to be processed. [required] - --output_path DIRECTORY Please provide output path where the tool will put - files after processing. [required] - --file_extension TEXT Specify file extension for the files that will be - put to the top level directory. [required] - --log [INFO|DEBUG|ERROR] Log level (INFO, DEBUG, ERROR) - --help Show this message and exit. + --input_path DIRECTORY Please provide input path to the dataset that + is going to be processed. [required] + --output_path DIRECTORY Please provide output path where the tool + will put files after processing. [required] + --file_extension TEXT Specify file extension for the files that + will be put to the top level directory. + [required] + --log [INFO|DEBUG|ERROR|WARN] Log level + --help Show this message and exit. 
``` # Execute With Docker diff --git a/src/datasetpreparator/directory_flattener/directory_flattener.py b/src/datasetpreparator/directory_flattener/directory_flattener.py index 7de18b6..e466e13 100644 --- a/src/datasetpreparator/directory_flattener/directory_flattener.py +++ b/src/datasetpreparator/directory_flattener/directory_flattener.py @@ -240,7 +240,7 @@ def multiple_directory_flattener( "--log", type=click.Choice(["INFO", "DEBUG", "ERROR", "WARN"], case_sensitive=False), default="WARN", - help="Log level (INFO, DEBUG, ERROR)", + help="Log level", ) def main(input_path: Path, output_path: Path, file_extension: str, log: str) -> None: numeric_level = getattr(logging, log.upper(), None) diff --git a/src/datasetpreparator/directory_packager/README.md b/src/datasetpreparator/directory_packager/README.md index 03f14b8..1de8eb7 100644 --- a/src/datasetpreparator/directory_packager/README.md +++ b/src/datasetpreparator/directory_packager/README.md @@ -8,16 +8,16 @@ Please keep in mind that the ```src/dir_packager.py``` contains default flag va ``` Usage: directory_packager.py [OPTIONS] -Tool used for processing StarCraft 2 (SC2) datasets. -with https://github.com/Kaszanas/SC2InfoExtractorGo +Tool used for processing StarCraft 2 (SC2) datasets. with +https://github.com/Kaszanas/SC2InfoExtractorGo Options: - --input_path DIRECTORY Please provide input path to the directory - containing the dataset that is going to be - processed by packaging into .zip archives. - [required] - --log [INFO|DEBUG|ERROR] Log level (INFO, DEBUG, ERROR) - --help Show this message and exit. + --input_path DIRECTORY Please provide input path to the directory + containing the dataset that is going to be + processed by packaging into .zip archives. + [required] + --log [INFO|DEBUG|ERROR|WARN] Log level + --help Show this message and exit. ``` # Execute With Docker diff --git a/src/datasetpreparator/directory_packager/directory_packager.py b/src/datasetpreparator/directory_packager/directory_packager.py index 6bbd35b..9b94884 100644 --- a/src/datasetpreparator/directory_packager/directory_packager.py +++ b/src/datasetpreparator/directory_packager/directory_packager.py @@ -74,7 +74,7 @@ def dir_packager(directory_path: Path) -> Path: "--log", type=click.Choice(["INFO", "DEBUG", "ERROR", "WARN"], case_sensitive=False), default="WARN", - help="Log level (INFO, DEBUG, ERROR)", + help="Log level", ) def main(input_path: Path, log: str): numeric_level = getattr(logging, log.upper(), None) diff --git a/src/datasetpreparator/file_renamer/README.md b/src/datasetpreparator/file_renamer/README.md index 3a3024f..f131774 100644 --- a/src/datasetpreparator/file_renamer/README.md +++ b/src/datasetpreparator/file_renamer/README.md @@ -10,12 +10,12 @@ Tool used for processing StarCraft 2 (SC2) datasets. with https://github.com/Kaszanas/SC2InfoExtractorGo Options: - --input_path DIRECTORY Please provide input path to the directory - containing the dataset that is going to be - processed by packaging into .zip archives. - [required] - --log [INFO|DEBUG|ERROR] Log level (INFO, DEBUG, ERROR) - --help Show this message and exit. + --input_path DIRECTORY Please provide input path to the directory + containing the dataset that is going to be + processed by packaging into .zip archives. + [required] + --log [INFO|DEBUG|ERROR|WARN] Log level + --help Show this message and exit. 
``` # Execute With Docker diff --git a/src/datasetpreparator/file_renamer/file_renamer.py b/src/datasetpreparator/file_renamer/file_renamer.py index c1fb41c..b3271ba 100644 --- a/src/datasetpreparator/file_renamer/file_renamer.py +++ b/src/datasetpreparator/file_renamer/file_renamer.py @@ -88,7 +88,7 @@ def file_renamer(input_path: Path) -> None: "--log", type=click.Choice(["INFO", "DEBUG", "ERROR", "WARN"], case_sensitive=False), default="WARN", - help="Log level (INFO, DEBUG, ERROR)", + help="Log level", ) def main(input_path: Path, log: str) -> None: numeric_level = getattr(logging, log.upper(), None) diff --git a/src/datasetpreparator/json_merger/README.md b/src/datasetpreparator/json_merger/README.md index 5524cda..a8a85fd 100644 --- a/src/datasetpreparator/json_merger/README.md +++ b/src/datasetpreparator/json_merger/README.md @@ -12,15 +12,16 @@ Tool used for merging two .json files. Created in order to merge two mappings created by https://github.com/Kaszanas/SC2MapLocaleExtractor Options: - --json_one FILE Please provide the path to the first .json file - that is going to be merged. [required] - --json_two FILE Please provide the path to the second .json file - that is going to be merged. [required] - --output_filepath FILE Please provide a filepath to which the result JSON - file will be saved, note that any existing file of - the same name will be overwriten. [required] - --log [INFO|DEBUG|ERROR] Log level (INFO, DEBUG, ERROR) - --help Show this message and exit. + --json_one FILE Please provide the path to the first .json + file that is going to be merged. [required] + --json_two FILE Please provide the path to the second .json + file that is going to be merged. [required] + --output_filepath FILE Please provide a filepath to which the result + JSON file will be saved, note that any + existing file of the same name will be + overwriten. [required] + --log [INFO|DEBUG|ERROR|WARN] Log level + --help Show this message and exit. ``` # Execute With Docker diff --git a/src/datasetpreparator/json_merger/json_merger.py b/src/datasetpreparator/json_merger/json_merger.py index 3b9864f..0e5307a 100644 --- a/src/datasetpreparator/json_merger/json_merger.py +++ b/src/datasetpreparator/json_merger/json_merger.py @@ -121,7 +121,7 @@ def json_merger( "--log", type=click.Choice(["INFO", "DEBUG", "ERROR", "WARN"], case_sensitive=False), default="WARN", - help="Log level (INFO, DEBUG, ERROR)", + help="Log level", ) def main( path_to_json_one: Path, path_to_json_two: Path, output_filepath: Path, log: str diff --git a/src/datasetpreparator/processed_mapping_copier/README.md b/src/datasetpreparator/processed_mapping_copier/README.md index 8440f6e..d393cbb 100644 --- a/src/datasetpreparator/processed_mapping_copier/README.md +++ b/src/datasetpreparator/processed_mapping_copier/README.md @@ -13,13 +13,14 @@ after processing the replaypack into a JSON dataset. This step is required to define the StarCraft 2 (SC2) dataset. Options: - --input_path DIRECTORY Please provide input path to the flattened - replaypacks that contain procesed_mapping.json - files. [required] - --output_path DIRECTORY Please provide output path where - processed_mapping.json will be copied. [required] - --log [INFO|DEBUG|ERROR] Log level (INFO, DEBUG, ERROR) - --help Show this message and exit. + --input_path DIRECTORY Please provide input path to the flattened + replaypacks that contain + procesed_mapping.json files. [required] + --output_path DIRECTORY Please provide output path where + processed_mapping.json will be copied. 
+ [required] + --log [INFO|DEBUG|ERROR|WARN] Log level + --help Show this message and exit. ``` # Execute With Docker diff --git a/src/datasetpreparator/processed_mapping_copier/processed_mapping_copier.py b/src/datasetpreparator/processed_mapping_copier/processed_mapping_copier.py index ef1fcd3..a2d81cb 100644 --- a/src/datasetpreparator/processed_mapping_copier/processed_mapping_copier.py +++ b/src/datasetpreparator/processed_mapping_copier/processed_mapping_copier.py @@ -73,7 +73,7 @@ def processed_mapping_copier(input_path: str, output_path: str) -> None: "--log", type=click.Choice(["INFO", "DEBUG", "ERROR", "WARN"], case_sensitive=False), default="WARN", - help="Log level (INFO, DEBUG, ERROR)", + help="Log level", ) def main(input_path: Path, output_path: Path, log: str) -> None: numeric_level = getattr(logging, log.upper(), None) diff --git a/src/datasetpreparator/sc2/sc2_map_downloader/README.md b/src/datasetpreparator/sc2/sc2_map_downloader/README.md index e631767..59fc57c 100644 --- a/src/datasetpreparator/sc2/sc2_map_downloader/README.md +++ b/src/datasetpreparator/sc2/sc2_map_downloader/README.md @@ -8,8 +8,8 @@ Please keep in mind that the ```src/sc2_map_downloader.py``` does not contain d ``` Usage: sc2_map_downloader.py [OPTIONS] -Tool for downloading StarCraft 2 (SC2) maps based on -the data that is available within .SC2Replay file. +Tool for downloading StarCraft 2 (SC2) maps based on the data that is +available within .SC2Replay file. Options: --input_path DIRECTORY Please provide input path to the dataset that @@ -17,7 +17,7 @@ Options: --output_path DIRECTORY Please provide output path where StarCraft 2 (SC2) map files will be downloaded. [required] - --log [INFO|DEBUG|ERROR|WARN] Log level (INFO, DEBUG, ERROR) + --log [INFO|DEBUG|ERROR|WARN] Log level --help Show this message and exit. ``` diff --git a/src/datasetpreparator/sc2/sc2_map_downloader/sc2_map_downloader.py b/src/datasetpreparator/sc2/sc2_map_downloader/sc2_map_downloader.py index c7ed5b7..14e9076 100644 --- a/src/datasetpreparator/sc2/sc2_map_downloader/sc2_map_downloader.py +++ b/src/datasetpreparator/sc2/sc2_map_downloader/sc2_map_downloader.py @@ -134,7 +134,7 @@ def sc2_map_downloader(input_path: Path, output_path: Path) -> Path: "--log", type=click.Choice(["INFO", "DEBUG", "ERROR", "WARN"], case_sensitive=False), default="WARN", - help="Log level (INFO, DEBUG, ERROR)", + help="Log level", ) def main(input_path: Path, output_path: Path, log: str) -> None: input_path = Path(input_path).resolve() diff --git a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/README.md b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/README.md index bdfad89..c640213 100644 --- a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/README.md +++ b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/README.md @@ -24,7 +24,7 @@ Options: --n_processes INTEGER Please provide the number of processes to be spawned for the dataset processing. [required] - --log [INFO|DEBUG|ERROR] Log level (INFO, DEBUG, ERROR) + --log [INFO|DEBUG|ERROR|WARN] Log level --help Show this message and exit. 
``` diff --git a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py index f58a58d..38250c2 100644 --- a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py +++ b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py @@ -140,7 +140,7 @@ def sc2egset_replaypack_processor( "--log", type=click.Choice(["INFO", "DEBUG", "ERROR", "WARN"], case_sensitive=False), default="WARN", - help="Log level (INFO, DEBUG, ERROR)", + help="Log level", ) def main( input_path: Path, diff --git a/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/README.md b/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/README.md index 9c41ae2..8047f8e 100644 --- a/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/README.md +++ b/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/README.md @@ -8,8 +8,7 @@ Please keep in mind that the ```src/sc2reset_replaypack_downloader.py``` contai ``` Usage: sc2reset_replaypack_downloader.py [OPTIONS] -Tool used for downloading -SC2ReSet: StarCraft II Esport Replaypack Set +Tool used for downloading SC2ReSet: StarCraft II Esport Replaypack Set (https://zenodo.org/doi/10.5281/zenodo.5575796). Options: @@ -19,7 +18,7 @@ Options: will be unpacked. [required] --n_workers INTEGER Number of workers used for extracting the .zip archives. [required] - --log [INFO|DEBUG|ERROR|WARN] Log level (INFO, DEBUG, ERROR) + --log [INFO|DEBUG|ERROR|WARN] Log level --help Show this message and exit. ``` diff --git a/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/sc2reset_replaypack_downloader.py b/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/sc2reset_replaypack_downloader.py index 988e6cf..335656a 100644 --- a/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/sc2reset_replaypack_downloader.py +++ b/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/sc2reset_replaypack_downloader.py @@ -113,7 +113,7 @@ def sc2reset_replaypack_downloader( "--log", type=click.Choice(["INFO", "DEBUG", "ERROR", "WARN"], case_sensitive=False), default="WARN", - help="Log level (INFO, DEBUG, ERROR)", + help="Log level", ) def main(download_path: Path, unpack_path: Path, n_workers: int, log: str): numeric_level = getattr(logging, log.upper(), None) From cff767fee63f49bae8b53e4a6ec9c995e0629b3f Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Wed, 20 Nov 2024 23:40:18 +0100 Subject: [PATCH 50/92] refactor: changed the processing dir structure --- .../output/.gitkeep | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename processing/{sc2_replaypack_processor => sc2egset_replaypack_processor}/output/.gitkeep (100%) diff --git a/processing/sc2_replaypack_processor/output/.gitkeep b/processing/sc2egset_replaypack_processor/output/.gitkeep similarity index 100% rename from processing/sc2_replaypack_processor/output/.gitkeep rename to processing/sc2egset_replaypack_processor/output/.gitkeep From 76a54164e11852cd72d594df2d124f3cd6f6170a Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Wed, 20 Nov 2024 23:43:46 +0100 Subject: [PATCH 51/92] refactor: adjusted make targets for sc2egset, removed unused param --- makefile | 44 +++++++------------ .../sc2egset_replaypack_processor/README.md | 23 +++++----- .../sc2egset_replaypack_processor.py | 8 ---- 3 files changed, 27 insertions(+), 48 deletions(-) diff 
--git a/makefile b/makefile index 722e524..3a94896 100644 --- a/makefile +++ b/makefile @@ -15,55 +15,45 @@ sc2reset_sc2egset: ## Runs the entire processing pipeline to recreate SC2ReSet a @make process_replaypacks @make rename_files @make package_sc2egset_dataset + @make package_sc2reset_dataset flatten: ## Flattens the directory if the files are held in nested directories. This helps with streamlining the processing. - docker run \ - -v "./processing:/app/processing" \ - datasetpreparator:latest \ - python3 directory_flattener.py - -json_merge: ## Merges two JSON files. - docker run \ + docker run --rm\ -v "./processing:/app/processing" \ datasetpreparator:latest \ - python3 json_merger.py \ - --json_one=../processing/json_merger/map_translation.json \ - --json_two=../processing/json_merger/new_maps_processed.json - -download_maps: ## Runs over directories with .SC2Replay files and downloads maps that were used in the games. - docker run \ - -v "./processing:/app/processing" \ - datasetpreparator:latest \ - python3 sc2_map_downloader.py + python3 directory_flattener.py \ + --input_dir ./processing/directory_flattener/input + --output_dir ./processing/directory_flattener/output process_replaypacks: ## Parses the raw (.SC2Replay) data into JSON files. - docker run \ + docker run --rm\ -v "./processing:/app/processing" \ datasetpreparator:latest \ - python3 sc2_replaypack_processor.py \ + python3 sc2egset_replaypack_processor.py \ + --input_dir ./processing/directory_flattener/output \ + --output_dir ./processing/sc2egset_replaypack_processor/output \ --n_processes 8 \ - --perform_chat_anonymization "true" rename_files: ## Renames the files after processing with SC2InfoExtractorGo. docker run \ -v "./processing:/app/processing" \ datasetpreparator:latest \ python3 file_renamer.py \ - --input_dir ../processing/sc2_replaypack_processor/output + --input_dir ./processing/sc2egset_replaypack_processor/output -package_sc2reset_dataset: ## Packages the raw data. Used to prepare SC2ReSet Replaypack set. - docker run \ +package_sc2egset_dataset: ## Packages the pre-processed dataset from the output of datasetpreparator. Used to prepare SC2EGSet Dataset. + docker run --rm \ -v "./processing:/app/processing" \ datasetpreparator:latest \ - python3 src/datasetpreparator/ file_packager.py \ - --input_dir ../processing/directory_flattener/output + python3 file_packager.py \ + --input_dir ./processing/sc2egset_replaypack_processor/output -package_sc2egset_dataset: ## Packages the pre-processed dataset from the output of datasetpreparator. Used to prepare SC2EGSet Dataset. - docker run \ +package_sc2reset_dataset: ## Packages the raw data. Used to prepare SC2ReSet Replaypack set. + docker run --rm \ -v "./processing:/app/processing" \ datasetpreparator:latest \ python3 file_packager.py \ - --input_dir ../processing/sc2_replaypack_processor/output + --input_dir ./processing/directory_flattener/output ################### #### DOCKER ####### diff --git a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/README.md b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/README.md index c640213..9a2ba1d 100644 --- a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/README.md +++ b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/README.md @@ -13,19 +13,16 @@ Tool used to execute SC2InfoExtractorGo directories. Assists in processing StarCraft 2 (SC2) datasets. Options: - --input_path DIRECTORY Please provide an output directory for the - resulting files. 
[required]
-  --output_path DIRECTORY         Please provide output path where StarCraft 2
-                                  (SC2) map files will be downloaded.
-                                  [required]
-  --perform_chat_anonymization BOOLEAN
-                                  Provide 'True' if chat should be anonymized,
-                                  otherwise 'False'.  [required]
-  --n_processes INTEGER           Please provide the number of processes to be
-                                  spawned for the dataset processing.
-                                  [required]
-  --log [INFO|DEBUG|ERROR|WARN]   Log level
-  --help                          Show this message and exit.
+  --input_path DIRECTORY         Please provide an output directory for the
+                                 resulting files.  [required]
+  --output_path DIRECTORY        Please provide output path where StarCraft 2
+                                 (SC2) map files will be downloaded.
+                                 [required]
+  --n_processes INTEGER          Please provide the number of processes to be
+                                 spawned for the dataset processing.
+                                 [required]
+  --log [INFO|DEBUG|ERROR|WARN]  Log level
+  --help                         Show this message and exit.
 ```

 # Execute With Docker

diff --git a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py
index 38250c2..e215e54 100644
--- a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py
+++ b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py
@@ -122,13 +122,6 @@ def sc2egset_replaypack_processor(
     required=True,
     help="Please provide output path where StarCraft 2 (SC2) map files will be downloaded.",
 )
-@click.option(
-    "--perform_chat_anonymization",
-    type=bool,
-    default=False,
-    required=True,
-    help="Provide 'True' if chat should be anonymized, otherwise 'False'.",
-)
 @click.option(
     "--n_processes",
     type=int,
@@ -146,7 +139,6 @@ def main(
     input_path: Path,
     output_path: Path,
     n_processes: int,
-    perform_chat_anonymization: bool,
     log: str,
 ) -> None:

From 6bce3cb4bc23921c3e7afdd91cf574bb42e4c471 Mon Sep 17 00:00:00 2001
From: Kaszanas <34846245+Kaszanas@users.noreply.github.com>
Date: Thu, 21 Nov 2024 00:02:14 +0100
Subject: [PATCH 52/92] ci: added docker releases

---
 .github/workflows/ci.yml            | 11 ++++---
 .github/workflows/docker_images.yml | 47 +++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+), 4 deletions(-)
 create mode 100644 .github/workflows/docker_images.yml

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 4197989..dac1008 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,20 +1,23 @@
 name: continuous integration (ci)
 
-on: [pull_request, workflow_dispatch]
+on:
+  pull_request:
+  push:
+    branches:
+      - main
+      - dev
+  workflow_dispatch:
 
 # To successfully find the files that are required for testing:
 env:
   TEST_WORKSPACE: ${{ github.workspace }}
 
 jobs:
-
   pre_commit:
     # Set up operating system
     runs-on: ubuntu-latest
-
     # Define job steps
     steps:
-
       - name: Check-out repository
         uses: actions/checkout@v4
 
diff --git a/.github/workflows/docker_images.yml b/.github/workflows/docker_images.yml
new file mode 100644
index 0000000..4900957
--- /dev/null
+++ b/.github/workflows/docker_images.yml
@@ -0,0 +1,47 @@
+name: Publish Docker Images
+
+# This should run only after the tests from the CI pipeline have passed.
+# On a rare occasion contributors can trigger this manually, and it should also
+# run after a release has been published. 
+on: + workflow_run: + workflows: ["continuous integration (ci)"] + types: + - completed + push: + branches: + - main + - dev + workflow_dispatch: + release: + types: [published] + +jobs: + push_to_registries: + name: Push Docker Image to Docker Hub + runs-on: ubuntu-latest + permissions: + packages: write + contents: read + steps: + - name: Check out Code + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 + - name: Log in to Docker Hub + uses: docker/login-action@e92390c5fb421da1463c202d546fed0ec5c39f20 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_TOKEN }} + - name: Extract Metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@8e5442c4ef9f78752691e2d8f8d19755c6f78e81 + with: + images: | + kaszanas/datasetpreparator + - name: Build and Push Docker images + uses: docker/build-push-action@2cdde995de11925a030ce8070c3d77a52ffcf1c0 + with: + context: . + file: ./docker/Dockerfile + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} From f6eb98793ebeaec79625eaa08192710599358e8e Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Thu, 21 Nov 2024 00:21:34 +0100 Subject: [PATCH 53/92] build: added maps needed for SC2InfoExtractorGo --- docker/Dockerfile | 2 ++ docker/Dockerfile.dev | 2 ++ 2 files changed, 4 insertions(+) diff --git a/docker/Dockerfile b/docker/Dockerfile index 7e4b75a..3c6c7d7 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -12,6 +12,8 @@ WORKDIR /app # Copying the replay parsing tool: # sc2egset_replaypack_processor requires the .exe file to be in the same directory as the script: COPY --from=extractor /app/SC2InfoExtractorGo /app/SC2InfoExtractorGo +COPY --from=extractor /app/maps/ /app/maps/ + # Ensure the executable has the right permissions RUN chmod +x /app/SC2InfoExtractorGo diff --git a/docker/Dockerfile.dev b/docker/Dockerfile.dev index 4257ed9..df4fbab 100644 --- a/docker/Dockerfile.dev +++ b/docker/Dockerfile.dev @@ -19,6 +19,8 @@ WORKDIR /app # Copying the replay parsing tool: # sc2egset_replaypack_processor requires the .exe file to be in the same directory as the script: COPY --from=extractor /app/SC2InfoExtractorGo /app/SC2InfoExtractorGo +COPY --from=extractor /app/maps/ /app/maps/ + # Ensure the executable has the right permissions: RUN chmod +x /app/SC2InfoExtractorGo From 7ed1f1a3da3cb8567932b071ad7c561c485e1920 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Thu, 21 Nov 2024 00:28:28 +0100 Subject: [PATCH 54/92] refactor: using dev dockerfile in sc2reset_sc2egset process --- makefile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/makefile b/makefile index 3a94896..deba784 100644 --- a/makefile +++ b/makefile @@ -20,7 +20,7 @@ sc2reset_sc2egset: ## Runs the entire processing pipeline to recreate SC2ReSet a flatten: ## Flattens the directory if the files are held in nested directories. This helps with streamlining the processing. docker run --rm\ -v "./processing:/app/processing" \ - datasetpreparator:latest \ + datasetpreparator:dev \ python3 directory_flattener.py \ --input_dir ./processing/directory_flattener/input --output_dir ./processing/directory_flattener/output @@ -28,7 +28,7 @@ flatten: ## Flattens the directory if the files are held in nested directories. process_replaypacks: ## Parses the raw (.SC2Replay) data into JSON files. 
docker run --rm\ -v "./processing:/app/processing" \ - datasetpreparator:latest \ + datasetpreparator:dev \ python3 sc2egset_replaypack_processor.py \ --input_dir ./processing/directory_flattener/output \ --output_dir ./processing/sc2egset_replaypack_processor/output \ @@ -37,21 +37,21 @@ process_replaypacks: ## Parses the raw (.SC2Replay) data into JSON files. rename_files: ## Renames the files after processing with SC2InfoExtractorGo. docker run \ -v "./processing:/app/processing" \ - datasetpreparator:latest \ + datasetpreparator:dev \ python3 file_renamer.py \ --input_dir ./processing/sc2egset_replaypack_processor/output package_sc2egset_dataset: ## Packages the pre-processed dataset from the output of datasetpreparator. Used to prepare SC2EGSet Dataset. docker run --rm \ -v "./processing:/app/processing" \ - datasetpreparator:latest \ + datasetpreparator:dev \ python3 file_packager.py \ --input_dir ./processing/sc2egset_replaypack_processor/output package_sc2reset_dataset: ## Packages the raw data. Used to prepare SC2ReSet Replaypack set. docker run --rm \ -v "./processing:/app/processing" \ - datasetpreparator:latest \ + datasetpreparator:dev \ python3 file_packager.py \ --input_dir ./processing/directory_flattener/output From 4f117c6bb397cc870ae44ad3ff42b5937ae755e8 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Fri, 3 Jan 2025 21:52:18 +0100 Subject: [PATCH 55/92] docs: changed docs for a more concise read --- .../directory_flattener/directory_flattener.py | 6 +++--- .../directory_packager/directory_packager.py | 6 +++--- src/datasetpreparator/file_renamer/file_renamer.py | 6 +++--- src/datasetpreparator/json_merger/json_merger.py | 10 +++++----- .../processed_mapping_copier.py | 8 ++++---- .../sc2/sc2_map_downloader/sc2_map_downloader.py | 8 ++++---- .../sc2egset_replaypack_processor.py | 10 +++++----- .../sc2reset_replaypack_downloader.py | 6 +++--- 8 files changed, 30 insertions(+), 30 deletions(-) diff --git a/src/datasetpreparator/directory_flattener/directory_flattener.py b/src/datasetpreparator/directory_flattener/directory_flattener.py index e466e13..4f4aac1 100644 --- a/src/datasetpreparator/directory_flattener/directory_flattener.py +++ b/src/datasetpreparator/directory_flattener/directory_flattener.py @@ -215,7 +215,7 @@ def multiple_directory_flattener( path_type=Path, ), required=True, - help="Please provide input path to the dataset that is going to be processed.", + help="Input path to the dataset that is going to be processed.", ) @click.option( "--output_path", @@ -227,14 +227,14 @@ def multiple_directory_flattener( path_type=Path, ), required=True, - help="Please provide output path where the tool will put files after processing.", + help="Output path where the tool will put files after processing.", ) @click.option( "--file_extension", type=str, default=".SC2Replay", required=True, - help="Specify file extension for the files that will be put to the top level directory.", + help="File extension for the files that will be put to the top level directory. 
Example ('.SC2Replay').",
 )
 @click.option(
     "--log",
diff --git a/src/datasetpreparator/directory_packager/directory_packager.py b/src/datasetpreparator/directory_packager/directory_packager.py
index 9b94884..c112f9b 100644
--- a/src/datasetpreparator/directory_packager/directory_packager.py
+++ b/src/datasetpreparator/directory_packager/directory_packager.py
@@ -62,19 +62,19 @@ def dir_packager(directory_path: Path) -> Path:
 
 
 @click.command(
-    help="Tool used for processing StarCraft 2 (SC2) datasets. with https://github.com/Kaszanas/SC2InfoExtractorGo"
+    help="Tool that packages directories into .zip archives. Each directory in the input path is packaged into a separate .zip archive."
 )
 @click.option(
     "--input_path",
     type=click.Path(exists=True, dir_okay=True, file_okay=False, resolve_path=True),
     required=True,
-    help="Please provide input path to the directory containing the dataset that is going to be processed by packaging into .zip archives.",
+    help="Input path to the directory containing the dataset that is going to be processed by packaging into .zip archives.",
 )
 @click.option(
     "--log",
     type=click.Choice(["INFO", "DEBUG", "ERROR", "WARN"], case_sensitive=False),
     default="WARN",
-    help="Log level",
+    help="Log level. Default is WARN.",
 )
 def main(input_path: Path, log: str):
     numeric_level = getattr(logging, log.upper(), None)
diff --git a/src/datasetpreparator/file_renamer/file_renamer.py b/src/datasetpreparator/file_renamer/file_renamer.py
index b3271ba..cb10725 100644
--- a/src/datasetpreparator/file_renamer/file_renamer.py
+++ b/src/datasetpreparator/file_renamer/file_renamer.py
@@ -76,19 +76,19 @@ def file_renamer(input_path: Path) -> None:
 
 
 @click.command(
-    help="Tool used for processing StarCraft 2 (SC2) datasets. with https://github.com/Kaszanas/SC2InfoExtractorGo"
+    help="Tool used for renaming auxiliary files (log files) that are produced when creating StarCraft 2 (SC2) datasets with https://github.com/Kaszanas/SC2InfoExtractorGo"
 )
 @click.option(
     "--input_path",
     type=click.Path(exists=True, dir_okay=True, file_okay=False, resolve_path=True),
     required=True,
-    help="Please provide input path to the directory containing the dataset that is going to be processed by packaging into .zip archives.",
+    help="Input path to the directory containing the dataset whose auxiliary files are going to be renamed.",
 )
 @click.option(
     "--log",
     type=click.Choice(["INFO", "DEBUG", "ERROR", "WARN"], case_sensitive=False),
     default="WARN",
-    help="Log level",
+    help="Log level. Default is WARN.",
 )
 def main(input_path: Path, log: str) -> None:
     numeric_level = getattr(logging, log.upper(), None)
diff --git a/src/datasetpreparator/json_merger/json_merger.py b/src/datasetpreparator/json_merger/json_merger.py
index 0e5307a..9c607ef 100644
--- a/src/datasetpreparator/json_merger/json_merger.py
+++ b/src/datasetpreparator/json_merger/json_merger.py
@@ -97,31 +97,31 @@ def json_merger(
 
 
 @click.command(
-    help="Tool used for merging two .json files. Created in order to merge two mappings created by https://github.com/Kaszanas/SC2MapLocaleExtractor"
+    help="Tool used for merging two .json files. Originally used to merge two mappings created by https://github.com/Kaszanas/SC2MapLocaleExtractor"
 )
 @click.option(
     "--json_one",
     type=click.Path(exists=True, dir_okay=False, file_okay=True, resolve_path=True),
     required=True,
-    help="Please provide the path to the first .json file that is going to be merged.",
+    help="Path to the first .json file that is going to be merged.",
 )
 @click.option(
     "--json_two",
     type=click.Path(writable=True, dir_okay=False, file_okay=True, resolve_path=True),
     required=True,
-    help="Please provide the path to the second .json file that is going to be merged.",
+    help="Path to the second .json file that is going to be merged.",
 )
 @click.option(
     "--output_filepath",
     type=click.Path(dir_okay=False, file_okay=True, resolve_path=True),
     required=True,
-    help="Please provide a filepath to which the result JSON file will be saved, note that any existing file of the same name will be overwriten.",
+    help="Filepath to which the result JSON file will be saved, note that any existing file of the same name will be overwritten.",
 )
 @click.option(
     "--log",
     type=click.Choice(["INFO", "DEBUG", "ERROR", "WARN"], case_sensitive=False),
     default="WARN",
-    help="Log level",
+    help="Log level. Default is WARN.",
 )
 def main(
     path_to_json_one: Path, path_to_json_two: Path, output_filepath: Path, log: str
diff --git a/src/datasetpreparator/processed_mapping_copier/processed_mapping_copier.py b/src/datasetpreparator/processed_mapping_copier/processed_mapping_copier.py
index a2d81cb..42efd0f 100644
--- a/src/datasetpreparator/processed_mapping_copier/processed_mapping_copier.py
+++ b/src/datasetpreparator/processed_mapping_copier/processed_mapping_copier.py
@@ -43,7 +43,7 @@ def processed_mapping_copier(input_path: str, output_path: str) -> None:
 
 
 @click.command(
-    help="Tool for copying the processed_mapping.json files to the matching directory after processing the replaypack into a JSON dataset. This step is required to define the StarCraft 2 (SC2) dataset."
+    help="Tool for copying the auxiliary file of processed_mapping.json to the matching directory after processing the replaypack into a JSON dataset with sc2egset_replaypack_processor.py. This script is required to reproduce SC2EGSet Dataset."
 )
 @click.option(
     "--input_path",
     type=click.Path(
         exists=True,
         dir_okay=True,
         file_okay=False,
         resolve_path=True,
         path_type=Path,
     ),
     required=True,
-    help="Please provide input path to the flattened replaypacks that contain procesed_mapping.json files.",
+    help="Input path to the flattened replaypacks that contain processed_mapping.json files.",
 )
 @click.option(
     "--output_path",
     type=click.Path(
         exists=True,
         dir_okay=True,
         file_okay=False,
         resolve_path=True,
         path_type=Path,
     ),
     required=True,
-    help="Please provide output path where processed_mapping.json will be copied.",
+    help="Output path where processed_mapping.json will be copied.",
 )
 @click.option(
     "--log",
     type=click.Choice(["INFO", "DEBUG", "ERROR", "WARN"], case_sensitive=False),
     default="WARN",
-    help="Log level",
+    help="Log level. Default is WARN.",
 )
 def main(input_path: Path, output_path: Path, log: str) -> None:
     numeric_level = getattr(logging, log.upper(), None)
diff --git a/src/datasetpreparator/sc2/sc2_map_downloader/sc2_map_downloader.py b/src/datasetpreparator/sc2/sc2_map_downloader/sc2_map_downloader.py
index 14e9076..bf3d9d4 100644
--- a/src/datasetpreparator/sc2/sc2_map_downloader/sc2_map_downloader.py
+++ b/src/datasetpreparator/sc2/sc2_map_downloader/sc2_map_downloader.py
@@ -104,7 +104,7 @@ def sc2_map_downloader(input_path: Path, output_path: Path) -> Path:
 
 
 @click.command(
-    help="Tool for downloading StarCraft 2 (SC2) maps based on the data that is available within .SC2Replay file."
+    help="Tool for downloading StarCraft 2 (SC2) maps based on the data that is available within .SC2Replay files."
 )
 @click.option(
     "--input_path",
     type=click.Path(
         exists=True,
         dir_okay=True,
         file_okay=False,
         resolve_path=True,
         path_type=Path,
     ),
     required=True,
-    help="Please provide input path to the dataset that is going to be processed.",
+    help="Input path to the dataset that is going to be processed. The script will find all .SC2Replay files in the directory.",
 )
 @click.option(
     "--output_path",
     type=click.Path(
         exists=True,
         dir_okay=True,
         file_okay=False,
         resolve_path=True,
         path_type=Path,
     ),
     required=True,
-    help="Please provide output path where StarCraft 2 (SC2) map files will be downloaded.",
+    help="Output path where StarCraft 2 (SC2) map files will be downloaded.",
 )
 @click.option(
     "--log",
     type=click.Choice(["INFO", "DEBUG", "ERROR", "WARN"], case_sensitive=False),
     default="WARN",
-    help="Log level",
+    help="Log level. Default is WARN.",
 )
 def main(input_path: Path, output_path: Path, log: str) -> None:
     input_path = Path(input_path).resolve()
diff --git a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py
index e215e54..b9e3ce5 100644
--- a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py
+++ b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py
@@ -96,7 +96,7 @@ def sc2egset_replaypack_processor(
 
 
 @click.command(
-    help="Tool used to execute SC2InfoExtractorGo (https://github.com/Kaszanas/SC2InfoExtractorGo) on multiple replaypack directories. Assists in processing StarCraft 2 (SC2) datasets."
+    help="Tool used to recreate SC2EGSet Dataset. 
) @click.option( "--input_path", @@ -108,7 +108,7 @@ def sc2egset_replaypack_processor( path_type=Path, ), required=True, - help="Please provide an output directory for the resulting files.", + help="Output directory for the resulting files.", ) @click.option( "--output_path", @@ -120,20 +120,20 @@ def sc2egset_replaypack_processor( path_type=Path, ), required=True, - help="Please provide output path where StarCraft 2 (SC2) map files will be downloaded.", + help="Output path where StarCraft 2 (SC2) map files will be downloaded.", ) @click.option( "--n_processes", type=int, default=4, required=True, - help="Please provide the number of processes to be spawned for the dataset processing.", + help="Number of processes to be spawned for the dataset processing.", ) @click.option( "--log", type=click.Choice(["INFO", "DEBUG", "ERROR", "WARN"], case_sensitive=False), default="WARN", - help="Log level", + help="Log level. Default is WARN.", ) def main( input_path: Path, diff --git a/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/sc2reset_replaypack_downloader.py b/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/sc2reset_replaypack_downloader.py index 335656a..6db2161 100644 --- a/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/sc2reset_replaypack_downloader.py +++ b/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/sc2reset_replaypack_downloader.py @@ -88,7 +88,7 @@ def sc2reset_replaypack_downloader( path_type=Path, ), required=True, - help="Please provide a path to which the archives will be downloaded.", + help="Path to which the archives will be downloaded.", ) @click.option( "--unpack_path", @@ -100,7 +100,7 @@ def sc2reset_replaypack_downloader( path_type=Path, ), required=True, - help="Please provide a path to which the archives will be unpacked.", + help="Path to which the archives will be unpacked.", ) @click.option( "--n_workers", @@ -113,7 +113,7 @@ def sc2reset_replaypack_downloader( "--log", type=click.Choice(["INFO", "DEBUG", "ERROR", "WARN"], case_sensitive=False), default="WARN", - help="Log level", + help="Log level. 
Default is WARN.", ) def main(download_path: Path, unpack_path: Path, n_workers: int, log: str): numeric_level = getattr(logging, log.upper(), None) From 623fb61a40f8a4881105c9637102461f1aec459d Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Fri, 3 Jan 2025 21:53:23 +0100 Subject: [PATCH 56/92] build: bumped ruff and commitizen versions --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8878a7a..80b6736 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ requests = { version = "^2.31.0", optional = true } [tool.poetry.group.dev.dependencies] pytest = "^8.3.3" pytest-cov = "^6.0.0" -ruff = "^0.7.4" +ruff = "^0.8.5" pre-commit = "^4.0.1" mkdocs = "^1.5.3" mkdocstrings = "^0.27.0" @@ -34,7 +34,7 @@ mkdocs-autorefs = "^1.2.0" mkdocs-gen-files = "^0.5.0" mkdocs-literate-nav = "^0.6.1" mkdocs-section-index = "^0.3.8" -commitizen = "^3.13.0" +commitizen = "^4.1.0" python-dotenv = "^1.0.1" [tool.poetry.extras] From ce585891730320f074eee278827c90c4750bdd93 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Fri, 3 Jan 2025 21:57:05 +0100 Subject: [PATCH 57/92] build: ran poetry lock --- poetry.lock | 97 ++++++++++++++++++++--------------------------------- 1 file changed, 37 insertions(+), 60 deletions(-) diff --git a/poetry.lock b/poetry.lock index de4d3d8..1140f44 100644 --- a/poetry.lock +++ b/poetry.lock @@ -187,27 +187,27 @@ files = [ [[package]] name = "commitizen" -version = "3.13.0" +version = "4.1.0" description = "Python commitizen client tool" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "commitizen-3.13.0-py3-none-any.whl", hash = "sha256:ff57069591ff109136b70841fe79a3434d0525748995531cceb4f3ccadb44ead"}, - {file = "commitizen-3.13.0.tar.gz", hash = "sha256:53cd225ae44fc25cb1582f5d50cda78711a5a1d44a32fee3dcf7a22bc204ce06"}, + {file = "commitizen-4.1.0-py3-none-any.whl", hash = "sha256:2e6c5fbd442cab4bcc5a04bc86ef2196ef84bcf611317d6c596e87f5bb4c09f5"}, + {file = "commitizen-4.1.0.tar.gz", hash = "sha256:4f2d9400ec411aec1c738d4c63fc7fd5807cd6ddf6be970869e03e68b88ff718"}, ] [package.dependencies] -argcomplete = ">=1.12.1,<3.2" +argcomplete = ">=1.12.1,<3.6" charset-normalizer = ">=2.1.0,<4" colorama = ">=0.4.1,<0.5.0" decli = ">=0.6.0,<0.7.0" -importlib_metadata = ">=4.13,<7" jinja2 = ">=2.10.3" packaging = ">=19" pyyaml = ">=3.08" questionary = ">=2.0,<3.0" termcolor = ">=1.1,<3" tomlkit = ">=0.5.3,<1.0.0" +typing-extensions = {version = ">=4.0.1,<5.0.0", markers = "python_version < \"3.11\""} [[package]] name = "coverage" @@ -394,25 +394,6 @@ files = [ {file = "idna-3.6.tar.gz", hash = "sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca"}, ] -[[package]] -name = "importlib-metadata" -version = "6.11.0" -description = "Read metadata from Python packages" -optional = false -python-versions = ">=3.8" -files = [ - {file = "importlib_metadata-6.11.0-py3-none-any.whl", hash = "sha256:f0afba6205ad8f8947c7d338b5342d5db2afbfd82f9cbef7879a9539cc12eb9b"}, - {file = "importlib_metadata-6.11.0.tar.gz", hash = "sha256:1231cf92d825c9e03cfc4da076a16de6422c863558229ea0b22b675657463443"}, -] - -[package.dependencies] -zipp = ">=0.5" - -[package.extras] -docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"] -perf = ["ipython"] -testing = ["flufl.flake8", "importlib-resources 
(>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)", "pytest-ruff"] - [[package]] name = "iniconfig" version = "2.0.0" @@ -1155,29 +1136,29 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] [[package]] name = "ruff" -version = "0.7.4" +version = "0.8.5" description = "An extremely fast Python linter and code formatter, written in Rust." optional = false python-versions = ">=3.7" files = [ - {file = "ruff-0.7.4-py3-none-linux_armv6l.whl", hash = "sha256:a4919925e7684a3f18e18243cd6bea7cfb8e968a6eaa8437971f681b7ec51478"}, - {file = "ruff-0.7.4-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:cfb365c135b830778dda8c04fb7d4280ed0b984e1aec27f574445231e20d6c63"}, - {file = "ruff-0.7.4-py3-none-macosx_11_0_arm64.whl", hash = "sha256:63a569b36bc66fbadec5beaa539dd81e0527cb258b94e29e0531ce41bacc1f20"}, - {file = "ruff-0.7.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d06218747d361d06fd2fdac734e7fa92df36df93035db3dc2ad7aa9852cb109"}, - {file = "ruff-0.7.4-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e0cea28d0944f74ebc33e9f934238f15c758841f9f5edd180b5315c203293452"}, - {file = "ruff-0.7.4-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:80094ecd4793c68b2571b128f91754d60f692d64bc0d7272ec9197fdd09bf9ea"}, - {file = "ruff-0.7.4-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:997512325c6620d1c4c2b15db49ef59543ef9cd0f4aa8065ec2ae5103cedc7e7"}, - {file = "ruff-0.7.4-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:00b4cf3a6b5fad6d1a66e7574d78956bbd09abfd6c8a997798f01f5da3d46a05"}, - {file = "ruff-0.7.4-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7dbdc7d8274e1422722933d1edddfdc65b4336abf0b16dfcb9dedd6e6a517d06"}, - {file = "ruff-0.7.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e92dfb5f00eaedb1501b2f906ccabfd67b2355bdf117fea9719fc99ac2145bc"}, - {file = "ruff-0.7.4-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:3bd726099f277d735dc38900b6a8d6cf070f80828877941983a57bca1cd92172"}, - {file = "ruff-0.7.4-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:2e32829c429dd081ee5ba39aef436603e5b22335c3d3fff013cd585806a6486a"}, - {file = "ruff-0.7.4-py3-none-musllinux_1_2_i686.whl", hash = "sha256:662a63b4971807623f6f90c1fb664613f67cc182dc4d991471c23c541fee62dd"}, - {file = "ruff-0.7.4-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:876f5e09eaae3eb76814c1d3b68879891d6fde4824c015d48e7a7da4cf066a3a"}, - {file = "ruff-0.7.4-py3-none-win32.whl", hash = "sha256:75c53f54904be42dd52a548728a5b572344b50d9b2873d13a3f8c5e3b91f5cac"}, - {file = "ruff-0.7.4-py3-none-win_amd64.whl", hash = "sha256:745775c7b39f914238ed1f1b0bebed0b9155a17cd8bc0b08d3c87e4703b990d6"}, - {file = "ruff-0.7.4-py3-none-win_arm64.whl", hash = "sha256:11bff065102c3ae9d3ea4dc9ecdfe5a5171349cdd0787c1fc64761212fc9cf1f"}, - {file = "ruff-0.7.4.tar.gz", hash = "sha256:cd12e35031f5af6b9b93715d8c4f40360070b2041f81273d0527683d5708fce2"}, + {file = "ruff-0.8.5-py3-none-linux_armv6l.whl", hash = "sha256:5ad11a5e3868a73ca1fa4727fe7e33735ea78b416313f4368c504dbeb69c0f88"}, + {file = "ruff-0.8.5-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:f69ab37771ea7e0715fead8624ec42996d101269a96e31f4d31be6fc33aa19b7"}, + {file = "ruff-0.8.5-py3-none-macosx_11_0_arm64.whl", hash = "sha256:b5462d7804558ccff9c08fe8cbf6c14b7efe67404316696a2dde48297b1925bb"}, 
+ {file = "ruff-0.8.5-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d56de7220a35607f9fe59f8a6d018e14504f7b71d784d980835e20fc0611cd50"}, + {file = "ruff-0.8.5-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9d99cf80b0429cbebf31cbbf6f24f05a29706f0437c40413d950e67e2d4faca4"}, + {file = "ruff-0.8.5-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7b75ac29715ac60d554a049dbb0ef3b55259076181c3369d79466cb130eb5afd"}, + {file = "ruff-0.8.5-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:c9d526a62c9eda211b38463528768fd0ada25dad524cb33c0e99fcff1c67b5dc"}, + {file = "ruff-0.8.5-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:587c5e95007612c26509f30acc506c874dab4c4abbacd0357400bd1aa799931b"}, + {file = "ruff-0.8.5-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:622b82bf3429ff0e346835ec213aec0a04d9730480cbffbb6ad9372014e31bbd"}, + {file = "ruff-0.8.5-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f99be814d77a5dac8a8957104bdd8c359e85c86b0ee0e38dca447cb1095f70fb"}, + {file = "ruff-0.8.5-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:c01c048f9c3385e0fd7822ad0fd519afb282af9cf1778f3580e540629df89725"}, + {file = "ruff-0.8.5-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:7512e8cb038db7f5db6aae0e24735ff9ea03bb0ed6ae2ce534e9baa23c1dc9ea"}, + {file = "ruff-0.8.5-py3-none-musllinux_1_2_i686.whl", hash = "sha256:762f113232acd5b768d6b875d16aad6b00082add40ec91c927f0673a8ec4ede8"}, + {file = "ruff-0.8.5-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:03a90200c5dfff49e4c967b405f27fdfa81594cbb7c5ff5609e42d7fe9680da5"}, + {file = "ruff-0.8.5-py3-none-win32.whl", hash = "sha256:8710ffd57bdaa6690cbf6ecff19884b8629ec2a2a2a2f783aa94b1cc795139ed"}, + {file = "ruff-0.8.5-py3-none-win_amd64.whl", hash = "sha256:4020d8bf8d3a32325c77af452a9976a9ad6455773bcb94991cf15bd66b347e47"}, + {file = "ruff-0.8.5-py3-none-win_arm64.whl", hash = "sha256:134ae019ef13e1b060ab7136e7828a6d83ea727ba123381307eb37c6bd5e01cb"}, + {file = "ruff-0.8.5.tar.gz", hash = "sha256:1098d36f69831f7ff2a1da3e6407d5fbd6dfa2559e4f74ff2d260c5588900317"}, ] [[package]] @@ -1277,6 +1258,17 @@ notebook = ["ipywidgets (>=6)"] slack = ["slack-sdk"] telegram = ["requests"] +[[package]] +name = "typing-extensions" +version = "4.12.2" +description = "Backported and Experimental Type Hints for Python 3.8+" +optional = false +python-versions = ">=3.8" +files = [ + {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, + {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, +] + [[package]] name = "urllib3" version = "2.1.0" @@ -1377,25 +1369,10 @@ files = [ {file = "wcwidth-0.2.12.tar.gz", hash = "sha256:f01c104efdf57971bcb756f054dd58ddec5204dd15fa31d6503ea57947d97c02"}, ] -[[package]] -name = "zipp" -version = "3.17.0" -description = "Backport of pathlib-compatible object wrapper for zip files" -optional = false -python-versions = ">=3.8" -files = [ - {file = "zipp-3.17.0-py3-none-any.whl", hash = "sha256:0e923e726174922dce09c53c59ad483ff7bbb8e572e00c7f7c46b88556409f31"}, - {file = "zipp-3.17.0.tar.gz", hash = "sha256:84e64a1c28cf7e91ed2078bb8cc8c259cb19b76942096c8d7b84947690cabaf0"}, -] - -[package.extras] -docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", 
"sphinx-lint"] -testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy (>=0.9.1)", "pytest-ruff"] - [extras] all = ["requests", "sc2reader", "tqdm"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "f563a3f794c497d3da2d5094f42cb142b15d1fa6b34c2bce997921e7de459cdf" +content-hash = "04502fcda957c4512081b32b4ff852f40bed1d0e8553ec3131889070d453e86a" From e82d3558255b5a883d3a24bd94345a770baa5783 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Sun, 5 Jan 2025 17:43:30 +0100 Subject: [PATCH 58/92] docs: refined documentation, added TODO --- .../directory_flattener/directory_flattener.py | 4 ++-- src/datasetpreparator/json_merger/json_merger.py | 2 +- .../sc2egset_replaypack_processor.py | 10 ++++++---- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/datasetpreparator/directory_flattener/directory_flattener.py b/src/datasetpreparator/directory_flattener/directory_flattener.py index 4f4aac1..dcf10f9 100644 --- a/src/datasetpreparator/directory_flattener/directory_flattener.py +++ b/src/datasetpreparator/directory_flattener/directory_flattener.py @@ -203,7 +203,7 @@ def multiple_directory_flattener( @click.command( - help="Directory restructuring tool used in order to flatten the structure, map the old structure to a separate file, and for later processing with other tools. Created primarily to define StarCraft 2 (SC2) datasets." + help="Directory restructuring tool used in order to flatten the structure. Saves the mapping of the old directory structure to a separate file. Used to ease processing with other tools. Can be used to extract additional meaning from the directory structure in case of tournament replaypacks. Created primarily to define StarCraft 2 (SC2) datasets." ) @click.option( "--input_path", @@ -240,7 +240,7 @@ def multiple_directory_flattener( "--log", type=click.Choice(["INFO", "DEBUG", "ERROR", "WARN"], case_sensitive=False), default="WARN", - help="Log level", + help="Log level. Default is WARN.", ) def main(input_path: Path, output_path: Path, file_extension: str, log: str) -> None: numeric_level = getattr(logging, log.upper(), None) diff --git a/src/datasetpreparator/json_merger/json_merger.py b/src/datasetpreparator/json_merger/json_merger.py index 9c607ef..8822df0 100644 --- a/src/datasetpreparator/json_merger/json_merger.py +++ b/src/datasetpreparator/json_merger/json_merger.py @@ -97,7 +97,7 @@ def json_merger( @click.command( - help="Tool used for merging two .json files. Originally used to merge two mappings created by https://github.com/Kaszanas/SC2MapLocaleExtractor" + help="Tool used for merging two .json files. Originally used to merge two json files created by https://github.com/Kaszanas/SC2MapLocaleExtractor" ) @click.option( "--json_one", diff --git a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py index b9e3ce5..6319683 100644 --- a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py +++ b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py @@ -96,7 +96,7 @@ def sc2egset_replaypack_processor( @click.command( - help="Tool used to recreate SC2EGSet Dataset. 
Executes SC2InfoExtractorGo (https://github.com/Kaszanas/SC2InfoExtractorGo) on multiple replaypack directories. Assists in processing StarCraft 2 (SC2) datasets."
+    help="Tool used to recreate SC2ReSet and SC2EGSet Dataset. Executes the entire pipeline for replay processing. Depends on SC2InfoExtractorGo (https://github.com/Kaszanas/SC2InfoExtractorGo) which is executed on multiple replaypack directories in the process. Assists in processing StarCraft 2 (SC2) datasets."
 )
 @click.option(
     "--input_path",
@@ -108,7 +108,7 @@ def sc2egset_replaypack_processor(
         path_type=Path,
     ),
     required=True,
-    help="Output directory for the resulting files.",
+    help="Input directory containing multiple StarCraft 2 replaypacks. These files will be processed in the same way as for the SC2ReSet and SC2EGSet datasets.",
 )
 @click.option(
     "--output_path",
@@ -120,7 +120,7 @@ def sc2egset_replaypack_processor(
         path_type=Path,
     ),
     required=True,
-    help="Output path where StarCraft 2 (SC2) map files will be downloaded.",
+    help="Output path where the tool will place the processed files for SC2ReSet and SC2EGSet dataset as child directories.",
 )
 @click.option(
     "--n_processes",
     type=int,
     default=4,
     required=True,
-    help="Number of processes to be spawned for the dataset processing.",
+    help="Number of processes to be spawned for the dataset processing with SC2InfoExtractorGo.",
 )
 @click.option(
     "--log",
@@ -146,6 +146,8 @@ def main(
         raise ValueError(f"Invalid log level: {numeric_level}")
     logging.basicConfig(format=LOGGING_FORMAT, level=numeric_level)
 
+    # TODO: Recreate the entire pipeline for SC2ReSet and SC2EGSet:
+
     arguments = ReplaypackProcessorArguments(
         input_path=input_path,
         output_path=output_path,
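For quick reference, the refined CLI above could be invoked locally as follows — a usage sketch only, with placeholder paths, assuming the project's Python dependencies and SC2InfoExtractorGo are installed:

```bash
python3 sc2egset_replaypack_processor.py \
    --input_path ./processing/directory_flattener/output \
    --output_path ./processing/sc2egset_replaypack_processor/output \
    --n_processes 4 \
    --log INFO
```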
From d257a0d916802d24e495a44708aaf869af3d638c Mon Sep 17 00:00:00 2001
From: Kaszanas <34846245+Kaszanas@users.noreply.github.com>
Date: Sun, 5 Jan 2025 20:06:08 +0100
Subject: [PATCH 59/92] build: added variables in makefile, adjusted targets,
 added echo

---
 makefile | 129 ++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 104 insertions(+), 25 deletions(-)

diff --git a/makefile b/makefile
index deba784..f3ba8de 100644
--- a/makefile
+++ b/makefile
@@ -1,6 +1,17 @@
+# Docker variables:
 DOCKER_DIR = ./docker
+DOCKER_FILE = $(DOCKER_DIR)/Dockerfile
+DOCKER_FILE_DEV = $(DOCKER_DIR)/Dockerfile.dev
+
+# Local devcontainer
+DEVCONTAINER_NAME = datasetpreparator:devcontainer
+DEV_BRANCH_CONTAINER = datasetpreparator:dev
+
+# Compose variables:
 TEST_COMPOSE = $(DOCKER_DIR)/docker-test-compose.yml
+COMPOSE_PROJECT_NAME = datasetpreparator
 
+# Python variables:
 PYTHON_VERSION = 3.11
 
 TEST_COMMAND = "poetry run pytest --durations=100 --ignore-glob='test_*.py' tests --cov=datasetpreparator --cov-report term-missing --cov-report html 2>&1"
@@ -10,6 +21,7 @@ TEST_COMMAND_LOG = "poetry run pytest --durations=100 --ignore-glob='test_*.py'
 ###################
 #### PIPELINE #####
 ###################
+.PHONY: sc2reset_sc2egset
 sc2reset_sc2egset: ## Runs the entire processing pipeline to recreate SC2ReSet and SC2EGSet or any other dataset using our standard tooling.
 	@make flatten
 	@make process_replaypacks
 	@make rename_files
 	@make package_sc2egset_dataset
 	@make package_sc2reset_dataset
 
+.PHONY: flatten
 flatten: ## Flattens the directory if the files are held in nested directories. This helps with streamlining the processing.
+	@echo "Flattening the directory structure."
+	@make docker_pull_dev
+	@echo "Using the dev branch Docker image: $(DEV_BRANCH_CONTAINER)"
 	docker run --rm\
 		-v "./processing:/app/processing" \
-		datasetpreparator:dev \
+		$(DEV_BRANCH_CONTAINER) \
 		python3 directory_flattener.py \
 		--input_dir ./processing/directory_flattener/input --output_dir ./processing/directory_flattener/output
 
+.PHONY: process_replaypacks
 process_replaypacks: ## Parses the raw (.SC2Replay) data into JSON files.
+	@echo "Processing the replaypacks."
+	@make docker_pull_dev
+	@echo "Using the dev branch Docker image: $(DEV_BRANCH_CONTAINER)"
 	docker run --rm\
 		-v "./processing:/app/processing" \
-		datasetpreparator:dev \
+		$(DEV_BRANCH_CONTAINER) \
 		python3 sc2egset_replaypack_processor.py \
 		--input_dir ./processing/directory_flattener/output \
 		--output_dir ./processing/sc2egset_replaypack_processor/output \
 		--n_processes 8 \
 
+.PHONY: rename_files
 rename_files: ## Renames the files after processing with SC2InfoExtractorGo.
+	@echo "Renaming the files."
+	@make docker_pull_dev
+	@echo "Using the dev branch Docker image: $(DEV_BRANCH_CONTAINER)"
 	docker run \
 		-v "./processing:/app/processing" \
-		datasetpreparator:dev \
+		$(DEV_BRANCH_CONTAINER) \
 		python3 file_renamer.py \
 		--input_dir ./processing/sc2egset_replaypack_processor/output
 
+.PHONY: package_sc2egset_dataset
 package_sc2egset_dataset: ## Packages the pre-processed dataset from the output of datasetpreparator. Used to prepare SC2EGSet Dataset.
+	@echo "Packaging the dataset."
+	@make docker_pull_dev
+	@echo "Using the dev branch Docker image: $(DEV_BRANCH_CONTAINER)"
 	docker run --rm \
 		-v "./processing:/app/processing" \
-		datasetpreparator:dev \
+		$(DEV_BRANCH_CONTAINER) \
 		python3 file_packager.py \
 		--input_dir ./processing/sc2egset_replaypack_processor/output
 
+.PHONY: package_sc2reset_dataset
 package_sc2reset_dataset: ## Packages the raw data. Used to prepare SC2ReSet Replaypack set.
+	@echo "Packaging the dataset."
+	@make docker_pull_dev
+	@echo "Using the dev branch Docker image: $(DEV_BRANCH_CONTAINER)"
 	docker run --rm \
 		-v "./processing:/app/processing" \
-		datasetpreparator:dev \
+		$(DEV_BRANCH_CONTAINER) \
 		python3 file_packager.py \
 		--input_dir ./processing/directory_flattener/output
 
 ###################
 #### DOCKER #######
 ###################
+.PHONY: docker_pull_dev
+docker_pull_dev: ## Pulls the latest image from the Docker Hub.
+	@echo "Pulling the dev branch Docker image: $(DEV_BRANCH_CONTAINER)"
+	docker pull $(DEV_BRANCH_CONTAINER)
+
+.PHONY: docker_build
 docker_build: ## Builds the image containing all of the tools.
+	@echo "Building the Dockerfile: $(DOCKER_FILE)"
+	@echo "Using Python version: $(PYTHON_VERSION)"
 	docker build \
-	--build-arg="PYTHON_VERSION=$(PYTHON_VERSION)" \
-	-f ./docker/Dockerfile . \
-	--tag=datasetpreparator
+		--build-arg="PYTHON_VERSION=$(PYTHON_VERSION)" \
+		-f $(DOCKER_FILE) . \
+		--tag=datasetpreparator
 
-docker_build_dev: ## Builds the development image containing all of the tools.
+.PHONY: docker_build_devcontainer
+docker_build_devcontainer: ## Builds the development image containing all of the tools.
 	docker build \
-	--build-arg="PYTHON_VERSION=$(PYTHON_VERSION)" \
-	-f ./docker/Dockerfile.dev . \
-	--tag=datasetpreparator:devcontainer
+		--build-arg="PYTHON_VERSION=$(PYTHON_VERSION)" \
+		-f $(DOCKER_FILE_DEV) . \
+		--tag=$(DEVCONTAINER_NAME)
 
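+# NOTE: The image names above are plain make variables, so a different tag can
+# be substituted per invocation using standard make variable overrides, e.g.:
+#   make flatten DEV_BRANCH_CONTAINER=datasetpreparator:devcontainer
+# The tag shown here is only an example.
+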
+.PHONY: docker_run_test
 docker_run_test: ## Runs the test command using Docker.
 	docker run --rm \
-		datasetpreparator:devcontainer \
+		$(DEVCONTAINER_NAME) \
 		sh -c \
 		$(TEST_COMMAND)
 
+.PHONY: docker_run_dev
 docker_run_dev: ## Runs the development image containing all of the tools.
+	@echo "Running the devcontainer image: $(DEVCONTAINER_NAME)"
 	docker run \
 		-v ".:/app" \
 		-it \
 		-e "TEST_WORKSPACE=/app" \
-		datasetpreparator:devcontainer \
+		$(DEVCONTAINER_NAME) \
 		bash
 
 ###################
 #### DOCS #########
 ###################
+.PHONY: doc_serve
 doc_serve: ## Serves the Mkdocs documentation locally.
+	@echo "Serving the Mkdocs documentation."
 	poetry run mkdocs serve
 
+.PHONY: doc_build
 doc_build: ## Builds the Mkdocs documentation.
+	@echo "Building the Mkdocs documentation."
 	poetry run mkdocs build
 
+.PHONY: docker_doc_build
 docker_doc_build: ## Builds the Mkdocs documentation using Docker.
+	@echo "Building the Mkdocs documentation using Docker."
+	@make docker_build_devcontainer
+	@echo "Using the devcontainer image: $(DEVCONTAINER_NAME)"
 	docker run \
 		-v "./docs:/docs" \
-		datasetpreparator:devcontainer \
+		$(DEVCONTAINER_NAME) \
 		poetry run mkdocs build
 
+.PHONY: docker_doc_build_action
 docker_doc_build_action: ## Builds the Mkdocs documentation using Docker.
+	@echo "Building the Mkdocs documentation using Docker."
+	@make docker_build_devcontainer
+	@echo "Using the devcontainer image: $(DEVCONTAINER_NAME)"
 	docker run \
 		-v "./docs:/docs" \
-		datasetpreparator:devcontainer \
+		$(DEVCONTAINER_NAME) \
 		poetry run mkdocs build
 
 ###################
 #### PRE-COMMIT ###
 ###################
+.PHONY: docker_pre_commit
 docker_pre_commit: ## Runs pre-commit hooks using Docker.
-	@make docker_build_dev
+	@echo "Running pre-commit hooks using Docker."
+	@make docker_build_devcontainer
+	@echo "Using the devcontainer image: $(DEVCONTAINER_NAME)"
 	docker run \
 		-v ".:/app" \
-		datasetpreparator:devcontainer \
+		$(DEVCONTAINER_NAME) \
 		pre-commit run --all-files
 
+.PHONY: docker_pre_commit_action
 docker_pre_commit_action: ## Runs pre-commit hooks using Docker.
-	@make docker_build_dev
+	@echo "Running pre-commit hooks using Docker."
+	@make docker_build_devcontainer
+	@echo "Using the devcontainer image: $(DEVCONTAINER_NAME)"
 	docker run \
-		datasetpreparator:devcontainer \
+		$(DEVCONTAINER_NAME) \
 		pre-commit run --all-files
 
 ###################
 #### TESTING ######
 ###################
+.PHONY: compose_build
 compose_build: ## Builds the Docker Image with docker-compose.
-	docker-compose -f $(TEST_COMPOSE) build
-
+	@echo "Building the Docker Image with docker-compose."
+	@echo "Using the test compose file: $(TEST_COMPOSE)"
+	docker compose \
+		-p $(COMPOSE_PROJECT_NAME) \
+		-f $(TEST_COMPOSE) \
+		build
+
+.PHONY: action_compose_test
 action_compose_test: ## Runs the tests using Docker.
-	docker compose -f $(TEST_COMPOSE) run --rm lib \
-		bash -c $(TEST_COMMAND) --exit-code-from lib
+	@echo "Running the tests using Docker."
+	@echo "Using the test compose file: $(TEST_COMPOSE)"
+	docker compose -p $(COMPOSE_PROJECT_NAME) -f $(TEST_COMPOSE) run --rm lib \
+		bash -c $(TEST_COMMAND)
 
+.PHONY: compose_remove
 compose_remove: ## Stops and removes the testing containers, images, volumes.
-	docker-compose -f $(TEST_COMPOSE) down --volumes --remove-orphans
-
+	@echo "Stopping and removing the testing containers, images, volumes."
+	@echo "Using the test compose file: $(TEST_COMPOSE)"
+	docker compose \
+		-p $(COMPOSE_PROJECT_NAME) \
+		-f $(TEST_COMPOSE) \
+		down --volumes \
+		--remove-orphans
+
+.PHONY: compose_test
 compose_test: compose_build action_compose_test compose_remove
 
 .PHONY: help

From d5a5393b32e385603febe60bfe9adae68b21895f Mon Sep 17 00:00:00 2001
From: Kaszanas <34846245+Kaszanas@users.noreply.github.com>
Date: Sun, 5 Jan 2025 20:12:00 +0100
Subject: [PATCH 60/92] docs: changed docs, new CLI text, renamed container

---
 .../directory_flattener/README.md             | 32 +++++++++--------
 .../directory_packager/README.md              | 21 ++++++------
 src/datasetpreparator/file_renamer/README.md  | 22 ++++++------
 src/datasetpreparator/json_merger/README.md   | 29 ++++++++--------
 .../processed_mapping_copier/README.md        | 26 +++++++-------
 .../sc2/sc2_map_downloader/README.md          | 23 +++++++------
 .../sc2egset_replaypack_processor/README.md   | 34 +++++++++++--------
 .../sc2reset_replaypack_downloader/README.md  | 22 ++++++------
 8 files changed, 107 insertions(+), 102 deletions(-)

diff --git a/src/datasetpreparator/directory_flattener/README.md b/src/datasetpreparator/directory_flattener/README.md
index b6c8984..46d3798 100644
--- a/src/datasetpreparator/directory_flattener/README.md
+++ b/src/datasetpreparator/directory_flattener/README.md
@@ -4,24 +4,26 @@ Utility script for entering each of the supplied replaypacks and flattening its
 
 # CLI Usage
 
-Please keep in mind that ```src/directory_flattener.py``` does not contain default flag values and can be customized with the following command line flags:
+Please keep in mind that ```src/directory_flattener.py``` has required arguments and can be customized with the following command line interface:
 ```
 Usage: directory_flattener.py [OPTIONS]
 
-Directory restructuring tool used in order to flatten the structure, map the
-old structure to a separate file, and for later processing with other tools.
-Created primarily to define StarCraft 2 (SC2) datasets.
+  Directory restructuring tool used in order to flatten the structure. Saves
+  the mapping of the old directory structure to a separate file. Used to ease
+  processing with other tools. Can be used to extract additional meaning from
+  the directory structure in case of tournament replaypacks. Created primarily
+  to define StarCraft 2 (SC2) datasets.
 
 Options:
-  --input_path DIRECTORY         Please provide input path to the dataset that
-                                 is going to be processed.  [required]
-  --output_path DIRECTORY        Please provide output path where the tool
-                                 will put files after processing.  [required]
-  --file_extension TEXT          Specify file extension for the files that
-                                 will be put to the top level directory.
-                                 [required]
-  --log [INFO|DEBUG|ERROR|WARN]  Log level
+  --input_path DIRECTORY         Input path to the dataset that is going to be
+                                 processed.  [required]
+  --output_path DIRECTORY        Output path where the tool will put files
+                                 after processing.  [required]
+  --file_extension TEXT          File extension for the files that will be put
+                                 to the top level directory. Example
+                                 ('.SC2Replay').  [required]
+  --log [INFO|DEBUG|ERROR|WARN]  Log level. Default is WARN.
   --help                         Show this message and exit.
 ```
 
@@ -36,9 +38,9 @@ Please refer to the main [README](../../README.md) for the instructions.
 
 ## Script Docker Image
 
-Buil the docker image:
+Build the docker image:
 ```bash
-docker build --tag=datasetpreparator:directory_flattener .
+docker build --tag=datasetpreparator:latest .
 ```
 
 Run the docker image (please replace ``):
 ```bash
 docker run
 	-v "<./input>:/app/input" \
 	-v "<./output>:/app/output" \
-	datasetpreparator:directory_flattener \
+	datasetpreparator:latest \
 	python3 directory_flattener.py \
 	--input_dir /app/input \
 	--output_dir /app/output \
diff --git a/src/datasetpreparator/directory_packager/README.md b/src/datasetpreparator/directory_packager/README.md
index 1de8eb7..977ea89 100644
--- a/src/datasetpreparator/directory_packager/README.md
+++ b/src/datasetpreparator/directory_packager/README.md
@@ -4,19 +4,18 @@ Utility script for compressing a directory into a `.zip` archive. This script it
 
 # CLI Usage
 
-Please keep in mind that the ```src/dir_packager.py``` contains default flag values and can be customized with the following command line flags:
+Please keep in mind that the ```src/dir_packager.py``` has required arguments and can be customized with the following command line interface:
 ```
 Usage: directory_packager.py [OPTIONS]
 
-Tool used for processing StarCraft 2 (SC2) datasets. with
-https://github.com/Kaszanas/SC2InfoExtractorGo
+  Tool that packages directories into .zip archives. Each directory in the
+  input path is packaged into a separate .zip archive.
 
 Options:
-  --input_path DIRECTORY         Please provide input path to the directory
-                                 containing the dataset that is going to be
-                                 processed by packaging into .zip archives.
-                                 [required]
-  --log [INFO|DEBUG|ERROR|WARN]  Log level
+  --input_path DIRECTORY         Input path to the directory containing the
+                                 dataset that is going to be processed by
+                                 packaging into .zip archives.  [required]
+  --log [INFO|DEBUG|ERROR|WARN]  Log level. Default is WARN.
   --help                         Show this message and exit.
 ```
 
@@ -31,14 +30,14 @@ Please refer to the main [README](../../README.md) for the instructions.
 
 ## Script Docker Image
 
-Buil the docker image:
+Build the docker image:
 ```bash
-docker build --tag=datasetpreparator:dir_packager .
+docker build --tag=datasetpreparator:latest .
 ```
 
 Run the docker image (please replace ``):
 ```bash
 docker run -v "<./input>:/app/input" \
-	datasetpreparator:dir_packager \
+	datasetpreparator:latest \
 	python3 dir_packager.py --input_dir /app/input
 ```
diff --git a/src/datasetpreparator/file_renamer/README.md b/src/datasetpreparator/file_renamer/README.md
index f131774..90be4af 100644
--- a/src/datasetpreparator/file_renamer/README.md
+++ b/src/datasetpreparator/file_renamer/README.md
@@ -2,19 +2,19 @@
 
 # CLI Usage
 
-Please keep in mind that the ```src/file_renamer.py``` contains default flag values and can be customized with the following command line flags:
+Please keep in mind that the ```src/file_renamer.py``` has required arguments and can be customized with the following command line interface:
 ```
 Usage: file_renamer.py [OPTIONS]
 
-Tool used for processing StarCraft 2 (SC2) datasets. with
-https://github.com/Kaszanas/SC2InfoExtractorGo
+  Tool used for renaming auxiliary files (log files) that are produced when
+  creating StarCraft 2 (SC2) datasets with
+  https://github.com/Kaszanas/SC2InfoExtractorGo
 
 Options:
-  --input_path DIRECTORY         Please provide input path to the directory
-                                 containing the dataset that is going to be
-                                 processed by packaging into .zip archives.
-                                 [required]
-  --log [INFO|DEBUG|ERROR|WARN]  Log level
+  --input_path DIRECTORY         Input path to the directory containing the
+                                 dataset for which the auxiliary files will be
+                                 renamed.  [required]
+  --log [INFO|DEBUG|ERROR|WARN]  Log level. Default is WARN.
   --help                         Show this message and exit.
 ```
 
@@ -29,14 +29,14 @@ Please refer to the main [README](../../README.md) for the instructions.
 
 ## Script Docker Image
 
-Buil the docker image:
+Build the docker image:
 ```bash
-docker build --tag=datasetpreparator:file_renamer .
+docker build --tag=datasetpreparator:latest .
 ```
 
 Run the docker image (please replace ``):
 ```bash
 docker run -v "<./input>:/app/input" \
-	datasetpreparator:file_renamer \
+	datasetpreparator:latest \
 	python3 file_renamer.py --input_dir /app/input
 ```
diff --git a/src/datasetpreparator/json_merger/README.md b/src/datasetpreparator/json_merger/README.md
index a8a85fd..99d721a 100644
--- a/src/datasetpreparator/json_merger/README.md
+++ b/src/datasetpreparator/json_merger/README.md
@@ -4,23 +4,22 @@ Utility script that is merging two JSON files into an output JSON file.
 
 # CLI Usage
 
-Please keep in mind that the ```src/json_merger.py``` contains default flag values and can be customized with the following command line flags:
+Please keep in mind that the ```src/json_merger.py``` has required arguments and can be customized with the following command line interface:
 ```
 Usage: json_merger.py [OPTIONS]
 
-Tool used for merging two .json files. Created in order to merge two
-mappings created by https://github.com/Kaszanas/SC2MapLocaleExtractor
+  Tool used for merging two .json files. Originally used to merge two json
+  files created by https://github.com/Kaszanas/SC2MapLocaleExtractor
 
 Options:
-  --json_one FILE                Please provide the path to the first .json
-                                 file that is going to be merged.  [required]
-  --json_two FILE                Please provide the path to the second .json
-                                 file that is going to be merged.  [required]
-  --output_filepath FILE         Please provide a filepath to which the result
-                                 JSON file will be saved, note that any
-                                 existing file of the same name will be
-                                 overwriten.  [required]
-  --log [INFO|DEBUG|ERROR|WARN]  Log level
+  --json_one FILE                Path to the first .json file that is going to
+                                 be merged.  [required]
+  --json_two FILE                Path to the second .json file that is going
+                                 to be merged.  [required]
+  --output_filepath FILE         Filepath to which the result JSON file will
+                                 be saved. Note that any existing file of the
+                                 same name will be overwritten.  [required]
+  --log [INFO|DEBUG|ERROR|WARN]  Log level. Default is WARN.
   --help                         Show this message and exit.
 ```
 
@@ -35,15 +34,15 @@ Please refer to the main [README](../../README.md) for the instructions.
 
 ## Script Docker Image
 
-Buil the docker image:
+Build the docker image:
 ```bash
-docker build --tag=datasetpreparator:json_merger .
+docker build --tag=datasetpreparator:latest .
 ```
 
 Run the docker image (please replace ``):
 ```bash
 docker run -v "<./input>:/app/input" \
-	datasetpreparator:file_packager \
+	datasetpreparator:latest \
 	python3 json_merger.py \
 	--json_one /app/input/json1.json \
 	--json_two /app/input/json2.json \
diff --git a/src/datasetpreparator/processed_mapping_copier/README.md b/src/datasetpreparator/processed_mapping_copier/README.md
index d393cbb..2ce00ef 100644
--- a/src/datasetpreparator/processed_mapping_copier/README.md
+++ b/src/datasetpreparator/processed_mapping_copier/README.md
@@ -4,22 +4,22 @@ Utility script that enters each of the processed replaypack directories and copi
 
 # CLI Usage
 
-Please keep in mind that the ```src/processed_mapping_copier.py``` contains default flag values and can be customized with the following command line flags:
+Please keep in mind that the ```src/processed_mapping_copier.py``` has required arguments and can be customized with the following command line interface:
 ```
 Usage: processed_mapping_copier.py [OPTIONS]
 
-Tool for copying the processed_mapping.json files to the matching directory
-after processing the replaypack into a JSON dataset. This step is required
-to define the StarCraft 2 (SC2) dataset.
+  Tool for copying the auxiliary processed_mapping.json file to the
+  matching directory after processing the replaypack into a JSON dataset with
+  sc2egset_replaypack_processor.py. This script is required to reproduce
+  SC2EGSet Dataset.
 
 Options:
-  --input_path DIRECTORY         Please provide input path to the flattened
-                                 replaypacks that contain
-                                 procesed_mapping.json files.  [required]
-  --output_path DIRECTORY        Please provide output path where
-                                 processed_mapping.json will be copied.
+  --input_path DIRECTORY         Input path to the flattened replaypacks that
+                                 contain processed_mapping.json files.
                                  [required]
-  --log [INFO|DEBUG|ERROR|WARN]  Log level
+  --output_path DIRECTORY        Output path where processed_mapping.json will
+                                 be copied.  [required]
+  --log [INFO|DEBUG|ERROR|WARN]  Log level. Default is WARN.
   --help                         Show this message and exit.
 ```
 
@@ -34,9 +34,9 @@ Please refer to the main [README](../../README.md) for the instructions.
 
 ## Script Docker Image
 
-Buil the docker image:
+Build the docker image:
 ```bash
-docker build --tag=datasetpreparator:processed_mapping_copier .
+docker build --tag=datasetpreparator:latest .
 ```
 
 Run the docker image (please replace ``):
@@ -44,7 +44,7 @@ Run the docker image (please replace ``):
 docker run \
 	-v "<./input>:/app/input" \
 	-v "<./output>:/app/output" \
-	datasetpreparator:file_packager \
+	datasetpreparator:latest \
 	python3 processed_mapping_copier.py \
 	--input_dir /app/input \
 	--output_dir /app/output
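A local (non-Docker) invocation of the same tool could look like the sketch below — the paths are placeholders, and the flags are taken from the CLI help shown above:

```bash
python3 processed_mapping_copier.py \
    --input_path ./processing/directory_flattener/output \
    --output_path ./processing/sc2egset_replaypack_processor/output \
    --log INFO
```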
diff --git a/src/datasetpreparator/sc2/sc2_map_downloader/README.md b/src/datasetpreparator/sc2/sc2_map_downloader/README.md
index 59fc57c..e1fc2e9 100644
--- a/src/datasetpreparator/sc2/sc2_map_downloader/README.md
+++ b/src/datasetpreparator/sc2/sc2_map_downloader/README.md
@@ -4,20 +4,21 @@ Utility script that opens each of the provided replays and downloads the map fro
 
 # CLI Usage
 
-Please keep in mind that the ```src/sc2_map_downloader.py``` does not contain default flag values and can be customized with the following command line flags:
+Please keep in mind that the ```src/sc2_map_downloader.py``` has required arguments and can be customized with the following command line interface:
 ```
 Usage: sc2_map_downloader.py [OPTIONS]
 
-Tool for downloading StarCraft 2 (SC2) maps based on the data that is
-available within .SC2Replay file.
+  Tool for downloading StarCraft 2 (SC2) maps based on the data that is
+  available within .SC2Replay files.
 
 Options:
-  --input_path DIRECTORY         Please provide input path to the dataset that
-                                 is going to be processed.  [required]
-  --output_path DIRECTORY        Please provide output path where StarCraft 2
-                                 (SC2) map files will be downloaded.
+  --input_path DIRECTORY         Input path to the dataset that is going to be
+                                 processed. The script will find all
+                                 .SC2Replay files in the directory.
                                  [required]
-  --log [INFO|DEBUG|ERROR|WARN]  Log level
+  --output_path DIRECTORY        Output path where StarCraft 2 (SC2) map files
+                                 will be downloaded.  [required]
+  --log [INFO|DEBUG|ERROR|WARN]  Log level. Default is WARN.
   --help                         Show this message and exit.
 ```
 
@@ -32,9 +33,9 @@ Please refer to the main [README](../../README.md) for the instructions.
 
 ## Script Docker Image
 
-Buil the docker image:
+Build the docker image:
 ```bash
-docker build --tag=datasetpreparator:sc2_map_downloader .
+docker build --tag=datasetpreparator:latest .
 ```
 
 Run the docker image (please replace ``):
@@ -42,7 +43,7 @@ Run the docker image (please replace ``):
 docker run \
 	-v "<./input>:/app/input" \
 	-v "<./output>:/app/output" \
-	datasetpreparator:sc2_map_downloader \
+	datasetpreparator:latest \
 	python3 sc2_map_downloader.py \
 	--input_dir /app/input \
 	--output_dir /app/output
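For reference, the map downloader can also be run outside of Docker — a usage sketch with placeholder paths, assuming the Python dependencies from this repository are installed:

```bash
python3 sc2_map_downloader.py \
    --input_path ./replays \
    --output_path ./maps \
    --log INFO
```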
diff --git a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/README.md b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/README.md
index 9a2ba1d..e140f45 100644
--- a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/README.md
+++ b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/README.md
@@ -4,24 +4,28 @@ Utility script that leverages the [SC2InfoExtractorGo](https://github.com/Kaszan
 
 # CLI Usage
 
-Please keep in mind that the ```sc2_replaypack_processor.py``` contains default flag values and can be customized with the following command line flags:
+Please keep in mind that the ```sc2_replaypack_processor.py``` has required arguments and can be customized with the following command line interface:
 ```
 Usage: sc2egset_replaypack_processor.py [OPTIONS]
 
-Tool used to execute SC2InfoExtractorGo
-(https://github.com/Kaszanas/SC2InfoExtractorGo) on multiple replaypack
-directories. Assists in processing StarCraft 2 (SC2) datasets.
+  Tool used to recreate SC2ReSet and SC2EGSet Dataset. Executes the entire
+  pipeline for replay processing. Depends on SC2InfoExtractorGo
+  (https://github.com/Kaszanas/SC2InfoExtractorGo) which is executed on
+  multiple replaypack directories in the process. Assists in processing
+  StarCraft 2 (SC2) datasets.
 
 Options:
-  --input_path DIRECTORY         Please provide an output directory for the
-                                 resulting files.  [required]
-  --output_path DIRECTORY        Please provide output path where StarCraft 2
-                                 (SC2) map files will be downloaded.
+  --input_path DIRECTORY         Input directory containing multiple StarCraft 2
+                                 replaypacks. These files will be processed in
+                                 the same way as for the SC2ReSet and SC2EGSet
+                                 datasets.  [required]
+  --output_path DIRECTORY        Output path where the tool will place the
+                                 processed files for SC2ReSet and SC2EGSet
+                                 dataset as child directories.  [required]
+  --n_processes INTEGER          Number of processes to be spawned for the
+                                 dataset processing with SC2InfoExtractorGo.
                                  [required]
-  --n_processes INTEGER          Please provide the number of processes to be
-                                 spawned for the dataset processing.
-                                 [required]
-  --log [INFO|DEBUG|ERROR|WARN]  Log level
+  --log [INFO|DEBUG|ERROR|WARN]  Log level. Default is WARN.
   --help                         Show this message and exit.
 ```
 
@@ -36,14 +40,14 @@ Please refer to the main [README](../../README.md) for the instructions.
 
 ## Script Docker Image
 
-Buil the docker image:
+Build the docker image:
 ```bash
-docker build --tag=datasetpreparator:sc2_replaypack_processor .
+docker build --tag=datasetpreparator:latest .
 ```
 
 Run the docker image (please replace ``):
 ```bash
 docker run -v "<./input>:/app/input" \
-	datasetpreparator:sc2_replaypack_processor \
+	datasetpreparator:latest \
 	python3 sc2_replaypack_processor.py --input_dir /app/input
 ```
diff --git a/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/README.md b/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/README.md
index 8047f8e..c29dc50 100644
--- a/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/README.md
+++ b/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/README.md
@@ -4,21 +4,21 @@ Utility script, downloads the contents of SC2ReSet replaypack from a Zenodo repo
 
 # CLI Usage
 
-Please keep in mind that the ```src/sc2reset_replaypack_downloader.py``` contains default flag values and can be customized with the following command line flags:
+Please keep in mind that the ```sc2reset_replaypack_downloader.py``` has required arguments and can be customized with the following command line interface:
 ```
 Usage: sc2reset_replaypack_downloader.py [OPTIONS]
 
-Tool used for downloading SC2ReSet: StarCraft II Esport Replaypack Set
-(https://zenodo.org/doi/10.5281/zenodo.5575796).
+  Tool used for downloading SC2ReSet: StarCraft II Esport Replaypack Set
+  (https://zenodo.org/doi/10.5281/zenodo.5575796).
 
 Options:
-  --download_path DIRECTORY      Please provide a path to which the archives
-                                 will be downloaded.  [required]
-  --unpack_path DIRECTORY        Please provide a path to which the archives
-                                 will be unpacked.  [required]
+  --download_path DIRECTORY      Path to which the archives will be
+                                 downloaded.  [required]
+  --unpack_path DIRECTORY        Path to which the archives will be unpacked.
+                                 [required]
   --n_workers INTEGER            Number of workers used for extracting the
                                  .zip archives.  [required]
-  --log [INFO|DEBUG|ERROR|WARN]  Log level
+  --log [INFO|DEBUG|ERROR|WARN]  Log level. Default is WARN.
   --help                         Show this message and exit.
 ```
 
@@ -33,15 +33,15 @@ Please refer to the main [README](../../README.md) for the instructions.
 
 ## Script Docker Image
 
-Buil the docker image:
+Build the docker image:
 ```bash
-docker build --tag=datasetpreparator:sc2reset_replaypack_downloader .
+docker build --tag=datasetpreparator:latest .
``` Run the docker image (please replace ``): ```bash docker run -v "<./input>:/app/input" \ - datasetpreparator:sc2reset_replaypack_downloader \ + datasetpreparator:latest \ python3 sc2reset_replaypack_downloader.py --input_dir /app/input ``` From 18a6abc342de32450a2436a39472d25238a2c4b6 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Sun, 5 Jan 2025 20:12:48 +0100 Subject: [PATCH 61/92] build: removed dockerfiles per script, using main dockerfile --- .../directory_flattener/Dockerfile | 9 --------- .../directory_packager/Dockerfile | 9 --------- src/datasetpreparator/file_renamer/Dockerfile | 9 --------- src/datasetpreparator/json_merger/Dockerfile | 9 --------- .../processed_mapping_copier/Dockerfile | 9 --------- .../sc2/sc2_map_downloader/Dockerfile | 9 --------- .../sc2egset_replaypack_processor/Dockerfile | 20 ------------------- .../sc2reset_replaypack_downloader/Dockerfile | 9 --------- 8 files changed, 83 deletions(-) delete mode 100644 src/datasetpreparator/directory_flattener/Dockerfile delete mode 100644 src/datasetpreparator/directory_packager/Dockerfile delete mode 100644 src/datasetpreparator/file_renamer/Dockerfile delete mode 100644 src/datasetpreparator/json_merger/Dockerfile delete mode 100644 src/datasetpreparator/processed_mapping_copier/Dockerfile delete mode 100644 src/datasetpreparator/sc2/sc2_map_downloader/Dockerfile delete mode 100644 src/datasetpreparator/sc2/sc2egset_replaypack_processor/Dockerfile delete mode 100644 src/datasetpreparator/sc2/sc2reset_replaypack_downloader/Dockerfile diff --git a/src/datasetpreparator/directory_flattener/Dockerfile b/src/datasetpreparator/directory_flattener/Dockerfile deleted file mode 100644 index f317832..0000000 --- a/src/datasetpreparator/directory_flattener/Dockerfile +++ /dev/null @@ -1,9 +0,0 @@ -FROM python:latest - -RUN pip install click tqdm - -WORKDIR /app - -COPY . . - -CMD ["python3", "directory_flattener.py"] diff --git a/src/datasetpreparator/directory_packager/Dockerfile b/src/datasetpreparator/directory_packager/Dockerfile deleted file mode 100644 index a470732..0000000 --- a/src/datasetpreparator/directory_packager/Dockerfile +++ /dev/null @@ -1,9 +0,0 @@ -FROM python:latest - -RUN pip install click - -WORKDIR /app - -COPY . . - -CMD ["python3", "directory_packager.py"] diff --git a/src/datasetpreparator/file_renamer/Dockerfile b/src/datasetpreparator/file_renamer/Dockerfile deleted file mode 100644 index b4b0a06..0000000 --- a/src/datasetpreparator/file_renamer/Dockerfile +++ /dev/null @@ -1,9 +0,0 @@ -FROM python:latest - -RUN pip install click - -WORKDIR /app - -COPY . . - -CMD ["python3", "file_renamer.py"] diff --git a/src/datasetpreparator/json_merger/Dockerfile b/src/datasetpreparator/json_merger/Dockerfile deleted file mode 100644 index f14eb05..0000000 --- a/src/datasetpreparator/json_merger/Dockerfile +++ /dev/null @@ -1,9 +0,0 @@ -FROM python:latest - -RUN pip install click - -WORKDIR /app - -COPY . . - -CMD ["python3", "json_merger.py"] diff --git a/src/datasetpreparator/processed_mapping_copier/Dockerfile b/src/datasetpreparator/processed_mapping_copier/Dockerfile deleted file mode 100644 index 298d941..0000000 --- a/src/datasetpreparator/processed_mapping_copier/Dockerfile +++ /dev/null @@ -1,9 +0,0 @@ -FROM python:latest - -RUN pip install click - -WORKDIR /app - -COPY . . 
- -CMD ["python3", "processed_mapping_copier.py"] diff --git a/src/datasetpreparator/sc2/sc2_map_downloader/Dockerfile b/src/datasetpreparator/sc2/sc2_map_downloader/Dockerfile deleted file mode 100644 index f50f61c..0000000 --- a/src/datasetpreparator/sc2/sc2_map_downloader/Dockerfile +++ /dev/null @@ -1,9 +0,0 @@ -FROM python:latest - -WORKDIR /app - -RUN pip install click requests sc2reader - -COPY . . - -CMD ["python3", "sc2_map_downloader.py"] diff --git a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/Dockerfile b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/Dockerfile deleted file mode 100644 index 27797af..0000000 --- a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/Dockerfile +++ /dev/null @@ -1,20 +0,0 @@ -# Built .exe replay parsing tool is required to run sc2_replaypack_processor -# https://github.com/Kaszanas/SC2InfoExtractorGo -FROM sc2-info-extractor - -FROM python:3.10-alpine - -WORKDIR /sc2_dataset_preparator - -# Copying the replay parsing tool: -COPY --from=0 /SC2InfoExtractorGo /SC2InfoExtractorGo - -# Installing Python dependencies: -RUN pip install click tqdm - -# Copying all Python scripts -COPY . . - -WORKDIR /sc2_dataset_preparator/src - -CMD ["python3", "sc2egset_replaypack_processor.py"] diff --git a/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/Dockerfile b/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/Dockerfile deleted file mode 100644 index 34818cc..0000000 --- a/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/Dockerfile +++ /dev/null @@ -1,9 +0,0 @@ -FROM python:latest - -WORKDIR /app - -RUN pip install click requests tqdm - -COPY . . - -CMD ["python3", "sc2reset_replaypack_downloader.py"] From 4d7b41f26a912c0d24cf13d5662bb7e5b7d55454 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Sun, 5 Jan 2025 20:13:19 +0100 Subject: [PATCH 62/92] refactor: drafting refactor of sc2egset_replaypack_processor --- .../sc2egset_replaypack_processor.py | 43 +++++++++++++++---- .../utils/replaypack_processor_args.py | 2 + 2 files changed, 36 insertions(+), 9 deletions(-) diff --git a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py index 6319683..092cad7 100644 --- a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py +++ b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py @@ -14,6 +14,14 @@ pre_process_download_maps, ) +from datasetpreparator.directory_flattener.directory_flattener import ( + multiple_directory_flattener, +) + +from datasetpreparator.directory_packager.directory_packager import ( + multiple_dir_packager, +) + def define_sc2egset_args( input_path: Path, @@ -56,6 +64,16 @@ def define_sc2egset_args( return sc2_info_extractor_go_args +def sc2info_extractor_go_map_download(arguments: ReplaypackProcessorArguments): + # Pre-process, download all maps: + logging.info("Downloading all maps...") + map_download_arguments = SC2InfoExtractorGoArguments.get_download_maps_args( + processing_input=arguments.input_path, output=arguments.output_path + ) + pre_process_download_maps(arguments=map_download_arguments) + pass + + def sc2egset_replaypack_processor( arguments: ReplaypackProcessorArguments, ): @@ -84,13 +102,6 @@ def sc2egset_replaypack_processor( if sc2_info_extractor_go_args is not None: multiprocessing_list.append(sc2_info_extractor_go_args) - # Pre-process, 
download all maps: - logging.info("Downloading all maps...") - map_download_arguments = SC2InfoExtractorGoArguments.get_download_maps_args( - processing_input=arguments.input_path, output=arguments.output_path - ) - pre_process_download_maps(arguments=map_download_arguments) - # Run processing with multiple SC2InfoExtractorGo instances: multiprocessing_scheduler(multiprocessing_list, int(n_processes)) @@ -146,14 +157,28 @@ def main( raise ValueError(f"Invalid log level: {numeric_level}") logging.basicConfig(format=LOGGING_FORMAT, level=numeric_level) - # TODO: Recreate the entire pipeline for SC2ReSet and SC2EGSet: - arguments = ReplaypackProcessorArguments( input_path=input_path, output_path=output_path, n_processes=n_processes, + maps_directory="", ) + # TODO: Recreate the entire pipeline for SC2ReSet and SC2EGSet: + # REVIEW: Note that the Chinese maps need to be pre-seeded so that they can be + # hosted later on. + + # Directory flattener: + multiple_directory_flattener() + + # Download all maps for multiprocess, map files are used as a source of truth for + # SC2InfoExtractorGo downloading mechanism: + sc2info_extractor_go_map_download(arguments=arguments) + + # Package SC2ReSet and the downloaded maps, move to the output directory: + multiple_dir_packager(input_path="") + + # Process SC2EGSet, this will use the same map directory as the previous step: sc2egset_replaypack_processor(arguments=arguments) diff --git a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/replaypack_processor_args.py b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/replaypack_processor_args.py index 1575fca..6947289 100644 --- a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/replaypack_processor_args.py +++ b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/replaypack_processor_args.py @@ -163,8 +163,10 @@ def __init__( self, input_path: Path, output_path: Path, + maps_directory: Path, n_processes: int, ): self.input_path = input_path self.output_path = output_path self.n_processes = n_processes + self.maps_directory = maps_directory From 828a356bd217971b16276eb6c6c44dd6417950af Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Sun, 5 Jan 2025 23:13:15 +0100 Subject: [PATCH 63/92] feat: added processed_mapping_copier target to makefile --- makefile | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/makefile b/makefile index f3ba8de..b2657af 100644 --- a/makefile +++ b/makefile @@ -54,6 +54,18 @@ process_replaypacks: ## Parses the raw (.SC2Replay) data into JSON files. --output_dir ./processing/sc2egset_replaypack_processor/output \ --n_processes 8 \ +.PHONY: processed_mapping_copier +processed_mapping_copier: + @echo "Copying the processed mapping files." + @make docker_pull_dev + @echo "Using the dev branch Docker image: $(DEV_BRANCH_CONTAINER)" + docker run --rm\ + -v "./processing:/app/processing" \ + $(DEV_BRANCH_CONTAINER) \ + python3 processed_mapping_copier.py \ + --input_dir ./processing/directory_flattener/output \ + --output_dir ./processing/sc2egset_replaypack_processor/output + .PHONY: rename_files rename_files: ## Renames the files after processing with SC2InfoExtractorGo. @echo "Renaming the files." @@ -65,6 +77,7 @@ rename_files: ## Renames the files after processing with SC2InfoExtractorGo. 
python3 file_renamer.py \ --input_dir ./processing/sc2egset_replaypack_processor/output + .PHONY: package_sc2egset_dataset package_sc2egset_dataset: ## Packages the pre-processed dataset from the output of datasetpreparator. Used to prepare SC2EGSet Dataset. @echo "Packaging the dataset." From d4e3cb7b5f3f3484e65ec83e52435e22da39b096 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Sun, 5 Jan 2025 23:14:17 +0100 Subject: [PATCH 64/92] feat: draft functionality of sc2egset_replaypack... full pipeline BREAKING CHANGE: --- .../sc2egset_replaypack_processor.py | 55 ++++++++++++++++++- .../utils/multiprocess.py | 2 + 2 files changed, 54 insertions(+), 3 deletions(-) diff --git a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py index 092cad7..079ac8d 100644 --- a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py +++ b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py @@ -22,6 +22,12 @@ multiple_dir_packager, ) +from datasetpreparator.processed_mapping_copier.processed_mapping_copier import ( + processed_mapping_copier, +) + +from datasetpreparator.file_renamer.file_renamer import file_renamer + def define_sc2egset_args( input_path: Path, @@ -107,7 +113,7 @@ def sc2egset_replaypack_processor( @click.command( - help="Tool used to recreate SC2ReSet and SC2EGSet Dataset. Executes the entire pipeline for replay processing. Depends on SC2InfoExtractorGo (https://github.com/Kaszanas/SC2InfoExtractorGo) which is executed on multiple replaypack directories in the process. Assists in processing StarCraft 2 (SC2) datasets." + help="Tool used to recreate SC2ReSet and SC2EGSet Dataset. Depends on SC2InfoExtractorGo (https://github.com/Kaszanas/SC2InfoExtractorGo) which is executed on multiple replaypack directories in the process. Entire pipeline for replay processing runs with the command line arguments used to create SC2EGSet. Assists in processing StarCraft 2 (SC2) datasets." ) @click.option( "--input_path", @@ -157,11 +163,20 @@ def main( raise ValueError(f"Invalid log level: {numeric_level}") logging.basicConfig(format=LOGGING_FORMAT, level=numeric_level) + # Create output directory if it does not exist: + if not output_path.exists(): + output_path.mkdir(exist_ok=True) + + input_path = Path(input_path).resolve() + output_path = Path(output_path).resolve() + + maps_output_path = Path(output_path, "maps").resolve() + arguments = ReplaypackProcessorArguments( input_path=input_path, output_path=output_path, n_processes=n_processes, - maps_directory="", + maps_directory=maps_output_path, ) # TODO: Recreate the entire pipeline for SC2ReSet and SC2EGSet: @@ -169,18 +184,52 @@ def main( # hosted later on. # Directory flattener: - multiple_directory_flattener() + directory_flattener_output_path = Path( + output_path, "directory_flattener_output" + ).resolve() + # REVIEW: Should it be ok if the directory exists? + # it may contain some files that should not be overwritten: + # --force as a flag in CLI + # input command waiting for user input to confirm potential overwrite: + # + + if not directory_flattener_output_path.exists(): + directory_flattener_output_path.mkdir(exist_ok=True) + + # TODO: Check if the output directory is not empty, if it is you can proceed + # if the directory is not empty issue a warning with confirmation prompt. 
+
+    if not directory_flattener_output_path.exists():
+        directory_flattener_output_path.mkdir(exist_ok=True)
+
+    # TODO: Check if the output directory is empty; if it is, processing can proceed.
+    # If the directory is not empty, issue a warning with a confirmation prompt.
+
+    # if not empty and not force:
+    # prompt user to confirm overwrite
+
+    logging.info("Flattening directories...")
+    multiple_directory_flattener(
+        input_path=input_path,
+        output_path=directory_flattener_output_path,
+        file_extension=".SC2Replay",
+    )
 
     # Download all maps for multiprocess, map files are used as a source of truth for
     # SC2InfoExtractorGo downloading mechanism:
+    logging.info("Downloading all maps using SC2InfoExtractorGo...")
     sc2info_extractor_go_map_download(arguments=arguments)
 
     # Package SC2ReSet and the downloaded maps, move to the output directory:
+    logging.info("Packaging SC2ReSet and the downloaded maps...")
     multiple_dir_packager(input_path="")
 
     # Process SC2EGSet, this will use the same map directory as the previous step:
+    logging.info("Processing SC2EGSet using SC2InfoExtractorGo...")
     sc2egset_replaypack_processor(arguments=arguments)
 
+    # Processed Mapping Copier:
+    logging.info("Copying processed_mapping.json files...")
+    processed_mapping_copier(input_path="", output_path="")
+
+    # File Renamer:
+    logging.info("Renaming auxiliary (log) files...")
+    file_renamer(input_path="")
+
 
 if __name__ == "__main__":
     main()
diff --git a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/multiprocess.py b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/multiprocess.py
index 7eaa6c0..de58ef3 100644
--- a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/multiprocess.py
+++ b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/multiprocess.py
@@ -65,6 +65,8 @@ def process_single_replaypack(arguments: SC2InfoExtractorGoArguments) -> None:
         directory,
         output_directory_filepath,
     )
+
+    # TODO: Check if I can do a pipe from the subprocess to get multiple progress bars:
     subprocess.run(
         [
             # FIXME hardcoded binary name

From 67ec3e00aada1f32b59e70f9884ff77867f1a774 Mon Sep 17 00:00:00 2001
From: Kaszanas <34846245+Kaszanas@users.noreply.github.com>
Date: Mon, 6 Jan 2025 21:34:42 +0100
Subject: [PATCH 65/92] feat: drafted utils/user_prompt

---
 src/datasetpreparator/utils/__init__.py    |  0
 src/datasetpreparator/utils/user_prompt.py | 49 ++++++++++++++++++++++
 2 files changed, 49 insertions(+)
 create mode 100644 src/datasetpreparator/utils/__init__.py
 create mode 100644 src/datasetpreparator/utils/user_prompt.py

diff --git a/src/datasetpreparator/utils/__init__.py b/src/datasetpreparator/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/datasetpreparator/utils/user_prompt.py b/src/datasetpreparator/utils/user_prompt.py
new file mode 100644
index 0000000..c4ee34b
--- /dev/null
+++ b/src/datasetpreparator/utils/user_prompt.py
@@ -0,0 +1,49 @@
+from pathlib import Path
+
+
+def user_prompt_possible_overwrite(filepath: Path, force: bool) -> bool:
+    """
+    Prompts the user to confirm if they want to potentially overwrite a file or directory if
+    it already exists.
+
+    Parameters
+    ----------
+    filepath : Path
+        Filepath to the file or directory that might be overwritten if it already exists.
+    force : bool
+        Flag that specifies if the user wants to overwrite the file or directory without
+        being prompted.
+
+    Returns
+    -------
+    bool
+        True if the file is permitted to be overwritten, False otherwise.
+ """ + + # User passed the force flag, so the files or directories can be overwritten: + if force: + return True + + # File or directory does not exist, so it can be created, + # there is no risk of overwriting anything: + if not filepath.exists(): + return True + + return_map = { + "y": True, + "n": False, + } + + # File or directory exists, so we need to prompt the user to confirm + # if they want to potentially overwrite something and loose data: + if filepath.exists(): + print(f"File {filepath} already exists.") + user_input = input("Do you want to overwrite it? (y/n): ") + + if user_input.lower() in return_map: + return return_map[user_input.lower()] + + print("Invalid input, please type 'y' or 'n'.") + return user_prompt_possible_overwrite(filepath, force) + + return False From 76be1bc9c0fd43aae391c1575c7cac0a0a2daf3c Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Mon, 6 Jan 2025 21:42:13 +0100 Subject: [PATCH 66/92] refactor: renamed user prompting function --- src/datasetpreparator/utils/user_prompt.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/datasetpreparator/utils/user_prompt.py b/src/datasetpreparator/utils/user_prompt.py index c4ee34b..5c8b76c 100644 --- a/src/datasetpreparator/utils/user_prompt.py +++ b/src/datasetpreparator/utils/user_prompt.py @@ -1,7 +1,7 @@ from pathlib import Path -def user_prompt_possible_overwrite(filepath: Path, force: bool) -> bool: +def user_prompt_overwrite_ok(filepath: Path, force: bool) -> bool: """ Prompts the user to confirm if they want to potentially overwrite a file or directory if it already exists. @@ -29,6 +29,10 @@ def user_prompt_possible_overwrite(filepath: Path, force: bool) -> bool: if not filepath.exists(): return True + # Directory is empty, so it can be overwritten without any risk: + if filepath.is_dir() and len(list(filepath.iterdir())) == 0: + return True + return_map = { "y": True, "n": False, @@ -36,14 +40,13 @@ def user_prompt_possible_overwrite(filepath: Path, force: bool) -> bool: # File or directory exists, so we need to prompt the user to confirm # if they want to potentially overwrite something and loose data: - if filepath.exists(): - print(f"File {filepath} already exists.") - user_input = input("Do you want to overwrite it? (y/n): ") + print(f"File {filepath} already exists.") + user_input = input("Do you want to overwrite it? 
From 76be1bc9c0fd43aae391c1575c7cac0a0a2daf3c Mon Sep 17 00:00:00 2001
From: Kaszanas <34846245+Kaszanas@users.noreply.github.com>
Date: Mon, 6 Jan 2025 21:42:13 +0100
Subject: [PATCH 66/92] refactor: renamed user prompting function

---
 src/datasetpreparator/utils/user_prompt.py | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/src/datasetpreparator/utils/user_prompt.py b/src/datasetpreparator/utils/user_prompt.py
index c4ee34b..5c8b76c 100644
--- a/src/datasetpreparator/utils/user_prompt.py
+++ b/src/datasetpreparator/utils/user_prompt.py
@@ -1,7 +1,7 @@
 from pathlib import Path
 
 
-def user_prompt_possible_overwrite(filepath: Path, force: bool) -> bool:
+def user_prompt_overwrite_ok(path: Path, force: bool) -> bool:
     """
     Prompts the user to confirm if they want to potentially overwrite a file or directory if
     it already exists.
@@ -29,6 +29,10 @@ def user_prompt_possible_overwrite(filepath: Path, force: bool) -> bool:
     if not filepath.exists():
         return True
 
+    # Directory is empty, so it can be overwritten without any risk:
+    if filepath.is_dir() and len(list(filepath.iterdir())) == 0:
+        return True
+
     return_map = {
         "y": True,
         "n": False,
     }
@@ -36,14 +40,13 @@ def user_prompt_possible_overwrite(filepath: Path, force: bool) -> bool:
 
     # File or directory exists, so we need to prompt the user to confirm
     # if they want to potentially overwrite something and lose data:
-    if filepath.exists():
-        print(f"File {filepath} already exists.")
-        user_input = input("Do you want to overwrite it? (y/n): ")
+    print(f"File {path} already exists.")
+    user_input = input("Do you want to overwrite it? (y/n): ")
 
-        if user_input.lower() in return_map:
-            return return_map[user_input.lower()]
+    if user_input.lower() in return_map:
+        return return_map[user_input.lower()]
 
-        print("Invalid input, please type 'y' or 'n'.")
-        return user_prompt_possible_overwrite(filepath, force)
+    print("Invalid input, please type 'y' or 'n'.")
+    return user_prompt_overwrite_ok(path, force)
 
-    return False

From 78a8d00b01cbd8bb8249f9b6d4b110b95af3484e Mon Sep 17 00:00:00 2001
From: Kaszanas <34846245+Kaszanas@users.noreply.github.com>
Date: Mon, 6 Jan 2025 21:42:51 +0100
Subject: [PATCH 67/92] refactor: applied user prompting in
 sc2egset_replaypack_processor

---
 .../sc2egset_replaypack_processor.py          | 83 +++++--------------
 .../utils/download_maps.py                    | 17 ++++++
 .../utils/replaypack_processor_args.py        | 61 ++++++++++++++
 3 files changed, 97 insertions(+), 64 deletions(-)
 create mode 100644 src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/download_maps.py

diff --git a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py
index 079ac8d..a321dc8 100644
--- a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py
+++ b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py
@@ -1,17 +1,18 @@
-import os
 from pathlib import Path
 import logging
 
 import click
 from tqdm import tqdm
 
+from datasetpreparator.sc2.sc2egset_replaypack_processor.utils.download_maps import (
+    sc2info_extractor_go_map_download,
+)
 from datasetpreparator.settings import LOGGING_FORMAT
 from datasetpreparator.sc2.sc2egset_replaypack_processor.utils.replaypack_processor_args import (
     ReplaypackProcessorArguments,
-    SC2InfoExtractorGoArguments,
+    define_sc2egset_args,
 )
 from datasetpreparator.sc2.sc2egset_replaypack_processor.utils.multiprocess import (
     multiprocessing_scheduler,
-    pre_process_download_maps,
 )
 
 from datasetpreparator.directory_flattener.directory_flattener import (
@@ -27,57 +28,7 @@
 )
 
 from datasetpreparator.file_renamer.file_renamer import file_renamer
+from datasetpreparator.utils.user_prompt import user_prompt_overwrite_ok
-
-
-def define_sc2egset_args(
-    input_path: Path,
-    output_path: Path,
-    arguments: ReplaypackProcessorArguments,
-    maybe_dir: Path,
-) -> ReplaypackProcessorArguments | None:
-    logging.debug(f"Processing entry: {maybe_dir}")
-    processing_input_dir = Path(input_path, maybe_dir).resolve()
-    if not processing_input_dir.is_dir():
-        logging.debug("Entry is not a directory, skipping!")
-        return None
-
-    logging.debug(f"Output dir: {output_path}")
-    # Create the main output directory:
-    if not output_path.exists():
-        output_path.mkdir()
-
-    # TODO: use pathlib:
-    path, output_directory_name = os.path.split(maybe_dir)
-    logging.debug(f"Output dir name: {output_directory_name}")
-    if output_directory_name == "input":
-        return None
-
-    output_directory_with_name = Path(output_path, output_directory_name).resolve()
-    logging.debug(f"Output filepath: {output_directory_with_name}")
-
-    # Create the output subdirectories:
-    if not output_directory_with_name.exists():
-        output_directory_with_name.mkdir()
-
-    sc2_info_extractor_go_args = (
-        SC2InfoExtractorGoArguments.get_sc2egset_processing_args(
-            processing_input=processing_input_dir,
-            output=output_directory_with_name,
-            perform_chat_anonymization=arguments.perform_chat_anonymization,
-        )
-    )
-
-    return sc2_info_extractor_go_args
-
-
-def sc2info_extractor_go_map_download(arguments: ReplaypackProcessorArguments):
-    # Pre-process, download all 
maps: - logging.info("Downloading all maps...") - map_download_arguments = SC2InfoExtractorGoArguments.get_download_maps_args( - processing_input=arguments.input_path, output=arguments.output_path - ) - pre_process_download_maps(arguments=map_download_arguments) - pass +from datasetpreparator.utils.user_prompt import user_prompt_overwrite_ok def sc2egset_replaypack_processor( @@ -93,15 +44,9 @@ def sc2egset_replaypack_processor( Specifies the arguments as per the ReplaypackProcessorArguments class fields. """ - input_path = arguments.input_path - output_path = arguments.output_path - n_processes = arguments.n_processes - multiprocessing_list = [] - for maybe_dir in tqdm(list(input_path.iterdir())): + for maybe_dir in tqdm(list(arguments.input_path.iterdir())): sc2_info_extractor_go_args = define_sc2egset_args( - input_path=input_path, - output_path=output_path, arguments=arguments, maybe_dir=maybe_dir, ) @@ -109,7 +54,7 @@ def sc2egset_replaypack_processor( multiprocessing_list.append(sc2_info_extractor_go_args) # Run processing with multiple SC2InfoExtractorGo instances: - multiprocessing_scheduler(multiprocessing_list, int(n_processes)) + multiprocessing_scheduler(multiprocessing_list, int(arguments.n_processes)) @click.command( @@ -146,6 +91,13 @@ def sc2egset_replaypack_processor( required=True, help="Number of processes to be spawned for the dataset processing with SC2InfoExtractorGo.", ) +@click.option( + "--force_overwrite", + type=bool, + default=False, + required=True, + help="Flag that specifies if the user wants to overwrite files or directories without being prompted.", +) @click.option( "--log", type=click.Choice(["INFO", "DEBUG", "ERROR", "WARN"], case_sensitive=False), @@ -156,6 +108,7 @@ def main( input_path: Path, output_path: Path, n_processes: int, + force_overwrite: bool, log: str, ) -> None: numeric_level = getattr(logging, log.upper(), None) @@ -164,7 +117,7 @@ def main( logging.basicConfig(format=LOGGING_FORMAT, level=numeric_level) # Create output directory if it does not exist: - if not output_path.exists(): + if user_prompt_overwrite_ok(filepath=output_path, force=force_overwrite): output_path.mkdir(exist_ok=True) input_path = Path(input_path).resolve() @@ -193,7 +146,9 @@ def main( # input command waiting for user input to confirm potential overwrite: # - if not directory_flattener_output_path.exists(): + if user_prompt_overwrite_ok( + filepath=directory_flattener_output_path, force=force_overwrite + ): directory_flattener_output_path.mkdir(exist_ok=True) # TODO: Check if the output directory is not empty, if it is you can proceed diff --git a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/download_maps.py b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/download_maps.py new file mode 100644 index 0000000..1f4db97 --- /dev/null +++ b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/download_maps.py @@ -0,0 +1,17 @@ +import logging +from datasetpreparator.sc2.sc2egset_replaypack_processor.utils.multiprocess import ( + pre_process_download_maps, +) +from datasetpreparator.sc2.sc2egset_replaypack_processor.utils.replaypack_processor_args import ( + ReplaypackProcessorArguments, + SC2InfoExtractorGoArguments, +) + + +def sc2info_extractor_go_map_download(arguments: ReplaypackProcessorArguments): + # Pre-process, download all maps: + logging.info("Downloading all maps...") + map_download_arguments = SC2InfoExtractorGoArguments.get_download_maps_args( + processing_input=arguments.input_path, output=arguments.output_path + ) + 
pre_process_download_maps(arguments=map_download_arguments)
diff --git a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/replaypack_processor_args.py b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/replaypack_processor_args.py
index 6947289..e3462fd 100644
--- a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/replaypack_processor_args.py
+++ b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/replaypack_processor_args.py
@@ -1,3 +1,4 @@
+import logging
 from pathlib import Path
 import os
 
@@ -170,3 +171,63 @@
         self.output_path = output_path
         self.n_processes = n_processes
         self.maps_directory = maps_directory
+
+
+def define_sc2egset_args(
+    arguments: ReplaypackProcessorArguments,
+    maybe_dir: Path,
+) -> ReplaypackProcessorArguments | None:
+    """
+    Creates final ReplaypackProcessorArguments for SC2InfoExtractorGo. These arguments
+    are used to perform extraction of the data from a replaypack with the same arguments
+    as SC2EGSetDataset.
+
+    Parameters
+    ----------
+    arguments : ReplaypackProcessorArguments
+        Arguments to the command line tool
+    maybe_dir : Path
+        Directory entry that potentially contains a single replaypack.
+
+    Returns
+    -------
+    ReplaypackProcessorArguments | None
+        Arguments for a single SC2InfoExtractorGo run, or None if the entry
+        is not a valid replaypack directory.
+    """
+
+    input_path = arguments.input_path
+    output_path = arguments.output_path
+
+    logging.debug(f"Processing entry: {maybe_dir}")
+    processing_input_dir = Path(input_path, maybe_dir).resolve()
+    if not processing_input_dir.is_dir():
+        logging.debug("Entry is not a directory, skipping!")
+        return None
+
+    logging.debug(f"Output dir: {output_path}")
+    # Create the main output directory:
+    if not output_path.exists():
+        output_path.mkdir()
+
+    # TODO: use pathlib:
+    path, output_directory_name = os.path.split(maybe_dir)
+    logging.debug(f"Output dir name: {output_directory_name}")
+    if output_directory_name == "input":
+        return None
+
+    output_directory_with_name = Path(output_path, output_directory_name).resolve()
+    logging.debug(f"Output filepath: {output_directory_with_name}")
+
+    # Create the output subdirectories:
+    if not output_directory_with_name.exists():
+        output_directory_with_name.mkdir()
+
+    sc2_info_extractor_go_args = (
+        SC2InfoExtractorGoArguments.get_sc2egset_processing_args(
+            processing_input=processing_input_dir,
+            output=output_directory_with_name,
+            perform_chat_anonymization=arguments.perform_chat_anonymization,
+        )
+    )
+
+    return sc2_info_extractor_go_args

From e6118de8d08bfde731fc9e4b3399335a48ec18a7 Mon Sep 17 00:00:00 2001
From: Kaszanas <34846245+Kaszanas@users.noreply.github.com>
Date: Mon, 6 Jan 2025 21:55:49 +0100
Subject: [PATCH 68/92] feat(directory_flattener.py): added user_prompt feature

---
 .../directory_flattener/directory_flattener.py | 23 +++++++++++--------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/src/datasetpreparator/directory_flattener/directory_flattener.py b/src/datasetpreparator/directory_flattener/directory_flattener.py
index dcf10f9..ea48626 100644
--- a/src/datasetpreparator/directory_flattener/directory_flattener.py
+++ b/src/datasetpreparator/directory_flattener/directory_flattener.py
@@ -11,6 +11,7 @@
 from tqdm import tqdm
 
 from datasetpreparator.settings import LOGGING_FORMAT
+from datasetpreparator.utils.user_prompt import user_prompt_overwrite_ok
 
 
 def save_dir_mapping(output_path: Path, dir_mapping: dict) -> None:
@@ -117,7 +118,7 @@ def directory_flatten(
 
 
 def multiple_directory_flattener(
-    input_path: Path, output_path: Path, file_extension: str
+    input_path: Path, output_path: Path, file_extension: str, force: bool
 ) 
-> Tuple[bool, List[Path]]: """ Provides the main logic for "directory flattening". @@ -139,6 +140,9 @@ def multiple_directory_flattener( file_extension : str Specifies extension for which the detected files will be brought \ up to the top level of the "flattened" directory + force : bool + Specifies if the user wants to overwrite the output directory without \ + being prompted. Returns ------- @@ -149,23 +153,22 @@ def multiple_directory_flattener( # input must be a directory: if not input_path.is_dir(): - logging.error( - f"Input path must be a directory! {input_path.resolve().as_posix()}" - ) + logging.error(f"Input path must be a directory! {str(input_path.resolve())}") return (False, [Path()]) # Input must exist: if not input_path.exists(): - logging.error(f"Input path must exist! {input_path.resolve().as_posix()}") + logging.error(f"Input path must exist! {str(input_path.resolve())}") return (False, [Path()]) # Output path must be a directory: if not output_path.is_dir(): - logging.error( - f"Output path must be a directory! {output_path.resolve().as_posix()}" - ) + logging.error(f"Output path must be a directory! {str(output_path.resolve())}") return (False, [Path()]) + if user_prompt_overwrite_ok(path=output_path, force=force): + output_path.mkdir(exist_ok=True) + output_directories = [] # Iterate over directories: for item in os.listdir(input_path): @@ -182,9 +185,9 @@ def multiple_directory_flattener( continue dir_output_path = Path(output_path, item).resolve() - if not dir_output_path.exists(): + if user_prompt_overwrite_ok(path=dir_output_path, force=force): logging.debug(f"Creating directory {str(dir_output_path)}, didn't exist.") - dir_output_path.mkdir() + dir_output_path.mkdir(exist_ok=True) dir_structure_mapping = directory_flatten( root_directory=maybe_dir, From 2969e0787ad31a8e2458bd451db3fe40f74400ed Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Mon, 6 Jan 2025 21:56:36 +0100 Subject: [PATCH 69/92] refactor(user_prompt.py): added logging --- src/datasetpreparator/utils/user_prompt.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/datasetpreparator/utils/user_prompt.py b/src/datasetpreparator/utils/user_prompt.py index 5c8b76c..cbffc6e 100644 --- a/src/datasetpreparator/utils/user_prompt.py +++ b/src/datasetpreparator/utils/user_prompt.py @@ -1,7 +1,8 @@ from pathlib import Path +import logging -def user_prompt_overwrite_ok(filepath: Path, force: bool) -> bool: +def user_prompt_overwrite_ok(path: Path, force: bool) -> bool: """ Prompts the user to confirm if they want to potentially overwrite a file or directory if it already exists. @@ -26,11 +27,13 @@ def user_prompt_overwrite_ok(filepath: Path, force: bool) -> bool: # File or directory does not exist, so it can be created, # there is no risk of overwriting anything: - if not filepath.exists(): + if not path.exists(): + logging.debug(f"Path {str(path.resolve())} does not exist. Safe to create.") return True # Directory is empty, so it can be overwritten without any risk: - if filepath.is_dir() and len(list(filepath.iterdir())) == 0: + if path.is_dir() and len(list(path.iterdir())) == 0: + logging.debug(f"Directory {str(path.resolve())} is empty. 
Safe to overwrite.") return True return_map = { @@ -40,13 +43,17 @@ def user_prompt_overwrite_ok(filepath: Path, force: bool) -> bool: # File or directory exists, so we need to prompt the user to confirm # if they want to potentially overwrite something and loose data: - print(f"File {filepath} already exists.") + print(f"File {path} already exists.") user_input = input("Do you want to overwrite it? (y/n): ") if user_input.lower() in return_map: + logging.debug( + f"User input: {user_input.lower()}, returning: {return_map[user_input.lower()]}" + ) return return_map[user_input.lower()] + logging.debug( + f"Invalid input provided: {user_input.lower()}, calling the function recursively." + ) print("Invalid input, please type 'y' or 'n'.") - return user_prompt_overwrite_ok(filepath, force) - - return False + return user_prompt_overwrite_ok(path, force) From 308b772e88a2025e3d27da6016f52a442a844435 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Mon, 6 Jan 2025 22:01:11 +0100 Subject: [PATCH 70/92] feat(directory_packager.py): added user prompting --- .../directory_packager/directory_packager.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/datasetpreparator/directory_packager/directory_packager.py b/src/datasetpreparator/directory_packager/directory_packager.py index c112f9b..4147eeb 100644 --- a/src/datasetpreparator/directory_packager/directory_packager.py +++ b/src/datasetpreparator/directory_packager/directory_packager.py @@ -7,6 +7,7 @@ import click from datasetpreparator.settings import LOGGING_FORMAT +from datasetpreparator.utils.user_prompt import user_prompt_overwrite_ok def multiple_dir_packager(input_path: str) -> List[Path]: @@ -52,11 +53,15 @@ def dir_packager(directory_path: Path) -> Path: """ final_archive_path = directory_path.with_suffix(".zip") - logging.info(f"Set final archive name to: {final_archive_path.as_posix()}") - with ZipFile(final_archive_path.as_posix(), "w") as zip_file: - for file in directory_path.iterdir(): - abs_filepath = os.path.join(directory_path, file) - zip_file.write(filename=abs_filepath, arcname=file, compress_type=ZIP_BZIP2) + + if user_prompt_overwrite_ok(final_archive_path): + logging.info(f"Set final archive name to: {str(final_archive_path)}") + with ZipFile(str(final_archive_path), "w") as zip_file: + for file in directory_path.iterdir(): + abs_filepath = os.path.join(directory_path, file) + zip_file.write( + filename=abs_filepath, arcname=file, compress_type=ZIP_BZIP2 + ) return final_archive_path From e3bf197f860289ec433b5bd303fd786531b4f9ab Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Mon, 6 Jan 2025 22:25:15 +0100 Subject: [PATCH 71/92] refactor: using glob instead of os.walk --- .../file_renamer/file_renamer.py | 85 ++++++++----------- 1 file changed, 37 insertions(+), 48 deletions(-) diff --git a/src/datasetpreparator/file_renamer/file_renamer.py b/src/datasetpreparator/file_renamer/file_renamer.py index cb10725..ed602f8 100644 --- a/src/datasetpreparator/file_renamer/file_renamer.py +++ b/src/datasetpreparator/file_renamer/file_renamer.py @@ -1,5 +1,4 @@ import logging -import os from pathlib import Path import click @@ -23,56 +22,46 @@ def file_renamer(input_path: Path) -> None: """ if not input_path.exists(): - raise FileNotFoundError(f"Input path {input_path.as_posix()} does not exist.") - - if not input_path.is_dir(): - raise NotADirectoryError( - f"Input path {input_path.as_posix()} is not a directory." 
+        logging.error(
+            f"Input path {str(input_path)} does not exist. No files will be renamed."
         )
+        return
 
+    if not input_path.is_dir():
+        logging.error(f"Input path {str(input_path)} is not a directory.")
+        return
 
     if not len(list(input_path.iterdir())) > 0:
-        raise ValueError(f"Input path {input_path.as_posix()} is empty.")
-
-    # TODO: This can be done with iterdir:
-    for directory, _, file_list in os.walk(input_path.as_posix()):
-        for file in file_list:
-            # REVIEW: Can this be done better? Match case statement?
-            if file.endswith(".zip"):
-                os.rename(
-                    os.path.join(directory, file),
-                    os.path.join(directory, os.path.basename(directory) + "_data.zip"),
-                )
-            if file.startswith("package_summary"):
-                os.rename(
-                    os.path.join(directory, file),
-                    os.path.join(
-                        directory, os.path.basename(directory) + "_summary.json"
-                    ),
-                )
-            if file.startswith("processed_mapping"):
-                os.rename(
-                    os.path.join(directory, file),
-                    os.path.join(
-                        directory,
-                        os.path.basename(directory) + "_processed_mapping.json",
-                    ),
-                )
-            if file.startswith("processed_failed"):
-                os.rename(
-                    os.path.join(directory, file),
-                    os.path.join(
-                        directory,
-                        os.path.basename(directory) + "_processed_failed.log",
-                    ),
-                )
-            if file.startswith("main_log"):
-                os.rename(
-                    os.path.join(directory, file),
-                    os.path.join(
-                        directory,
-                        os.path.basename(directory) + "_main_log.log",
-                    ),
-                )
+        logging.error(f"Input path {str(input_path)} is empty. No files to rename.")
+        return
+
+    # Materialize the glob up front, renaming entries while a lazy generator
+    # is being consumed can skip or repeat files:
+    all_files = list(input_path.glob("**/*"))
+    for file in all_files:
+        # Only rename regular files, mirroring the previous os.walk behavior:
+        if not file.is_file():
+            continue
+
+        directory = file.parent
+
+        if file.name.endswith(".zip"):
+            new_name = directory.name + "_data.zip"
+            new_path = directory / new_name
+            file.rename(new_path)
+
+        if file.name.startswith("package_summary"):
+            new_name = directory.name + "_summary.json"
+            new_path = directory / new_name
+            file.rename(new_path)
+
+        if file.name.startswith("processed_mapping"):
+            new_name = directory.name + "_processed_mapping.json"
+            new_path = directory / new_name
+            file.rename(new_path)
+
+        if file.name.startswith("processed_failed"):
+            new_name = directory.name + "_processed_failed.log"
+            new_path = directory / new_name
+            file.rename(new_path)
+
+        if file.name.startswith("main_log"):
+            new_name = directory.name + "_main_log.log"
+            new_path = directory / new_name
+            file.rename(new_path)


@click.command(
From cc6a65a53468a2f9d91c2ee66121e66c72ced224 Mon Sep 17 00:00:00 2001
From: Kaszanas <34846245+Kaszanas@users.noreply.github.com>
Date: Mon, 6 Jan 2025 22:27:19 +0100
Subject: [PATCH 72/92] docs: changed CLI description

---
 src/datasetpreparator/file_renamer/file_renamer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/datasetpreparator/file_renamer/file_renamer.py b/src/datasetpreparator/file_renamer/file_renamer.py
index ed602f8..40dc2f8 100644
--- a/src/datasetpreparator/file_renamer/file_renamer.py
+++ b/src/datasetpreparator/file_renamer/file_renamer.py
@@ -65,7 +65,7 @@ def file_renamer(input_path: Path) -> None:


@click.command(
-    help="Tool used for renaming auxilliary files (log files) that are produced when creating StarCraft 2 (SC2) datasets with https://github.com/Kaszanas/SC2InfoExtractorGo"
+    help="Tool used for renaming auxiliary files (log files) that are produced when creating StarCraft 2 (SC2) datasets with https://github.com/Kaszanas/SC2InfoExtractorGo. Additionally, this tool renames the .zip files so that they carry the original directory name with an added '_data' suffix."
) @click.option( "--input_path", From 75c744e63a3034fbe3b1eb8907d9b9a5d6725ae3 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Mon, 6 Jan 2025 22:27:56 +0100 Subject: [PATCH 73/92] refactor: renamed force to force_overwrite --- src/datasetpreparator/utils/user_prompt.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/datasetpreparator/utils/user_prompt.py b/src/datasetpreparator/utils/user_prompt.py index cbffc6e..1f8bd21 100644 --- a/src/datasetpreparator/utils/user_prompt.py +++ b/src/datasetpreparator/utils/user_prompt.py @@ -2,7 +2,7 @@ import logging -def user_prompt_overwrite_ok(path: Path, force: bool) -> bool: +def user_prompt_overwrite_ok(path: Path, force_overwrite: bool) -> bool: """ Prompts the user to confirm if they want to potentially overwrite a file or directory if it already exists. @@ -22,7 +22,7 @@ def user_prompt_overwrite_ok(path: Path, force: bool) -> bool: """ # User passed the force flag, so the files or directories can be overwritten: - if force: + if force_overwrite: return True # File or directory does not exist, so it can be created, @@ -56,4 +56,4 @@ def user_prompt_overwrite_ok(path: Path, force: bool) -> bool: f"Invalid input provided: {user_input.lower()}, calling the function recursively." ) print("Invalid input, please type 'y' or 'n'.") - return user_prompt_overwrite_ok(path, force) + return user_prompt_overwrite_ok(path, force_overwrite) From 8a40d05f3ef18b5277cce5de9996e5bf80497b6a Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Mon, 6 Jan 2025 22:28:38 +0100 Subject: [PATCH 74/92] feat: added force_overwrite flag to CLI --- .../directory_flattener.py | 27 ++++++++++++++----- .../directory_packager/directory_packager.py | 11 ++++++-- .../sc2egset_replaypack_processor.py | 4 +-- 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/src/datasetpreparator/directory_flattener/directory_flattener.py b/src/datasetpreparator/directory_flattener/directory_flattener.py index ea48626..cad3935 100644 --- a/src/datasetpreparator/directory_flattener/directory_flattener.py +++ b/src/datasetpreparator/directory_flattener/directory_flattener.py @@ -1,4 +1,3 @@ -import os from pathlib import Path from typing import Dict, List, Tuple import json @@ -118,7 +117,7 @@ def directory_flatten( def multiple_directory_flattener( - input_path: Path, output_path: Path, file_extension: str, force: bool + input_path: Path, output_path: Path, file_extension: str, force_overwrite: bool ) -> Tuple[bool, List[Path]]: """ Provides the main logic for "directory flattening". @@ -166,12 +165,12 @@ def multiple_directory_flattener( logging.error(f"Output path must be a directory! 
{str(output_path.resolve())}") return (False, [Path()]) - if user_prompt_overwrite_ok(path=output_path, force=force): + if user_prompt_overwrite_ok(path=output_path, force_overwrite=force_overwrite): output_path.mkdir(exist_ok=True) output_directories = [] # Iterate over directories: - for item in os.listdir(input_path): + for item in input_path.iterdir(): maybe_dir = Path(input_path, item).resolve() if not maybe_dir.is_dir(): logging.debug(f"Skipping {str(maybe_dir)}, not a directory.") @@ -185,7 +184,9 @@ def multiple_directory_flattener( continue dir_output_path = Path(output_path, item).resolve() - if user_prompt_overwrite_ok(path=dir_output_path, force=force): + if user_prompt_overwrite_ok( + path=dir_output_path, force_overwrite=force_overwrite + ): logging.debug(f"Creating directory {str(dir_output_path)}, didn't exist.") dir_output_path.mkdir(exist_ok=True) @@ -239,13 +240,26 @@ def multiple_directory_flattener( required=True, help="File extension for the files that will be put to the top level directory. Example ('.SC2Replay').", ) +@click.option( + "--force_overwrite", + type=bool, + default=False, + required=True, + help="Flag that specifies if the user wants to overwrite files or directories without being prompted.", +) @click.option( "--log", type=click.Choice(["INFO", "DEBUG", "ERROR", "WARN"], case_sensitive=False), default="WARN", help="Log level. Default is WARN.", ) -def main(input_path: Path, output_path: Path, file_extension: str, log: str) -> None: +def main( + input_path: Path, + output_path: Path, + file_extension: str, + log: str, + force_overwrite: bool, +) -> None: numeric_level = getattr(logging, log.upper(), None) if not isinstance(numeric_level, int): raise ValueError(f"Invalid log level: {numeric_level}") @@ -255,6 +269,7 @@ def main(input_path: Path, output_path: Path, file_extension: str, log: str) -> input_path=input_path, output_path=output_path, file_extension=file_extension, + force_overwrite=force_overwrite, ) diff --git a/src/datasetpreparator/directory_packager/directory_packager.py b/src/datasetpreparator/directory_packager/directory_packager.py index 4147eeb..54362b8 100644 --- a/src/datasetpreparator/directory_packager/directory_packager.py +++ b/src/datasetpreparator/directory_packager/directory_packager.py @@ -75,19 +75,26 @@ def dir_packager(directory_path: Path) -> Path: required=True, help="Input path to the directory containing the dataset that is going to be processed by packaging into .zip archives.", ) +@click.option( + "--force_overwrite", + type=bool, + default=False, + required=True, + help="Flag that specifies if the user wants to overwrite files or directories without being prompted.", +) @click.option( "--log", type=click.Choice(["INFO", "DEBUG", "ERROR", "WARN"], case_sensitive=False), default="WARN", help="Log level. 
Default is WARN.", ) -def main(input_path: Path, log: str): +def main(input_path: Path, log: str, force_overwrite: bool): numeric_level = getattr(logging, log.upper(), None) if not isinstance(numeric_level, int): raise ValueError(f"Invalid log level: {numeric_level}") logging.basicConfig(format=LOGGING_FORMAT, level=numeric_level) - multiple_dir_packager(input_path=input_path) + multiple_dir_packager(input_path=input_path, force_overwrite=force_overwrite) if __name__ == "__main__": diff --git a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py index a321dc8..1662225 100644 --- a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py +++ b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py @@ -117,7 +117,7 @@ def main( logging.basicConfig(format=LOGGING_FORMAT, level=numeric_level) # Create output directory if it does not exist: - if user_prompt_overwrite_ok(filepath=output_path, force=force_overwrite): + if user_prompt_overwrite_ok(path=output_path, force_overwrite=force_overwrite): output_path.mkdir(exist_ok=True) input_path = Path(input_path).resolve() @@ -147,7 +147,7 @@ def main( # if user_prompt_overwrite_ok( - filepath=directory_flattener_output_path, force=force_overwrite + path=directory_flattener_output_path, force_overwrite=force_overwrite ): directory_flattener_output_path.mkdir(exist_ok=True) From aa10695b350c532bdbf3670bf6d08e050ea74eae Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Mon, 6 Jan 2025 22:32:53 +0100 Subject: [PATCH 75/92] feat(json_merger.py): added user prompting, and CLI flag --- .../json_merger/json_merger.py | 27 +++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/src/datasetpreparator/json_merger/json_merger.py b/src/datasetpreparator/json_merger/json_merger.py index 8822df0..e83c795 100644 --- a/src/datasetpreparator/json_merger/json_merger.py +++ b/src/datasetpreparator/json_merger/json_merger.py @@ -6,6 +6,7 @@ import click from datasetpreparator.settings import LOGGING_FORMAT +from datasetpreparator.utils.user_prompt import user_prompt_overwrite_ok def merge_files(path_to_json_one: Path, path_to_json_two: Path) -> Dict[str, str]: @@ -65,7 +66,10 @@ def save_output(output_filepath: Path, output_dict: Dict[str, str]) -> Path: def json_merger( - path_to_json_one: Path, path_to_json_two: Path, output_filepath: Path + path_to_json_one: Path, + path_to_json_two: Path, + output_filepath: Path, + force_overwrite: bool, ) -> Path: """ Merges two JSON files into one. @@ -85,6 +89,13 @@ def json_merger( Returns a path to the saved merged file. """ + # Checking early if the output file can be overwritten: + # at this stage no merging of JSON files has been done yet. + # User won't have to wait for the files to be merged to be prompted. + if not user_prompt_overwrite_ok(output_filepath, force_overwrite): + logging.error("User did not confirm possible overwrite. 
Exiting...") + return Path("") + output_dict = merge_files( path_to_json_one=path_to_json_one, path_to_json_two=path_to_json_two ) @@ -117,6 +128,13 @@ def json_merger( required=True, help="Filepath to which the result JSON file will be saved, note that any existing file of the same name will be overwriten.", ) +@click.option( + "--force_overwrite", + type=bool, + default=False, + required=True, + help="Flag that specifies if the user wants to overwrite files or directories without being prompted.", +) @click.option( "--log", type=click.Choice(["INFO", "DEBUG", "ERROR", "WARN"], case_sensitive=False), @@ -124,7 +142,11 @@ def json_merger( help="Log level. Default is WARN.", ) def main( - path_to_json_one: Path, path_to_json_two: Path, output_filepath: Path, log: str + path_to_json_one: Path, + path_to_json_two: Path, + output_filepath: Path, + log: str, + force_overwrite: bool, ) -> None: numeric_level = getattr(logging, log.upper(), None) if not isinstance(numeric_level, int): @@ -135,6 +157,7 @@ def main( path_to_json_one=path_to_json_one, path_to_json_two=path_to_json_two, output_filepath=output_filepath, + force_overwrite=force_overwrite, ) From cf28564ab1ab820c198487ab10d0e378563fc607 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Mon, 6 Jan 2025 22:39:31 +0100 Subject: [PATCH 76/92] refactor(processed_mapping_copier.py): using pathlib, refactored functionality with iterdir --- .../processed_mapping_copier.py | 29 ++++++++++--------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/datasetpreparator/processed_mapping_copier/processed_mapping_copier.py b/src/datasetpreparator/processed_mapping_copier/processed_mapping_copier.py index 42efd0f..55c3482 100644 --- a/src/datasetpreparator/processed_mapping_copier/processed_mapping_copier.py +++ b/src/datasetpreparator/processed_mapping_copier/processed_mapping_copier.py @@ -1,5 +1,4 @@ import logging -import os from pathlib import Path import shutil @@ -8,37 +7,39 @@ from datasetpreparator.settings import LOGGING_FORMAT -def processed_mapping_copier(input_path: str, output_path: str) -> None: +def processed_mapping_copier(input_path: Path, output_path: Path) -> None: """ Exposes logic for copying a specific file from all of the immediate subdirectories of the input path to the matching immediate subdirectories in the output path. Parameters ---------- - input_path : str + input_path : Path Specifies the input path that contains subdirectories with the \ desired file to be copied. - output_path : str + output_path : Path Specifies the output path that contains matching subdirectories which \ will be the destination of the copied file. 
""" # Iterating over the input path to find all of the immediate directories: - for item in os.listdir(input_path): - maybe_dir = os.path.join(input_path, item) - if os.path.isdir(maybe_dir): + + for maybe_dir in input_path.iterdir(): + if maybe_dir.is_dir(): # if the output directory does not exist the copying is ommited: - dir_output_path = os.path.join(os.path.abspath(output_path), item) - if not os.path.exists(dir_output_path): + dir_name = maybe_dir.name + dir_output_path = (output_path / dir_name).resolve() + if not dir_output_path.exists(): continue # The mapping was detected within the input directory # So the path is created and the file is copied: - if "processed_mapping.json" in os.listdir(maybe_dir): - mapping_filepath = os.path.join(maybe_dir, "processed_mapping.json") - mapping_out_filepath = os.path.join( - dir_output_path, "processed_mapping.json" - ) + dir_files = maybe_dir.iterdir() + if "processed_mapping.json" in list(dir_files): + mapping_filepath = (maybe_dir / "processed_mapping.json").resolve() + mapping_out_filepath = ( + dir_output_path / "processed_mapping.json" + ).resolve() shutil.copy(mapping_filepath, mapping_out_filepath) From 2efea732f01a2ad4ce7fe647be4ff4d7d0830dee Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Mon, 6 Jan 2025 22:57:50 +0100 Subject: [PATCH 77/92] refactor: applied user prompting for every script --- .../sc2/sc2_map_downloader/sc2_map_downloader.py | 11 ++++++++--- .../sc2reset_replaypack_downloader.py | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/datasetpreparator/sc2/sc2_map_downloader/sc2_map_downloader.py b/src/datasetpreparator/sc2/sc2_map_downloader/sc2_map_downloader.py index bf3d9d4..0b966d4 100644 --- a/src/datasetpreparator/sc2/sc2_map_downloader/sc2_map_downloader.py +++ b/src/datasetpreparator/sc2/sc2_map_downloader/sc2_map_downloader.py @@ -7,6 +7,7 @@ import requests from datasetpreparator.settings import LOGGING_FORMAT +from datasetpreparator.utils.user_prompt import user_prompt_overwrite_ok def list_maps_to_download(replay_files: List[Path]) -> Set[Tuple[str, str]]: @@ -64,14 +65,19 @@ def download_maps( for map_hash, map_url in hash_set: try: - response = requests.get(map_url, allow_redirects=True) output_filepath = Path(output_path, f"{map_hash}.SC2Map").resolve() + if not user_prompt_overwrite_ok(output_filepath): + logging.warning(f"Skipping map: hash: {map_hash} url: {map_url}") + continue + response = requests.get(map_url, allow_redirects=True) with output_filepath.open(mode="wb") as output_map_file: output_map_file.write(response.content) except: # noqa: E722 logging.error( f"Error detected! 
Cannot process map: hash: {map_hash} url: {map_url}"
             )
+            logging.warning("Exception handled, continuing...")
             continue

     return output_path
@@ -91,7 +97,6 @@ def sc2_map_downloader(input_path: Path, output_path: Path) -> Path:
     """

     glob_pattern = "**/*.SC2Replay"
-
     replay_files = input_path.glob(glob_pattern)

     maps_to_download = list_maps_to_download(replay_files=replay_files)
@@ -146,7 +151,7 @@ def main(input_path: Path, output_path: Path, log: str) -> None:
     logging.basicConfig(format=LOGGING_FORMAT, level=numeric_level)

     output_dir = sc2_map_downloader(
-        input_path=input_path, output_path=output_path.resolve()
+        input_path=input_path.resolve(), output_path=output_path.resolve()
     )

    logging.info(f"Finished downloading maps to: {output_dir.as_posix()}")
diff --git a/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/sc2reset_replaypack_downloader.py b/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/sc2reset_replaypack_downloader.py
index 6db2161..90d3e57 100644
--- a/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/sc2reset_replaypack_downloader.py
+++ b/src/datasetpreparator/sc2/sc2reset_replaypack_downloader/sc2reset_replaypack_downloader.py
@@ -115,7 +115,7 @@ def sc2reset_replaypack_downloader(
     default="WARN",
     help="Log level. Default is WARN.",
 )
-def main(download_path: Path, unpack_path: Path, n_workers: int, log: str):
+def main(download_path: Path, unpack_path: Path, n_workers: int, log: str) -> None:
     numeric_level = getattr(logging, log.upper(), None)
     if not isinstance(numeric_level, int):
        raise ValueError(f"Invalid log level: {numeric_level}")
From 7b0ae21a361b16f5606c2e99e1d4c1ae6a2ea5fe Mon Sep 17 00:00:00 2001
From: Kaszanas <34846245+Kaszanas@users.noreply.github.com>
Date: Mon, 6 Jan 2025 23:03:52 +0100
Subject: [PATCH 78/92] ci: attempt at fixing GH Actions, new make target name

---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index dac1008..2d6dd19 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -23,7 +23,7 @@ jobs:

       - name: Build Dev Docker Image
         run: |
-          make docker_build_dev
+          make docker_build_devcontainer

       - name: Docker Run pre-commit on all files.
run: | From 7b31022599d3713371c43bb90c19214c2fb6167a Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Mon, 6 Jan 2025 23:05:54 +0100 Subject: [PATCH 79/92] ci: fixing next step in CI pipeline, new target name --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2d6dd19..be54a88 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -44,7 +44,7 @@ jobs: - name: Build Dev Docker Image run: | - make docker_build_dev PYTHON_VERSION=${{ matrix.python-version }} + make docker_build_devcontainer PYTHON_VERSION=${{ matrix.python-version }} - name: Build Docker Image With Python ${{ matrix.python-version }} run: | From bcb41dbb443229d17f08d968fb18aacf2b9a60a5 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Wed, 8 Jan 2025 00:38:58 +0100 Subject: [PATCH 80/92] test: fixing tests with new features, fixing assertions --- makefile | 14 ++++++++++- .../directory_flattener.py | 2 +- .../directory_packager/directory_packager.py | 14 +++++++---- tests/test_cases/directory_flattener_test.py | 22 ++++++++++++++++++ tests/test_cases/directory_packager_test.py | 4 +++- tests/test_cases/json_merger_test.py | 1 + .../sc2egset_replaypack_processor_test.py | 2 ++ tests/test_main.py | 2 +- tests/test_settings.py | 5 ++++ tests/test_utils.py | 23 ++++++++----------- 10 files changed, 68 insertions(+), 21 deletions(-) diff --git a/makefile b/makefile index b2657af..f0d006f 100644 --- a/makefile +++ b/makefile @@ -14,7 +14,9 @@ COMPOSE_PROJECT_NAME = datasetpreparator # Python variables: PYTHON_VERSION = 3.11 -TEST_COMMAND = "poetry run pytest --durations=100 --ignore-glob='test_*.py' tests --cov=datasetpreparator --cov-report term-missing --cov-report html 2>&1" +TEST_COMMAND_RAW = poetry run pytest --durations=100 --ignore-glob='test_*.py' tests --cov=datasetpreparator --cov-report term-missing --cov-report html 2>&1 + +TEST_COMMAND = "$(TEST_COMMAND_RAW)" TEST_COMMAND_LOG = "poetry run pytest --durations=100 --ignore-glob='test_*.py' tests --cov=datasetpreparator --cov-report term-missing --cov-report html 2>&1 | tee /app/logs/test_output.log" @@ -100,6 +102,16 @@ package_sc2reset_dataset: ## Packages the raw data. Used to prepare SC2ReSet Rep python3 file_packager.py \ --input_dir ./processing/directory_flattener/output +################### +#### LOCAL ######## +################### +.PHONY: test +test: ## Runs the tests using the local environment. + @echo "Running the tests using the local environment." 
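+# NOTE: Unlike the docker_run_test target, this runs pytest directly on the host
+# through Poetry, so a local development environment is required.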
+	@echo "Using the test command: $(TEST_COMMAND)"
+	$(TEST_COMMAND_RAW)
+
+
 ###################
 #### DOCKER #######
 ###################
diff --git a/src/datasetpreparator/directory_flattener/directory_flattener.py b/src/datasetpreparator/directory_flattener/directory_flattener.py
index cad3935..b5baf81 100644
--- a/src/datasetpreparator/directory_flattener/directory_flattener.py
+++ b/src/datasetpreparator/directory_flattener/directory_flattener.py
@@ -183,7 +183,7 @@ def multiple_directory_flattener(
             )
             continue

-        dir_output_path = Path(output_path, item).resolve()
+        dir_output_path = Path(output_path, item.name).resolve()
         if user_prompt_overwrite_ok(
             path=dir_output_path, force_overwrite=force_overwrite
         ):
diff --git a/src/datasetpreparator/directory_packager/directory_packager.py b/src/datasetpreparator/directory_packager/directory_packager.py
index 54362b8..28cb373 100644
--- a/src/datasetpreparator/directory_packager/directory_packager.py
+++ b/src/datasetpreparator/directory_packager/directory_packager.py
@@ -10,7 +10,7 @@
 from datasetpreparator.utils.user_prompt import user_prompt_overwrite_ok


-def multiple_dir_packager(input_path: str) -> List[Path]:
+def multiple_dir_packager(input_path: str, force_overwrite: bool) -> List[Path]:
     """
     Packages the specified directory into a .zip archive.

@@ -18,6 +18,8 @@
     ----------
     input_path : str
         Specifies the path which will be turned into a .zip archive.
+    force_overwrite : bool
+        Specifies if the user wants to overwrite files or directories without being prompted.

     Returns
     -------
@@ -31,12 +33,14 @@
         if not directory_path.is_dir():
             continue

-        output_archives.append(dir_packager(directory_path=directory_path))
+        output_archives.append(
+            dir_packager(directory_path=directory_path, force_overwrite=force_overwrite)
+        )

     return output_archives


-def dir_packager(directory_path: Path) -> Path:
+def dir_packager(directory_path: Path, force_overwrite: bool) -> Path:
     """
     Archives a single input directory. Archive is stored
     in the same directory as the input.

@@ -45,6 +49,8 @@
     ----------
     directory_path : Path
         Specifies the path to the directory that will be archived.
+    force_overwrite : bool
+        Specifies if the user wants to overwrite files or directories without being prompted.

     Returns
     -------
@@ -54,7 +60,7 @@

     final_archive_path = directory_path.with_suffix(".zip")

-    if user_prompt_overwrite_ok(final_archive_path):
+    if user_prompt_overwrite_ok(final_archive_path, force_overwrite=force_overwrite):
         logging.info(f"Set final archive name to: {str(final_archive_path)}")
         with ZipFile(str(final_archive_path), "w") as zip_file:
             for file in directory_path.iterdir():
diff --git a/tests/test_cases/directory_flattener_test.py b/tests/test_cases/directory_flattener_test.py
index 2629bcc..19b77aa 100644
--- a/tests/test_cases/directory_flattener_test.py
+++ b/tests/test_cases/directory_flattener_test.py
@@ -1,3 +1,4 @@
+import logging
 import unittest
 from datasetpreparator.directory_flattener.directory_flattener import (
     multiple_directory_flattener,
@@ -48,11 +49,19 @@ def setUpClass(cls) -> None:
             extension=".txt",
         )

+        # TODO: There could also be empty input directories to test, or directories
+        # with all files that are not of the selected extension.
+        # In that case, to get a proper assertion, each of the input directories should
+        # be remembered and the number of files in the output should be known
+        # before the assertions are made.
+        # cls.list_of_input_dirs = list(cls.input_path.iterdir())
+
     def test_directory_flattener(self) -> None:
         ok, list_of_output_dirs = multiple_directory_flattener(
             input_path=self.input_path,
             output_path=self.output_path,
             file_extension=self.file_extension,
+            force_overwrite=True,
         )

         # Check if the input data was correct for processing:
@@ -60,13 +69,26 @@ def test_directory_flattener(self) -> None:

         # Check the number of output directories:
         self.assertEqual(self.n_dirs, len(list_of_output_dirs))
+        logging.info(f"Number of output directories: {len(list_of_output_dirs)}")
+        logging.info(f"Output directories: {list_of_output_dirs}")

         for output_dir in list_of_output_dirs:
+            logging.info(f"Checking output directory: {output_dir}")
            # Assert that the final directory has the same number of
            # files with the selected extension.
             out_files = list(output_dir.glob(f"*{self.file_extension}"))
+
+            logging.info(
+                f"Number of files with extension {self.file_extension}: {len(out_files)}"
+            )
+            logging.info(f"{self.n_nested_files=}")
+
+            # Check only the number of files with the selected extension:
             self.assertEqual(self.n_nested_files, len(out_files))

+            # Check the file count including the created processed mapping file:
+            self.assertEqual(self.n_nested_files + 1, len(list(output_dir.iterdir())))
+
            # Assert that the final flattened directory
            # has one .json file with the mapping
             json_files = list(output_dir.glob("*.json"))
diff --git a/tests/test_cases/directory_packager_test.py b/tests/test_cases/directory_packager_test.py
index 52a470a..ac204c4 100644
--- a/tests/test_cases/directory_packager_test.py
+++ b/tests/test_cases/directory_packager_test.py
@@ -38,7 +38,9 @@ def setUpClass(cls) -> None:
             create_test_text_files(input_path=directory, n_files=cls.n_files)

     def test_multiple_dir_packager(self) -> None:
-        archives = multiple_dir_packager(input_path=self.input_path)
+        archives = multiple_dir_packager(
+            input_path=self.input_path, force_overwrite=True
+        )

        # Archive should exist:
         self.assertTrue(archives[0].exists())
diff --git a/tests/test_cases/json_merger_test.py b/tests/test_cases/json_merger_test.py
index dc4921b..db48113 100644
--- a/tests/test_cases/json_merger_test.py
+++ b/tests/test_cases/json_merger_test.py
@@ -47,6 +47,7 @@ def test_json_merger(self):
             path_to_json_one=self.path_to_json_one,
             path_to_json_two=self.path_to_json_two,
             output_filepath=self.output_filepath,
+            force_overwrite=True,
         )

         # Read merged file:
diff --git a/tests/test_cases/sc2egset_replaypack_processor_test.py b/tests/test_cases/sc2egset_replaypack_processor_test.py
index 40a0dac..d6baa54 100644
--- a/tests/test_cases/sc2egset_replaypack_processor_test.py
+++ b/tests/test_cases/sc2egset_replaypack_processor_test.py
@@ -32,6 +32,7 @@ def setUpClass(cls) -> None:
         # Create and get test input and output directories:
         cls.input_path = create_script_test_input_dir(script_name=cls.SCRIPT_NAME)
         cls.output_path = create_script_test_output_dir(script_name=cls.SCRIPT_NAME)
+        cls.maps_directory = cls.input_path / "maps"

         # TODO: Verify that SC2InfoExtractorGo is available in path.
         # If not available download from GitHub release page.
@@ -51,6 +52,7 @@ def test_sc2_replaypack_processor(self): arguments = ReplaypackProcessorArguments( input_path=self.input_path, output_path=self.output_path, + maps_directory=self.maps_directory, n_processes=1, ) diff --git a/tests/test_main.py b/tests/test_main.py index a46355f..c49a680 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -21,7 +21,7 @@ def suite(): start_dir = os.path.join(get_workspace_dir(), "tests") suite = unittest.TestLoader().discover( start_dir=start_dir, - pattern="sc2reset_replaypack_downloader_test.py", # *_test.py + pattern="directory_flattener_test.py", # *_test.py ) return suite diff --git a/tests/test_settings.py b/tests/test_settings.py index eaadbbb..e9af6ce 100644 --- a/tests/test_settings.py +++ b/tests/test_settings.py @@ -1,3 +1,8 @@ +import sys + + +TEST_WORKSPACE = sys.path[0] + TEST_DIR_NAME = "tests" TEST_FILES_NAME = "test_files" diff --git a/tests/test_utils.py b/tests/test_utils.py index cfa0a99..64a7578 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,11 +1,11 @@ -import os +import json import logging -from pathlib import Path import shutil +from pathlib import Path from typing import List -import json -from tests.test_settings import TEST_DIR_NAME, TEST_FILES_NAME + +from tests.test_settings import TEST_DIR_NAME, TEST_FILES_NAME, TEST_WORKSPACE def get_workspace_dir() -> Path: @@ -19,16 +19,10 @@ def get_workspace_dir() -> Path: Returns the path to the workspace. """ - logging.info( - "Entered get_workspace_dir(), attempting to set \ - workspace_dir = os.environ.get('TEST_WORKSPACE')" - ) + logging.info("Entered get_workspace_dir(), attempting to set workspace_dir.") - workspace_dir = Path(os.environ.get("TEST_WORKSPACE")).resolve() - logging.info( - f"Successfully set workspace_dir = {workspace_dir}, \ - Attempting to return workspace_dir." 
- ) + workspace_dir = Path(TEST_WORKSPACE).resolve() + logging.info(f"Successfully set workspace_dir = {workspace_dir}") return workspace_dir @@ -345,11 +339,14 @@ def dir_test_cleanup( # Removes entire script test directory and returns as it # contains both input and output directories: if delete_script_test_dir_bool: + logging.info(f"{delete_script_test_dir_bool=}, deleting script test dir.") delete_script_test_dir(script_name=script_name) return if delete_script_test_input_bool: + logging.info(f"{delete_script_test_input_bool=}, deleting script test input.") delete_script_test_input(script_name=script_name) if delete_script_test_output_bool: + logging.info(f"{delete_script_test_output_bool=}, deleting script test output.") delete_script_test_output(script_name=script_name) From e1a1a005edb66acc41785fe78f8340a35816b6f2 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Wed, 8 Jan 2025 19:10:36 +0100 Subject: [PATCH 81/92] feat: drafted full SC2ReSet/SC2EGSet pipeline --- .../sc2egset_replaypack_processor.py | 223 ++++++++++-------- .../utils/file_copier.py | 33 +++ .../utils/multiprocess.py | 30 +++ .../utils/replaypack_processor_args.py | 17 +- .../sc2egset_replaypack_processor_test.py | 1 + 5 files changed, 205 insertions(+), 99 deletions(-) create mode 100644 src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/file_copier.py diff --git a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py index 1662225..89ea24e 100644 --- a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py +++ b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py @@ -1,60 +1,133 @@ -from pathlib import Path import logging -import click -from tqdm import tqdm +from pathlib import Path -from datasetpreparator.sc2.sc2egset_replaypack_processor.utils.download_maps import ( - sc2info_extractor_go_map_download, -) -from datasetpreparator.settings import LOGGING_FORMAT -from datasetpreparator.sc2.sc2egset_replaypack_processor.utils.replaypack_processor_args import ( - ReplaypackProcessorArguments, - define_sc2egset_args, -) -from datasetpreparator.sc2.sc2egset_replaypack_processor.utils.multiprocess import ( - multiprocessing_scheduler, -) +import click from datasetpreparator.directory_flattener.directory_flattener import ( multiple_directory_flattener, ) - from datasetpreparator.directory_packager.directory_packager import ( multiple_dir_packager, ) - +from datasetpreparator.file_renamer.file_renamer import file_renamer from datasetpreparator.processed_mapping_copier.processed_mapping_copier import ( processed_mapping_copier, ) - -from datasetpreparator.file_renamer.file_renamer import file_renamer +from datasetpreparator.sc2.sc2egset_replaypack_processor.utils.download_maps import ( + sc2info_extractor_go_map_download, +) +from datasetpreparator.sc2.sc2egset_replaypack_processor.utils.file_copier import ( + move_files, +) +from datasetpreparator.sc2.sc2egset_replaypack_processor.utils.multiprocess import ( + sc2egset_replaypack_processor, +) +from datasetpreparator.sc2.sc2egset_replaypack_processor.utils.replaypack_processor_args import ( + ReplaypackProcessorArguments, +) +from datasetpreparator.settings import LOGGING_FORMAT from datasetpreparator.utils.user_prompt import user_prompt_overwrite_ok -def sc2egset_replaypack_processor( - arguments: 
ReplaypackProcessorArguments,
-):
-    """
-    Processes multiple StarCraft II replaypacks
-    by using https://github.com/Kaszanas/SC2InfoExtractorGo
+def prepare_sc2reset(
+    output_path: Path,
+    replaypacks_input_path: Path,
+    n_processes: int,
+    force_overwrite: bool,
+    maps_output_path: Path,
+    directory_flattener_output_path: Path,
+) -> None:
+    # Directory flattener:
+
+    if user_prompt_overwrite_ok(
+        path=directory_flattener_output_path, force_overwrite=force_overwrite
+    ):
+        directory_flattener_output_path.mkdir(exist_ok=True)
+
+    # TODO: Check if the output directory is not empty, if it is you can proceed
+    # if the directory is not empty issue a warning with confirmation prompt.
+
+    # if not empty and not force:
+    # prompt user to confirm overwrite
+
+    logging.info("Flattening directories...")
+    multiple_directory_flattener(
+        input_path=replaypacks_input_path,
+        output_path=directory_flattener_output_path,
+        file_extension=".SC2Replay",
+        force_overwrite=force_overwrite,
+    )
 
-    Parameters
-    ----------
-    arguments : ReplaypackProcessorArguments
-        Specifies the arguments as per the ReplaypackProcessorArguments class fields.
-    """
+    # Separate arguments for map downloading are required because the maps directory should be placed
+    # ready for the SC2ReSet to be zipped and moved to the output directory:
+    map_downloader_args = ReplaypackProcessorArguments(
+        input_path=replaypacks_input_path,
+        output_path=directory_flattener_output_path,
+        n_processes=n_processes,
+        maps_directory=maps_output_path,
+    )
 
-    multiprocessing_list = []
-    for maybe_dir in tqdm(list(arguments.input_path.iterdir())):
-        sc2_info_extractor_go_args = define_sc2egset_args(
-            arguments=arguments,
-            maybe_dir=maybe_dir,
-        )
-        if sc2_info_extractor_go_args is not None:
-            multiprocessing_list.append(sc2_info_extractor_go_args)
+    # NOTE: Chinese maps need to be pre-seeded so that they can be
+    # hosted later on. They are also needed for the SC2EGSet to reproduce the results.
+    # Download all maps for multiprocess, map files are used as a source of truth for
+    # SC2InfoExtractorGo downloading mechanism:
+    logging.info("Downloading all maps using SC2InfoExtractorGo...")
+    sc2info_extractor_go_map_download(arguments=map_downloader_args)
 
-    # Run processing with multiple SC2InfoExtractorGo instances:
-    multiprocessing_scheduler(multiprocessing_list, int(arguments.n_processes))
+    # Package SC2ReSet and the downloaded maps, move to the output directory:
+    logging.info("Packaging SC2ReSet and the downloaded maps...")
+    multiple_dir_packager(input_path=directory_flattener_output_path, force_overwrite=force_overwrite)
+    # TODO: SC2ReSet should be ready, move it to the final output directory.
+ + sc2reset_output_path = Path(output_path, "SC2ReSet").resolve() + + move_files( + input_path=directory_flattener_output_path, + output_path=sc2reset_output_path, + force_overwrite=force_overwrite, + ) + + +def prepare_sc2egset( + replaypacks_input_path: Path, + output_path: Path, + n_processes: int, + maps_output_path: Path, + directory_flattener_output_path: Path, + force_overwrite: bool, +) -> None: + # SC2EGSet Processor: + sc2egset_processor_args = ReplaypackProcessorArguments( + input_path=replaypacks_input_path, + output_path=output_path, + n_processes=n_processes, + maps_directory=maps_output_path, + ) + + # Process SC2EGSet, this will use the same map directory as the previous step: + logging.info("Processing SC2EGSet using SC2InfoExtractorGo...") + sc2egset_replaypack_processor(arguments=sc2egset_processor_args) + + # Processed Mapping Copier: + logging.info("Copying processed_mapping.json files...") + processed_mapping_copier( + input_path=directory_flattener_output_path, output_path=output_path + ) + + # File Renamer: + logging.info("Renaming auxilliary (log) files...") + file_renamer(input_path=output_path) + + logging.info("Packaging SC2EGSet...") + multiple_dir_packager(input_path=output_path, force_overwrite=force_overwrite) + + # SC2EGSet should be ready, move it to the final output directory: + sc2egset_output_path = Path(output_path, "SC2EGSet").resolve() + + move_files( + input_path=output_path, + output_path=sc2egset_output_path, + force_overwrite=force_overwrite, + ) @click.command( @@ -120,70 +193,32 @@ def main( if user_prompt_overwrite_ok(path=output_path, force_overwrite=force_overwrite): output_path.mkdir(exist_ok=True) - input_path = Path(input_path).resolve() + # This input will be flattened: + replaypacks_input_path = Path(input_path).resolve() output_path = Path(output_path).resolve() maps_output_path = Path(output_path, "maps").resolve() - - arguments = ReplaypackProcessorArguments( - input_path=input_path, - output_path=output_path, - n_processes=n_processes, - maps_directory=maps_output_path, - ) - - # TODO: Recreate the entire pipeline for SC2ReSet and SC2EGSet: - # REVIEW: Note that the Chinese maps need to be pre-seeded so that they can be - # hosted later on. - - # Directory flattener: directory_flattener_output_path = Path( output_path, "directory_flattener_output" ).resolve() - # REVIEW: Should it be ok if the directory exists? - # it may contain some files that should not be overwritten: - # --force as a flag in CLI - # input command waiting for user input to confirm potential overwrite: - # - if user_prompt_overwrite_ok( - path=directory_flattener_output_path, force_overwrite=force_overwrite - ): - directory_flattener_output_path.mkdir(exist_ok=True) - - # TODO: Check if the output directory is not empty, if it is you can proceed - # if the directory is not empty issue a warning with confirmation prompt. 
- - # if not empty and not force: - # prompt user to confirm overwrite - - logging.info("Flattening directories...") - multiple_directory_flattener( - input_path=input_path, - output_path=directory_flattener_output_path, - file_extension=".SC2Replay", + # TODO: Recreate the entire pipeline for SC2ReSet and SC2EGSet: + prepare_sc2reset( + output_path=output_path, + replaypacks_input_path=replaypacks_input_path, + n_processes=n_processes, + force_overwrite=force_overwrite, + maps_output_path=maps_output_path, + directory_flattener_output_path=directory_flattener_output_path, ) - # Download all maps for multiprocess, map files are used as a source of truth for - # SC2InfoExtractorGo downloading mechanism: - logging.info("Downloading all maps using SC2InfoExtractorGo...") - sc2info_extractor_go_map_download(arguments=arguments) - - # Package SC2ReSet and the downloaded maps, move to the output directory: - logging.info("Packaging SC2ReSet and the downloaded maps...") - multiple_dir_packager(input_path="") - - # Process SC2EGSet, this will use the same map directory as the previous step: - logging.info("Processing SC2EGSet using SC2InfoExtractorGo...") - sc2egset_replaypack_processor(arguments=arguments) - - # Processed Mapping Copier: - logging.info("Copying processed_mapping.json files...") - processed_mapping_copier(input_path="", output_path="") - - # File Renamer: - logging.info("Renaming auxilliary (log) files...") - file_renamer(input_path="") + prepare_sc2egset( + replaypacks_input_path=replaypacks_input_path, + output_path=output_path, + n_processes=n_processes, + maps_output_path=maps_output_path, + directory_flattener_output_path=directory_flattener_output_path, + ) if __name__ == "__main__": diff --git a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/file_copier.py b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/file_copier.py new file mode 100644 index 0000000..f9e8c9e --- /dev/null +++ b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/file_copier.py @@ -0,0 +1,33 @@ +import logging +from pathlib import Path + +from datasetpreparator.utils.user_prompt import user_prompt_overwrite_ok +from tqdm import tqdm +import shutil + + +def move_files( + input_path: Path, + output_path: Path, + force_overwrite: bool, + extension: str = ".zip", +) -> None: + # Make sure that the output directory exists, and potentially overwrite + # its contents if the user agrees: + if user_prompt_overwrite_ok(path=output_path, force_overwrite=force_overwrite): + output_path.mkdir(exist_ok=True) + + logging.info( + f"Searching for files with extension {extension} in {str(input_path)}..." 
+ ) + + files = list(input_path.glob(f"*{extension}")) + + logging.info(f"Copying {len(files)} files to {str(output_path)}...") + + for file in tqdm( + files, + desc="Copying files", + unit="file", + ): + shutil.move(file, output_path / file.name) diff --git a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/multiprocess.py b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/multiprocess.py index de58ef3..207ed4e 100644 --- a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/multiprocess.py +++ b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/multiprocess.py @@ -6,8 +6,12 @@ from typing import List +from tqdm import tqdm + from datasetpreparator.sc2.sc2egset_replaypack_processor.utils.replaypack_processor_args import ( + ReplaypackProcessorArguments, SC2InfoExtractorGoArguments, + define_sc2egset_args, ) from datasetpreparator.settings import PATH_TO_SC2INFOEXTRACTORGO @@ -85,6 +89,32 @@ def process_single_replaypack(arguments: SC2InfoExtractorGoArguments) -> None: ) +def sc2egset_replaypack_processor( + arguments: ReplaypackProcessorArguments, +): + """ + Processes multiple StarCraft II replaypacks + by using https://github.com/Kaszanas/SC2InfoExtractorGo + + Parameters + ---------- + arguments : ReplaypackProcessorArguments + Specifies the arguments as per the ReplaypackProcessorArguments class fields. + """ + + multiprocessing_list = [] + for maybe_dir in tqdm(list(arguments.input_path.iterdir())): + sc2_info_extractor_go_args = define_sc2egset_args( + arguments=arguments, + maybe_dir=maybe_dir, + ) + if sc2_info_extractor_go_args is not None: + multiprocessing_list.append(sc2_info_extractor_go_args) + + # Run processing with multiple SC2InfoExtractorGo instances: + multiprocessing_scheduler(multiprocessing_list, int(arguments.n_processes)) + + def pre_process_download_maps(arguments: SC2InfoExtractorGoArguments) -> None: """ Acts as a pre-process step, executes SC2InfoExtractorGo with the diff --git a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/replaypack_processor_args.py b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/replaypack_processor_args.py index e3462fd..4ea6504 100644 --- a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/replaypack_processor_args.py +++ b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/replaypack_processor_args.py @@ -2,6 +2,8 @@ from pathlib import Path import os +from datasetpreparator.utils.user_prompt import user_prompt_overwrite_ok + class SC2InfoExtractorGoArguments: """ @@ -176,6 +178,7 @@ def __init__( def define_sc2egset_args( arguments: ReplaypackProcessorArguments, maybe_dir: Path, + force_overwrite: bool, ) -> ReplaypackProcessorArguments | None: """ Creates final ReplaypackProcessorArguments for SC2InfoExtractorGo. These arguments @@ -187,7 +190,9 @@ def define_sc2egset_args( arguments : ReplaypackProcessorArguments Arguments to the command line tool maybe_dir : Path - _description_ + Directory that is being processed + force_overwrite : bool + Specifies if the output directory should forcefully overwriten without asking the user. 
Returns
     -------
     ReplaypackProcessorArguments | None
         _description_
@@ -206,8 +211,8 @@ def define_sc2egset_args(

     logging.debug(f"Output dir: {output_path}")
     # Create the main output directory:
-    if not output_path.exists():
-        output_path.mkdir()
+    if user_prompt_overwrite_ok(path=output_path, force_overwrite=force_overwrite):
+        output_path.mkdir(exist_ok=True)

     # TODO: use pathlib:
     path, output_directory_name = os.path.split(maybe_dir)
@@ -219,8 +224,10 @@ def define_sc2egset_args(
     logging.debug(f"Output filepath: {output_directory_with_name}")

     # Create the output subdirectories:
-    if not output_directory_with_name.exists():
-        output_directory_with_name.mkdir()
+    if user_prompt_overwrite_ok(
+        path=output_directory_with_name, force_overwrite=force_overwrite
+    ):
+        output_directory_with_name.mkdir(exist_ok=True)

     sc2_info_extractor_go_args = (
         SC2InfoExtractorGoArguments.get_sc2egset_processing_args(
diff --git a/tests/test_cases/sc2egset_replaypack_processor_test.py b/tests/test_cases/sc2egset_replaypack_processor_test.py
index d6baa54..4739d38 100644
--- a/tests/test_cases/sc2egset_replaypack_processor_test.py
+++ b/tests/test_cases/sc2egset_replaypack_processor_test.py
@@ -36,6 +36,7 @@ def setUpClass(cls) -> None:

         # TODO: Verify that SC2InfoExtractorGo is available in path.
         # If not available download from GitHub release page.
+        # It is probably best for this test to be run from the Docker container.

         # TODO: Set up a test directory with at least
         # two replaypacks with at least one SC2Replay file within.
From bc9f7cab92a41e857cb543a26b436e5de85767de Mon Sep 17 00:00:00 2001
From: Kaszanas <34846245+Kaszanas@users.noreply.github.com>
Date: Wed, 8 Jan 2025 19:10:36 +0100
Subject: [PATCH 82/92] refactor: added logging statements

---
 .../directory_flattener/directory_flattener.py | 4 ++--
 .../sc2egset_replaypack_processor.py | 11 ++--------
 2 files changed, 4 insertions(+), 11 deletions(-)

diff --git a/src/datasetpreparator/directory_flattener/directory_flattener.py b/src/datasetpreparator/directory_flattener/directory_flattener.py
index b5baf81..3c7df54 100644
--- a/src/datasetpreparator/directory_flattener/directory_flattener.py
+++ b/src/datasetpreparator/directory_flattener/directory_flattener.py
@@ -83,7 +83,7 @@ def directory_flatten(
     for file in tqdm(
         list_of_files,
         desc=f"Flattening directory {root_directory.name}",
-        unit="files",
+        unit="file",
     ):
         # Getting the ReplayPack/directory/structure/file.SC2Replay path,
         # this is needed to calculate the hash of the filepath:
@@ -170,7 +170,7 @@ def multiple_directory_flattener(

     output_directories = []
     # Iterate over directories:
-    for item in input_path.iterdir():
+    for item in tqdm(input_path.iterdir()):
         maybe_dir = Path(input_path, item).resolve()
         if not maybe_dir.is_dir():
             logging.debug(f"Skipping {str(maybe_dir)}, not a directory.")
diff --git a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py
index 89ea24e..415592e 100644
--- a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py
+++ b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py
@@ -44,12 +44,6 @@ def prepare_sc2reset(
     ):
         directory_flattener_output_path.mkdir(exist_ok=True)

-    # TODO: Check if the output directory is not empty, if it is you can proceed
-    # if the directory is not empty issue a warning with confirmation prompt.
-
-    # if not empty and not force:
-    # prompt user to confirm overwrite
-
     logging.info("Flattening directories...")
     multiple_directory_flattener(
         input_path=replaypacks_input_path,
         output_path=directory_flattener_output_path,
         file_extension=".SC2Replay",
         force_overwrite=force_overwrite,
     )

     # Package SC2ReSet and the downloaded maps, move to the output directory:
     logging.info("Packaging SC2ReSet and the downloaded maps...")
     multiple_dir_packager(input_path=directory_flattener_output_path, force_overwrite=force_overwrite)
-    # TODO: SC2ReSet should be ready, move it to the final output directory.

     sc2reset_output_path = Path(output_path, "SC2ReSet").resolve()
-
+    logging.info("Moving SC2ReSet to the output directory...")
     move_files(
         input_path=directory_flattener_output_path,
         output_path=sc2reset_output_path,
         force_overwrite=force_overwrite,
     )

     # SC2EGSet should be ready, move it to the final output directory:
     sc2egset_output_path = Path(output_path, "SC2EGSet").resolve()
-
+    logging.info("Moving SC2EGSet to the output directory...")
     move_files(
         input_path=output_path,
         output_path=sc2egset_output_path,
         force_overwrite=force_overwrite,
     )
From 0c9288ffefd39ac2fc62f4725dc7b6eb1a71c8df Mon Sep 17 00:00:00 2001
From: Kaszanas <34846245+Kaszanas@users.noreply.github.com>
Date: Wed, 8 Jan 2025 21:16:06 +0100
Subject: [PATCH 83/92] refactor: removed old directory structure from processing

---
 processing/directory_flattener/input/.gitkeep | 0
 .../directory_flattener/output/.gitkeep | 0
 processing/json_merger/.gitkeep | 0
 processing/json_merger/map_translation.json | 935 ----------------
 .../json_merger/new_maps_processed.json | 996 ------------------
 processing/sc2_map_downloader/output/.gitkeep | 0
 .../output/.gitkeep | 0
 7 files changed, 1931 deletions(-)
 delete mode 100644 processing/directory_flattener/input/.gitkeep
 delete mode 100644 processing/directory_flattener/output/.gitkeep
 delete mode 100644 processing/json_merger/.gitkeep
 delete mode 100644 processing/json_merger/map_translation.json
 delete mode 100644 processing/json_merger/new_maps_processed.json
 delete mode 100644 processing/sc2_map_downloader/output/.gitkeep
 delete mode 100644 processing/sc2egset_replaypack_processor/output/.gitkeep

diff --git a/processing/directory_flattener/input/.gitkeep b/processing/directory_flattener/input/.gitkeep
deleted file mode 100644
index e69de29..0000000
diff --git a/processing/directory_flattener/output/.gitkeep b/processing/directory_flattener/output/.gitkeep
deleted file mode 100644
index e69de29..0000000
diff --git a/processing/json_merger/.gitkeep b/processing/json_merger/.gitkeep
deleted file mode 100644
index e69de29..0000000
diff --git a/processing/json_merger/map_translation.json b/processing/json_merger/map_translation.json
deleted file mode 100644
index 832fedd..0000000
--- a/processing/json_merger/map_translation.json
+++ /dev/null
@@ -1,935 +0,0 @@
-{
-    "16 Bits LE": "16-Bit LE",
-    "16 bits EC": "16-Bit LE",
-    "16 bits EE": "16-Bit LE",
-    "16 bits EJ": "16-Bit LE",
-    "16 bitów ER": "16-Bit LE",
-    "16 бит РВ": "16-Bit LE",
-    "16-Bit LE": "16-Bit LE",
-    "16位-天梯版": "16-Bit LE",
-    "16位元 - 天梯版": "16-Bit LE",
-    "16비트 - 래더": "16-Bit LE",
-    "2.000 Atmosferas LE": "2000 Atmospheres LE",
-    "2.000 Atmosfere LE": "2000 Atmospheres LE",
-    "2000 Atmospheres LE": "2000 Atmospheres LE",
-    "2000 Atmosphären LE": "2000 Atmospheres LE",
-    "2000 Atmósferas EE": "2000 Atmospheres LE",
-    "2000 atmosfer ER": "2000 Atmospheres LE",
-    "2000 atmósferas EJ": "2000 Atmospheres LE",
-    "2000 атмосфер РВ": "2000 Atmospheres LE",
-    "2000 애트모스피어 - 래더": "2000 Atmospheres LE",
-    "2000大氣壓力 - 天梯版": "2000 Atmospheres LE",
-    "Abiogenese LE": "Abiogenesis LE",
- "Abiogenesi LE": "Abiogenesis LE", - "Abiogenesis LE": "Abiogenesis LE", - "Abiogeneza ER": "Abiogenesis LE", - "Abiogenèse EC": "Abiogenesis LE", - "Abiogénesis EE": "Abiogenesis LE", - "Abiogénesis EJ": "Abiogenesis LE", - "Abiogênese LE": "Abiogenesis LE", - "Abyssal Reef": "Abyssal Reef", - "Abyssal Reef LE": "Abyssal Reef LE", - "Acchiappasogni LE": "Dreamcatcher LE", - "Achados e Perdidos LE": "Lost and Found LE", - "Acid Plant LE": "Acid Plant LE", - "Acropoli LE": "Acropolis LE", - "Acropolis EC": "Acropolis LE", - "Acropolis LE": "Acropolis LE", - "Acrópole LE": "Acropolis LE", - "Acrópolis EE": "Acropolis LE", - "Acrópolis EJ": "Acropolis LE", - "Agora mauve EC": "Neon Violet Square LE", - "Aire du défenseur EC": "Defender's Landing LE", - "Akropol ER": "Acropolis LE", - "Akropolis LE": "Acropolis LE", - "Alto de Lerilak": "Lerilak Crest", - "Anbeginn LE": "Year Zero LE", - "Anno zero LE": "Year Zero LE", - "Année zéro EC": "Year Zero LE", - "Ano Zero LE": "Year Zero LE", - "Apanhador de Sonhos LE": "Dreamcatcher LE", - "Arrecife Abisal EJ": "Abyssal Reef LE", - "Arrecife abisal EE": "Abyssal Reef LE", - "Ascension to Aiur LE": "Ascension to Aiur LE", - "Ascension vers Aïur EC": "Ascension to Aiur LE", - "Ascensione ad Aiur LE": "Ascension to Aiur LE", - "Ascensión a Aiur EE": "Ascension to Aiur LE", - "Ascensión a Aiur EJ": "Ascension to Aiur LE", - "Ascensão para Aiur LE": "Ascension to Aiur LE", - "Astillero orbital": "Orbital Shipyard", - "Atmosphère 2 000 EC": "2000 Atmospheres LE", - "Atrapasueños EE": "Dreamcatcher LE", - "Atrapasueños EJ": "Dreamcatcher LE", - "Atropo LE": "Nightshade LE", - "Attrape-rêves EC": "Dreamcatcher LE", - "Aura da Morte LE": "Deathaura LE", - "Aura de mort EC": "Deathaura LE", - "Aura letal EJ": "Deathaura LE", - "Aura mortal EE": "Deathaura LE", - "Aura mortale LE": "Deathaura LE", - "Aura Śmierci ER": "Deathaura LE", - "Automa LE": "Automaton LE", - "Automaton ER": "Automaton LE", - "Automaton LE": "Automaton LE", - "Automne céruléen EC": "Cerulean Fall LE", - "Autómata EE": "Automaton LE", - "Autómata EJ": "Automaton LE", - "Autômato LE": "Automaton LE", - "Azurfälle LE": "Cerulean Fall LE", - "Año Cero EE": "Year Zero LE", - "Año cero EJ": "Year Zero LE", - "Backwater LE": "Backwater LE", - "Baie du Roi EC": "King's Cove LE", - "Barreira do Inverno LE": "Winter's Gate LE", - "Barriera sommersa LE": "Abyssal Reef LE", - "Bataille sur les planches EC": "Battle on the Boardwalk LE", - "Batalha na Prancha LE": "Battle on the Boardwalk LE", - "Batalla en el paso EJ": "Battle on the Boardwalk LE", - "Batalla en la rambla EE": "Battle on the Boardwalk LE", - "Battaglia sul lungomare LE": "Battle on the Boardwalk LE", - "Battle on the Boardwalk LE": "Battle on the Boardwalk LE", - "Baía do Rei LE": "King's Cove LE", - "Baño de sangre en la disco EE": "Disco Bloodbath LE", - "Beckett Industries ER": "Beckett Industries LE", - "Beckett Industries LE": "Beckett Industries LE", - "Bel'Shir Vestige LE (Void)": "Bel'Shir Vestige LE (Void)", - "Bel'Shir-Ruinen LE (Void)": "Bel'Shir Vestige LE (Void)", - "Beladona LE": "Nightshade LE", - "Belladona EE": "Nightshade LE", - "Belladona EJ": "Nightshade LE", - "Belladone EC": "Nightshade LE", - "Berlingrad EC": "Berlingrad LE", - "Berlingrad EE": "Berlingrad LE", - "Berlingrad ER": "Berlingrad LE", - "Berlingrad LE": "Berlingrad LE", - "Berlingrado EJ": "Berlingrad LE", - "Berlingrado LE": "Berlingrad LE", - "Bitwa na Promenadzie ER": "Battle on the Boardwalk LE", - "Blackburn EC": "Blackburn LE", - 
"Blackburn EE": "Blackburn LE", - "Blackburn LE": "Blackburn LE", - "Blackpink EC": "Blackpink LE", - "Blackpink EE": "Blackpink LE", - "Blackpink EJ": "Blackpink LE", - "Blackpink ER": "Blackpink LE", - "Blackpink LE": "Blackpink LE", - "Blackpink РВ": "Blackpink LE", - "Blaustich LE": "Blueshift LE", - "Blood Boil LE": "Blood Boil LE", - "Blueshift EE": "Blueshift LE", - "Blueshift LE": "Blueshift LE", - "Braci Abbaglianti LE": "Glittering Ashes LE", - "Cactus Valley LE (Void)": "Cactus Valley LE (Void)", - "Caduta cerulea LE": "Cerulean Fall LE", - "Cala del Rey EE": "King's Cove LE", - "Cala real EJ": "King's Cove LE", - "Campi dell'onore LE": "Honorgrounds LE", - "Campo del honor EE": "Honorgrounds LE", - "Campos de Honor EJ": "Honorgrounds LE", - "Carnage disco EC": "Disco Bloodbath LE", - "Catalisador LE": "Catalyst LE", - "Catalizador EE": "Catalyst LE", - "Catalizador EJ": "Catalyst LE", - "Catalizzatore LE": "Catalyst LE", - "Catalyseur EC": "Catalyst LE", - "Catalyst LE": "Catalyst LE", - "Caída cerúlea EE": "Cerulean Fall LE", - "Cendres scintillantes EC": "Glittering Ashes LE", - "Cenizas brillantes EE": "Glittering Ashes LE", - "Cenizas fulgentes EJ": "Glittering Ashes LE", - "Central Protocol": "Central Protocol", - "Cerulean Fall LE": "Cerulean Fall LE", - "Ceruleum ER": "Cerulean Fall LE", - "Champ de stase EC": "Stasis LE", - "Champ d’honneur EC": "Honorgrounds LE", - "Chantier naval orbital": "Orbital Shipyard", - "Chemiewerk LE": "Acid Plant LE", - "Ciberbosque EE": "Cyber Forest LE", - "Ciberbosque EJ": "Cyber Forest LE", - "Ciekawskie Umysły ER": "Curious Minds LE", - "Cinzas Reluzentes LE": "Glittering Ashes LE", - "Ciénaga EJ": "Backwater LE", - "Complejo desproporcionado": "Overgrown Facility", - "Complesso abbandonato": "Overgrown Facility", - "Complexe luxuriant": "Overgrown Facility", - "Concord LE": "Concord LE", - "Concorde EC": "Concord LE", - "Concordia EE": "Concord LE", - "Concordia EJ": "Concord LE", - "Concordia LE": "Concord LE", - "Conexión directa EJ": "Hardwire LE", - "Conexão Profunda LE": "Hardwire LE", - "Connessione LE": "Hardwire LE", - "Connexion EC": "Hardwire LE", - "Corriente de lava": "Lava Flow", - "Corrosion EC": "Acid Plant LE", - "Covo del Re LE": "King's Cove LE", - "Cresta di Lerilak": "Lerilak Crest", - "Crista de Lerilak": "Lerilak Crest", - "Croisement de Kairos EC": "Kairos Junction LE", - "Cruce de Kairos EE": "Kairos Junction LE", - "Cruce de Kairos EJ": "Kairos Junction LE", - "Crucero turbo '84 EJ": "Turbo Cruise '84 LE", - "Cruzeiro Turbo '84 LE": "Turbo Cruise '84 LE", - "Crête de Lerilak": "Lerilak Crest", - "Curiosità LE": "Curious Minds LE", - "Curious Minds LE": "Curious Minds LE", - "Cyber Forest LE": "Cyber Forest LE", - "Cyberlas ER": "Cyber Forest LE", - "Cyberwald LE": "Cyber Forest LE", - "Czarny Żar ER": "Blackburn LE", - "Czyściciele i przemysłowcy ER": "Purity and Industry LE", - "Cúspide de Lerilak": "Lerilak Crest", - "Darkness Sanctuary LE": "Darkness Sanctuary LE", - "Deathaura LE": "Deathaura LE", - "Defender's Landing LE": "Defender's Landing LE", - "Desembarco del defensor EE": "Defender's Landing LE", - "Desembarco del defensor EJ": "Defender's Landing LE", - "Desvio para o Azul LE": "Blueshift LE", - "Desvio para o Vermelho LE": "Redshift LE", - "Disco Bloodbath LE": "Disco Bloodbath LE", - "Disco-Massaker LE": "Disco Bloodbath LE", - "Discomassacro LE": "Disco Bloodbath LE", - "Discordance EC": "New Repugnancy LE", - "District de Néokirk EC (Void)": "Newkirk Precinct TE (Void)", - "Distrito Newkirk ET 
(Void)": "Newkirk Precinct TE (Void)", - "Distrito de Newkirk TE (Grande Vazio)": "Newkirk Precinct TE (Void)", - "Dolina kaktusów ER (Void)": "Cactus Valley LE (Void)", - "Domaine des dormeurs EC": "World of Sleepers LE", - "Donnervogel LE": "Thunderbird LE", - "Dreamcatcher LE": "Dreamcatcher LE", - "Duma Altarisa ER": "Pride of Altaris LE", - "Dunkles Refugium LE": "Darkness Sanctuary LE", - "Dusk Towers": "Dusk Towers", - "Dämmertürme": "Dusk Towers", - "Dédale EC": "Lost and Found LE", - "Eastwatch LE": "Eastwatch LE", - "Efemeron LE": "Ephemeron LE", - "Efemeryda ER": "Ephemeron LE", - "Efeméride LE": "Ephemeron LE", - "Efímero EE": "Ephemeron LE", - "Efímero EJ": "Ephemeron LE", - "Ehrenstätte LE": "Honorgrounds LE", - "Eis und Chrom LE": "Ice and Chrome LE", - "Empire éternel EC": "Eternal Empire LE", - "En périphérie de Nulpar EC": "Backwater LE", - "Ephemeron LE": "Ephemeron LE", - "Erneuter Aufruhr LE": "New Repugnancy LE", - "Esprits curieux EC": "Curious Minds LE", - "Estación Próxima EE": "Proxima Station LE", - "Estación Próxima EJ": "Proxima Station LE", - "Estaleiro Orbital": "Orbital Shipyard", - "Estase LE": "Stasis LE", - "Estasis EE": "Stasis LE", - "Estasis EJ": "Stasis LE", - "Estação Proxima LE": "Proxima Station LE", - "Eternal Empire LE": "Eternal Empire LE", - "Ever Dream LE": "Ever Dream LE", - "Ewiger Traum LE": "Ever Dream LE", - "Ewiges Imperium LE": "Eternal Empire LE", - "Fenda LE": "Fracture LE", - "Fierté d’Altaris EC": "Pride of Altaris LE", - "Fleuve de lave": "Lava Flow", - "Floresta Cibernética LE": "Cyber Forest LE", - "Flujo de lava": "Lava Flow", - "Flusso di lava": "Lava Flow", - "Fluxo de lava": "Lava Flow", - "Foresta cibernetica LE": "Cyber Forest LE", - "Fort-Levant EC": "Eastwatch LE", - "Forte orientale LE": "Eastwatch LE", - "Forêt cybernétique EC": "Cyber Forest LE", - "Fractura EE": "Fracture LE", - "Fractura EJ": "Fracture LE", - "Fracture EC": "Fracture LE", - "Fracture LE": "Fracture LE", - "Fraktur LE": "Fracture LE", - "Frattura LE": "Fracture LE", - "Fundgrube LE": "Lost and Found LE", - "Funkelnde Asche LE": "Glittering Ashes LE", - "Gelo e Cromo LE": "Ice and Chrome LE", - "Ghiacciocromo LE": "Ice and Chrome LE", - "Glace et chrome EC": "Ice and Chrome LE", - "Glittering Ashes LE": "Glittering Ashes LE", - "Golden Wall LE": "Golden Wall LE", - "Goldene Säulen LE": "Pillars of Gold LE", - "Goldener Wall LE": "Golden Wall LE", - "Grań Lerilaka": "Lerilak Crest", - "Guardiaoriente EE": "Eastwatch LE", - "Hardwire LE": "Hardwire LE", - "Hervidero de sangre EE": "Blood Boil LE", - "Hervor de sangre EJ": "Blood Boil LE", - "Hielo y cromo EE": "Ice and Chrome LE", - "Hielo y cromo EJ": "Ice and Chrome LE", - "High Throughput Sequencer": "High Throughput Sequencer", - "Honorgrounds LE": "Honorgrounds LE", - "Ice and Chrome LE": "Ice and Chrome LE", - "Im Lavastrom": "Lava Flow", - "Imperio eterno EE": "Eternal Empire LE", - "Imperio eterno EJ": "Eternal Empire LE", - "Impero eterno LE": "Eternal Empire LE", - "Impianto chimico LE": "Acid Plant LE", - "Império Eterno LE": "Eternal Empire LE", - "Industrias Beckett EE": "Beckett Industries LE", - "Industrias Beckett EJ": "Beckett Industries LE", - "Industrie Beckett LE": "Beckett Industries LE", - "Industries Beckett EC": "Beckett Industries LE", - "Instalación frondosa": "Overgrown Facility", - "Instalações Abandonadas": "Overgrown Facility", - "Jagannatha": "Jagannatha", - "Jagannatha EC": "Jagannatha LE", - "Jagannatha EE": "Jagannatha LE", - "Jagannatha EJ": "Jagannatha LE", - 
"Jagannatha ER": "Jagannatha LE", - "Jagannatha LE": "Jagannatha LE", - "Jasny Cień ER": "Lightshade LE", - "Junção Kairos LE": "Kairos Junction LE", - "Kairos Junction LE": "Kairos Junction LE", - "Kairoskreuz LE": "Kairos Junction LE", - "Kaktustal LE (Void)": "Cactus Valley LE (Void)", - "Katalizator ER": "Catalyst LE", - "Katalyse LE": "Catalyst LE", - "King's Cove LE": "King's Cove LE", - "Kochendes Blut LE": "Blood Boil LE", - "Konsonans ER": "Concord LE", - "Królewska Zatoka ER": "King's Cove LE", - "Königsstrand LE": "King's Cove LE", - "La odisea EE": "Odyssey LE", - "Laberinto EJ": "Lost and Found LE", - "Landung des Verteidigers LE": "Defender's Landing LE", - "Lava Flow": "Lava Flow", - "Lerilak Crest": "Lerilak Crest", - "Lerilakgipfel": "Lerilak Crest", - "Lightshade LE": "Lightshade LE", - "Lost and Found LE": "Lost and Found LE", - "Luces y sombras EE": "Lightshade LE", - "Luci e ombre LE": "Lightshade LE", - "Lueur nocturne EC": "Lightshade LE", - "Lód i Chrom ER": "Ice and Chrome LE", - "Lądowisko Obrońców ER": "Defender's Landing LE", - "Lśniące Popioły ER": "Glittering Ashes LE", - "Masacre disco EJ": "Disco Bloodbath LE", - "Massacre na Discoteca LE": "Disco Bloodbath LE", - "Mentes Curiosas LE": "Curious Minds LE", - "Mentes curiosas EE": "Curious Minds LE", - "Mentes curiosas EJ": "Curious Minds LE", - "Mondo dei sogni LE": "World of Sleepers LE", - "Mundo Adormecido LE": "World of Sleepers LE", - "Mundo de durmientes EE": "World of Sleepers LE", - "Mundo de durmientes EJ": "World of Sleepers LE", - "Mur doré EC": "Golden Wall LE", - "Muraglia dorata LE": "Golden Wall LE", - "Muralha Dourada LE": "Golden Wall LE", - "Muralla dorada EJ": "Golden Wall LE", - "Muro dorado EE": "Golden Wall LE", - "Mécanisation EC": "Automaton LE", - "Nachtschatten LE": "Nightshade LE", - "Napływ lawy": "Lava Flow", - "Neon Violet Square LE": "Neon Violet Square LE", - "Neonlichtdistrikt LE": "Neon Violet Square LE", - "Neukirk TE (Void)": "Newkirk Precinct TE (Void)", - "Neve insanguinata LE": "Blood Boil LE", - "New Repugnancy LE": "New Repugnancy LE", - "Newkirk Precinct TE (Void)": "Newkirk Precinct TE (Void)", - "Nightshade LE": "Nightshade LE", - "Nocny Mrok ER": "Nightshade LE", - "Nova Repugnância LE": "New Repugnancy LE", - "Nowa Repugnacja ER": "New Repugnancy LE", - "Nuance LE": "Lightshade LE", - "Nuances de rouge EC": "Redshift LE", - "Nueva Incongruencia EJ": "New Repugnancy LE", - "Nueva Repugnancia EE": "New Repugnancy LE", - "Nuova avversione LE": "New Repugnancy LE", - "Objetos perdidos EE": "Lost and Found LE", - "Odisea EJ": "Odyssey LE", - "Odissea LE": "Odyssey LE", - "Odisseia LE": "Odyssey LE", - "Odyseja ER": "Odyssey LE", - "Odyssee LE": "Odyssey LE", - "Odyssey LE": "Odyssey LE", - "Odyssée EC": "Odyssey LE", - "Oiseau de foudre EC": "Thunderbird LE", - "Oksydacja ER": "Oxide LE", - "Ombre blu LE": "Blueshift LE", - "Ombre rosse LE": "Redshift LE", - "Orbital Shipyard": "Orbital Shipyard", - "Orbitalwerft": "Orbital Shipyard", - "Orgoglio di Altaris LE": "Pride of Altaris LE", - "Orgulho de Altaris LE": "Pride of Altaris LE", - "Orgullo de Altaris EE": "Pride of Altaris LE", - "Orgullo de Altaris EJ": "Pride of Altaris LE", - "Ossido LE": "Oxide LE", - "Ostwacht LE": "Eastwatch LE", - "Otoño cerúleo EJ": "Cerulean Fall LE", - "Overgrown Facility": "Overgrown Facility", - "Oxid LE": "Oxide LE", - "Oxide LE": "Oxide LE", - "Oxyde EC": "Oxide LE", - "Para Site LE": "Para Site LE", - "Para Sito EE": "Para Site LE", - "Para-cité EC": "Para Site LE", - "Para-serres 
EC": "Para Site LE", - "Parasitenlabor LE": "Para Site LE", - "Parasitismo LE": "Para Site LE", - "Pasożyt ER": "Para Site LE", - "Piazza Viola Neon LE": "Neon Violet Square LE", - "Pilares de Ouro LE": "Pillars of Gold LE", - "Pilares de oro EE": "Pillars of Gold LE", - "Pilares de oro EJ": "Pillars of Gold LE", - "Pilastri dorati LE": "Pillars of Gold LE", - "Piliers d'or EC": "Pillars of Gold LE", - "Pillars of Gold LE": "Pillars of Gold LE", - "Plac Neonów ER": "Neon Violet Square LE", - "Planta ácida EE": "Acid Plant LE", - "Planta ácida EJ": "Acid Plant LE", - "Platformy prionowe": "Prion Terraces", - "Plaza violeta iluminada EJ": "Neon Violet Square LE", - "Plaza violeta neón EE": "Neon Violet Square LE", - "Poczerwienienie ER": "Redshift LE", - "Podwodny Świat ER": "Submarine LE", - "Pogranicze ER": "Backwater LE", - "Port Aleksander EC": "Port Aleksander LE", - "Port Aleksander ER": "Port Aleksander LE", - "Port Aleksander LE": "Port Aleksander LE", - "Porte dell'inverno LE": "Winter's Gate LE", - "Portes de l’hiver EC": "Winter's Gate LE", - "Porto Aleksander LE": "Port Aleksander LE", - "Posterunek Newkirk ET (Void)": "Newkirk Precinct TE (Void)", - "Pouso dos Defensores LE": "Defender's Landing LE", - "Praça Neon Violeta LE": "Neon Violet Square LE", - "Pride of Altaris LE": "Pride of Altaris LE", - "Prion Terraces": "Prion Terraces", - "Procolo Central": "Central Protocol", - "Programado EE": "Hardwire LE", - "Protocole central": "Central Protocol", - "Protocollo centrale": "Central Protocol", - "Protocolo central": "Central Protocol", - "Protokół centralny": "Central Protocol", - "Proxima Station LE": "Proxima Station LE", - "Proxima-Station LE": "Proxima Station LE", - "Prüfung von Aiur LE": "Ascension to Aiur LE", - "Ptak Gromu ER": "Thunderbird LE", - "Puerta del invierno EE": "Winter's Gate LE", - "Puerta del invierno EJ": "Winter's Gate LE", - "Puerto Aleksander EE": "Port Aleksander LE", - "Puerto Aleksander EJ": "Port Aleksander LE", - "Puesto este EJ": "Eastwatch LE", - "Pureté et industrie EC": "Purity and Industry LE", - "Pureza e Engenho LE": "Purity and Industry LE", - "Pureza e industria EE": "Purity and Industry LE", - "Pureza e industria EJ": "Purity and Industry LE", - "Purezza e industria LE": "Purity and Industry LE", - "Purity and Industry LE": "Purity and Industry LE", - "Pájaro de trueno EJ": "Thunderbird LE", - "Pássaro Trovejante LE": "Thunderbird LE", - "Queda Cerúlea LE": "Cerulean Fall LE", - "Quemadura negra EJ": "Blackburn LE", - "Rafa otchłani ER": "Abyssal Reef LE", - "Recife Abissal LE": "Abyssal Reef LE", - "Redemoinho LE (Grande Vazio)": "Whirlwind LE (Void)", - "Redshift EE": "Redshift LE", - "Redshift LE": "Redshift LE", - "Reinheit und Industrie LE": "Purity and Industry LE", - "Remanso LE": "Backwater LE", - "Remolino EJ (Void)": "Whirlwind LE (Void)", - "Rhoskallia EC": "Rhoskallian LE", - "Rhoskallian EE": "Rhoskallian LE", - "Rhoskallian EJ": "Rhoskallian LE", - "Rhoskallian LE": "Rhoskallian LE", - "Rhoskallianas LE": "Rhoskallian LE", - "Rivages bleus EC": "Blueshift LE", - "Rojo amanecer EJ": "Redshift LE", - "Rok Zero ER": "Year Zero LE", - "Romanticide EC": "Romanticide LE", - "Romanticide LE": "Romanticide LE", - "Romanticidio EE": "Romanticide LE", - "Romanticidio EJ": "Romanticide LE", - "Romanticidio LE": "Romanticide LE", - "Romanticídio LE": "Romanticide LE", - "Romantizid LE": "Romanticide LE", - "Romantyzm ER": "Romanticide LE", - "Rosanegro LE": "Blackpink LE", - "Roskalian ER": "Rhoskallian LE", - "Rotstich LE": "Redshift 
LE", - "Rovine di Seras": "Ruins of Seras", - "Rozdroże Kairosa ER": "Kairos Junction LE", - "Ruinas de Seras": "Ruins of Seras", - "Ruinen von Seras": "Ruins of Seras", - "Ruines de Seras": "Ruins of Seras", - "Ruins of Seras": "Ruins of Seras", - "Ruiny Serasu": "Ruins of Seras", - "Ruiny na Bel'Shir ER (Void)": "Bel'Shir Vestige LE (Void)", - "Ruínas de Seras": "Ruins of Seras", - "Rzeźnia Disco ER": "Disco Bloodbath LE", - "Récif abyssal EC": "Abyssal Reef LE", - "Rêve éternel EC": "Ever Dream LE", - "Sanctuaire des ténèbres EC": "Darkness Sanctuary LE", - "Sang chaud EC": "Blood Boil LE", - "Sangue Fervente LE": "Blood Boil LE", - "Sanktuarium Mroku ER": "Darkness Sanctuary LE", - "Santuario de la oscuridad EE": "Darkness Sanctuary LE", - "Santuario de la oscuridad EJ": "Darkness Sanctuary LE", - "Santuario dell'Oscurità LE": "Darkness Sanctuary LE", - "Santuário das Trevas LE": "Darkness Sanctuary LE", - "Scalo dei difensori LE": "Defender's Landing LE", - "Scalo orbitale": "Orbital Shipyard", - "Schattenlicht LE": "Lightshade LE", - "Schlacht am Pier LE": "Battle on the Boardwalk LE", - "Schwarzbrand LE": "Blackburn LE", - "Secuenciador EE": "Sequencer LE", - "Secuenciador EJ": "Sequencer LE", - "Sekwencer ER": "Sequencer LE", - "Sequencer LE": "Sequencer LE", - "Sequenciador LE": "Sequencer LE", - "Sequenz LE": "Sequencer LE", - "Sequenza LE": "Sequencer LE", - "Simulacre EC": "Simulacrum LE", - "Simulacro EE": "Simulacrum LE", - "Simulacro EJ": "Simulacrum LE", - "Simulacro LE": "Simulacrum LE", - "Simulacrum LE": "Simulacrum LE", - "Sitio de investigación EJ": "Para Site LE", - "Sito biotecnologico LE": "Para Site LE", - "Sogno eterno LE": "Ever Dream LE", - "Sombraluz EJ": "Lightshade LE", - "Sonho Sempiterno LE": "Ever Dream LE", - "Sottomarina LE": "Submarine LE", - "Sous-marin EC": "Submarine LE", - "Stacja Proxima ER": "Proxima Station LE", - "Stasi LE": "Stasis LE", - "Stasis LE": "Stasis LE", - "Station Proxima EC": "Proxima Station LE", - "Stazione Kairos LE": "Kairos Junction LE", - "Stazione Proxima LE": "Proxima Station LE", - "Stałe Połączenie ER": "Hardwire LE", - "Stocznia orbitalna": "Orbital Shipyard", - "Stolz von Altaris LE": "Pride of Altaris LE", - "Strade perdute LE": "Lost and Found LE", - "Submarine LE": "Submarine LE", - "Submarino EE": "Submarine LE", - "Submarino EJ": "Submarine LE", - "Submarino LE": "Submarine LE", - "Sueño eterno EE": "Ever Dream LE", - "Sueño eterno EJ": "Ever Dream LE", - "Super turbo 84 LE": "Turbo Cruise '84 LE", - "Symulakrum ER": "Simulacrum LE", - "Szczelina ER": "Fracture LE", - "Séquenceur EC": "Sequencer LE", - "Terraplenes de priones": "Prion Terraces", - "Terras da Honra LE": "Honorgrounds LE", - "Terrassen von Prion": "Prion Terraces", - "Terrasses de Prion": "Prion Terraces", - "Terrazas de Prion": "Prion Terraces", - "Terrazze Prion": "Prion Terraces", - "Terraços Prion": "Prion Terraces", - "Territorio di Newkirk TE (Void)": "Newkirk Precinct TE (Void)", - "Thunderbird EE": "Thunderbird LE", - "Thunderbird LE": "Thunderbird LE", - "Tiefseeriff LE": "Abyssal Reef LE", - "Tierra azul EJ": "Blueshift LE", - "Todesaura LE": "Deathaura LE", - "Torbellino EE (Void)": "Whirlwind LE (Void)", - "Torres Crepusculares": "Dusk Towers", - "Torres crepusculares": "Dusk Towers", - "Torres del ocaso": "Dusk Towers", - "Torri del crepuscolo": "Dusk Towers", - "Tourbillon EC (Void)": "Whirlwind LE (Void)", - "Tours du crépuscule": "Dusk Towers", - "Traumfänger LE": "Dreamcatcher LE", - "Triton EC": "Triton LE", - "Triton LE": "Triton 
LE", - "Tritone LE": "Triton LE", - "Tritão LE": "Triton LE", - "Tritón EE": "Triton LE", - "Tritón EJ": "Triton LE", - "Tryton ER": "Triton LE", - "Turbo Cruise '84 EE": "Turbo Cruise '84 LE", - "Turbo Cruise '84 LE": "Turbo Cruise '84 LE", - "Turbodrome ’84 EC": "Turbo Cruise '84 LE", - "Turbojazda 84 ER": "Turbo Cruise '84 LE", - "U-Boot LE": "Submarine LE", - "Uccello del tuono LE": "Thunderbird LE", - "Ulrena": "Ulrena", - "Usina Ácida LE": "Acid Plant LE", - "Ustione LE": "Blackburn LE", - "Vado EE": "Backwater LE", - "Vale dos Cactos ER (Void)": "Cactus Valley LE (Void)", - "Valle de cactus EE (Void)": "Cactus Valley LE (Void)", - "Valle de los cactus EJ (Void)": "Cactus Valley LE (Void)", - "Valle dei cactus LE (Void)": "Cactus Valley LE (Void)", - "Vallée des cactus EC (Void)": "Cactus Valley LE (Void)", - "Verdrahtet LE": "Hardwire LE", - "Vestiges de Bel’Shir EC (Void)": "Bel'Shir Vestige LE (Void)", - "Vestigia di Bel'Shir LE (Void)": "Bel'Shir Vestige LE (Void)", - "Vestigio de Bel'Shir EJ (Void)": "Bel'Shir Vestige LE (Void)", - "Vestigio de Bel’Shir EE (Void)": "Bel'Shir Vestige LE (Void)", - "Vestígio Bel'Shir LE (Void)": "Bel'Shir Vestige LE (Void)", - "Vigília Leste LE": "Eastwatch LE", - "Vortice LE (Void)": "Whirlwind LE (Void)", - "Welt der Schläfer LE": "World of Sleepers LE", - "Whirlwind LE (Void)": "Whirlwind LE (Void)", - "Wieczne Imperium ER": "Eternal Empire LE", - "Wieczny Sen ER": "Ever Dream LE", - "Wielki Błękit ER": "Blueshift LE", - "Wieże Zmierzchu": "Dusk Towers", - "Winter's Gate LE": "Winter's Gate LE", - "Wintertor LE": "Winter's Gate LE", - "Wir ER (Void)": "Whirlwind LE (Void)", - "Wirbelwind LE (Void)": "Whirlwind LE (Void)", - "Wissbegier LE": "Curious Minds LE", - "World of Sleepers LE": "World of Sleepers LE", - "Wschodnia Strażnica ER": "Eastwatch LE", - "Wyniesienie na Aiur ER": "Ascension to Aiur LE", - "Wytwórnia Kwasu ER": "Acid Plant LE", - "Year Zero LE": "Year Zero LE", - "Zagubione Drogi ER": "Lost and Found LE", - "Zarośnięta placówka": "Overgrown Facility", - "Zastój ER": "Stasis LE", - "Zen EC": "Zen LE", - "Zen EE": "Zen LE", - "Zen EJ": "Zen LE", - "Zen ER": "Zen LE", - "Zen LE": "Zen LE", - "Zentralprotokoll": "Central Protocol", - "Zimna krew ER": "Blood Boil LE", - "Zimowe Wrota ER": "Winter's Gate LE", - "Złote Filary ER": "Pillars of Gold LE", - "Złoty Mur ER": "Golden Wall LE", - "[ESL] Berlingrad": "[ESL] Berlingrad", - "[ESL] Blackburn": "[ESL] Blackburn", - "[ESL] Curious Minds": "[ESL] Curious Minds", - "[ESL] Glittering Ashes": "[ESL] Glittering Ashes", - "[ESL] Hardwire": "[ESL] Hardwire", - "[ESL] Pride of Altaris": "[ESL] Pride of Altaris", - "[TLMC14] Blackburn": "[TLMC14] Blackburn", - "[TLMC15] Berlingrad": "[TLMC15] Berlingrad", - "[TLMC15] Curious Minds": "[TLMC15] Curious Minds", - "[TLMC15] Glittering Ashes": "[TLMC15] Glittering Ashes", - "[TLMC15] Hardwire": "[TLMC15] Hardwire", - "[TLMC15] Pride of Altaris": "[TLMC15] Pride of Altaris", - "Área de Newkirk ET (Void)": "Newkirk Precinct TE (Void)", - "Éphémèrion EC": "Ephemeron LE", - "Óxido EE": "Oxide LE", - "Óxido EJ": "Oxide LE", - "Óxido LE": "Oxide LE", - "Überwucherte Anlage": "Overgrown Facility", - "Łapacz Snów ER": "Dreamcatcher LE", - "Świat Śniących ER": "World of Sleepers LE", - "Święta ziemia ER": "Honorgrounds LE", - "Абиогенез РВ": "Abiogenesis LE", - "Автоматон РВ": "Automaton LE", - "Акрополь РВ": "Acropolis LE", - "Арена чести РВ": "Honorgrounds LE", - "Аура смерти РВ": "Deathaura LE", - "Беккетт Индастриз РВ": "Beckett Industries LE", - 
"Берлинград РВ": "Berlingrad LE", - "Битва на променаде РВ": "Battle on the Boardwalk LE", - "Блестящий пепел РВ": "Glittering Ashes LE", - "Блэкберн РВ": "Blackburn LE", - "Буревестник РВ": "Thunderbird LE", - "Бюро находок РВ": "Lost and Found LE", - "Вечная империя РВ": "Eternal Empire LE", - "Вихрь РВ (Void)": "Whirlwind LE (Void)", - "Восточный дозор РВ": "Eastwatch LE", - "Высадка защитников РВ": "Defender's Landing LE", - "Глубоководный риф РВ": "Abyssal Reef LE", - "Год нулевой РВ": "Year Zero LE", - "Гордость Алтариса РВ": "Pride of Altaris LE", - "Гребень Лерилак": "Lerilak Crest", - "Дзен РВ": "Zen LE", - "Долина кактусов РВ (Void)": "Cactus Valley LE (Void)", - "Заросший комплекс": "Overgrown Facility", - "Затон РВ": "Backwater LE", - "Зимние врата РВ": "Winter's Gate LE", - "Золотая стена РВ": "Golden Wall LE", - "Золотые столпы РВ": "Pillars of Gold LE", - "Зона высадки РВ": "Para Site LE", - "Катализатор РВ": "Catalyst LE", - "Киберлес РВ": "Cyber Forest LE", - "Кипящая кровь РВ": "Blood Boil LE", - "Кислотный завод РВ": "Acid Plant LE", - "Королевская бухта РВ": "King's Cove LE", - "Красное смещение РВ": "Redshift LE", - "Кровавое диско РВ": "Disco Bloodbath LE", - "Лазурные утесы РВ": "Cerulean Fall LE", - "Лед и хром РВ": "Ice and Chrome LE", - "Ловец снов РВ": "Dreamcatcher LE", - "Мир спящих РВ": "World of Sleepers LE", - "Неоновая площадь РВ": "Neon Violet Square LE", - "Новое противостояние РВ": "New Repugnancy LE", - "Ночной сумрак РВ": "Nightshade LE", - "Одиссея РВ": "Odyssey LE", - "Окраины Ньюкирка РВ (Void)": "Newkirk Precinct TE (Void)", - "Оксид РВ": "Oxide LE", - "Орбитальная верфь": "Orbital Shipyard", - "Подводный мир РВ": "Submarine LE", - "Помечтай РВ": "Ever Dream LE", - "Порт Александр РВ": "Port Aleksander LE", - "Поток лавы": "Lava Flow", - "Проводка РВ": "Hardwire LE", - "Путь на Айур РВ": "Ascension to Aiur LE", - "Пытливые умы РВ": "Curious Minds LE", - "Развалины Бел-Шира РВ (Void)": "Bel'Shir Vestige LE (Void)", - "Раскол РВ": "Fracture LE", - "Романтицид РВ": "Romanticide LE", - "Роскаллиан РВ": "Rhoskallian LE", - "Руины Сераса": "Ruins of Seras", - "Светотень РВ": "Lightshade LE", - "Святилище тьмы РВ": "Darkness Sanctuary LE", - "Секвенсор РВ": "Sequencer LE", - "Симулякр РВ": "Simulacrum LE", - "Синее смещение РВ": "Blueshift LE", - "Согласие РВ": "Concord LE", - "Стазис РВ": "Stasis LE", - "Станция Кайрос РВ": "Kairos Junction LE", - "Станция Проксима РВ": "Proxima Station LE", - "Сумеречные башни": "Dusk Towers", - "Террасы Приона": "Prion Terraces", - "Тритон РВ": "Triton LE", - "Турбокруиз-84 РВ": "Turbo Cruise '84 LE", - "Ульрена": "Ulrena", - "Центральный протокол": "Central Protocol", - "Чистота и производство РВ": "Purity and Industry LE", - "Эфемерон РВ": "Ephemeron LE", - "Яганната РВ": "Jagannatha LE", - "世界主宰-天梯版": "Jagannatha LE", - "东部哨塔 - 天梯版": "Eastwatch LE", - "中央管制平台": "Central Protocol", - "中心协议": "Central Protocol", - "乌雷纳": "Ulrena", - "亚历山大港 - 天梯版": "Port Aleksander LE", - "亞塔利斯之傲 - 天梯版": "Pride of Altaris LE", - "亞歷克桑德港 - 天梯版": "Port Aleksander LE", - "仙人掌谷地 - 天梯版(虛空之遺)": "Cactus Valley LE (Void)", - "伊菲莫隆 - 天梯版": "Ephemeron LE", - "伊菲莫隆-天梯版": "Ephemeron LE", - "休眠者之境 - 天梯版": "World of Sleepers LE", - "保卫者登陆场 - 天梯版": "Defender's Landing LE", - "光影交错-天梯版": "Lightshade LE", - "光與影 - 天梯版": "Lightshade LE", - "冰棱裂境-天梯版": "Fracture LE", - "冰雪合金-天梯版": "Ice and Chrome LE", - "凯罗斯中转站-天梯版": "Kairos Junction LE", - "凱羅斯交會點 - 天梯版": "Kairos Junction LE", - "刺激 - 天梯版": "Catalyst LE", - "勒瑞萊克山巔": "Lerilak Crest", - "動力巡航 '84 - 天梯版": 
"Turbo Cruise '84 LE", - "匹昂台地": "Prion Terraces", - "十六宫格 - 天梯版": "Sequencer LE", - "協同合作 - 天梯版": "Concord LE", - "国王藏宝地-天梯版": "King's Cove LE", - "國王海灣 - 天梯版": "King's Cove LE", - "地龙岛-天梯版": "Para Site LE", - "大气2000-天梯版": "2000 Atmospheres LE", - "天然與工業 - 天梯版": "Purity and Industry LE", - "天藍降臨 - 天梯版": "Cerulean Fall LE", - "失物招領 - 天梯版": "Lost and Found LE", - "失而复得-天梯版": "Lost and Found LE", - "奥德赛 - 天梯版": "Odyssey LE", - "奧德賽 - 天梯版": "Odyssey LE", - "好奇心 - 天梯版": "Curious Minds LE", - "宿怨战场-天梯版": "New Repugnancy LE", - "密林基地": "Overgrown Facility", - "寒冬之門 - 天梯版": "Winter's Gate LE", - "寒冰與金屬 - 天梯版": "Ice and Chrome LE", - "寒血灼地 - 天梯版": "Blood Boil LE", - "岩浆浮台": "Lava Flow", - "工业与净化-天梯版": "Purity and Industry LE", - "布萊克本 - 天梯版": "Blackburn LE", - "幽暗之塔": "Dusk Towers", - "康科德-天梯版": "Concord LE", - "強酸工廠 - 天梯版": "Acid Plant LE", - "强酸工厂 - 天梯版": "Acid Plant LE", - "截流窪地 - 天梯版": "Backwater LE", - "戰意高昂之境 - 天梯版": "Catalyst LE", - "拜舍尔遗迹 - 天梯版(虚空)": "Bel'Shir Vestige LE (Void)", - "捕夢網 - 天梯版": "Dreamcatcher LE", - "擬像之城 - 天梯版": "Simulacrum LE", - "新奇之地-天梯版": "Curious Minds LE", - "新柯尔克辖区 - 锦标赛版 (虚空)": "Newkirk Precinct TE (Void)", - "新生衝突點 - 天梯版": "New Repugnancy LE", - "旋風之境 - 天梯版(虛空之遺)": "Whirlwind LE (Void)", - "旋风平台 - 天梯版 (虚空)": "Whirlwind LE (Void)", - "星軌船廠": "Orbital Shipyard", - "普列昂台地": "Prion Terraces", - "普罗西玛空间站 - 天梯版": "Proxima Station LE", - "普羅希瑪太空站 - 天梯版": "Proxima Station LE", - "暮色之塔": "Dusk Towers", - "木板小路之戰 - 天梯版": "Battle on the Boardwalk LE", - "木路围沙 - 天梯版": "Battle on the Boardwalk LE", - "札格納特 - 天梯版": "Jagannatha LE", - "机械城 - 天梯版": "Automaton LE", - "東方守望 - 天梯版": "Eastwatch LE", - "柏林加德-天梯版": "Berlingrad LE", - "梦境世界 - 天梯版": "World of Sleepers LE", - "梦境世界-天梯版": "World of Sleepers LE", - "榮耀戰地 - 天梯版": "Honorgrounds LE", - "死亡光环-天梯版": "Deathaura LE", - "死亡光環 - 天梯版": "Deathaura LE", - "死水之地 - 天梯版": "Backwater LE", - "毒茄樹叢 - 天梯版": "Nightshade LE", - "氧化物質 - 天梯版": "Oxide LE", - "永久連接 - 天梯版": "Hardwire LE", - "永恆之夢 - 天梯版": "Ever Dream LE", - "永恆帝國 - 天梯版": "Eternal Empire LE", - "永恒帝国-天梯版": "Eternal Empire LE", - "永恒梦境-天梯版": "Ever Dream LE", - "汇龙岛 - 天梯版": "Catalyst LE", - "洛斯卡利安 - 天梯版": "Rhoskallian LE", - "浴血迪斯科 - 天梯版": "Disco Bloodbath LE", - "海神信使 - 天梯版": "Triton LE", - "涡轮巡飞城-天梯版": "Turbo Cruise '84 LE", - "深海暗礁 - 天梯版": "Abyssal Reef LE", - "深海礁岩 - 天梯版": "Abyssal Reef LE", - "滨海卫城 - 天梯版": "Acropolis LE", - "滨海卫城-天梯版": "Acropolis LE", - "潛水艇 - 天梯版": "Submarine LE", - "潜水艇-天梯版": "Submarine LE", - "烏瑞那": "Ulrena", - "熔漿岩地": "Lava Flow", - "燦爛灰燼 - 天梯版": "Glittering Ashes LE", - "特里同 - 天梯版": "Triton LE", - "特里同-天梯版": "Triton LE", - "环轨星港": "Orbital Shipyard", - "瑟拉斯废墟": "Ruins of Seras", - "瑟拉斯遺跡": "Ruins of Seras", - "生命之源 - 天梯版": "Abiogenesis LE", - "破碎之地 - 天梯版": "Fracture LE", - "硬线-天梯版": "Hardwire LE", - "禅园-天梯版": "Zen LE", - "禪 - 天梯版": "Zen LE", - "程序裝置 - 天梯版": "Sequencer LE", - "空降地點 - 天梯版": "Para Site LE", - "空降守軍 - 天梯版": "Defender's Landing LE", - "粉紅黑都": "Blackpink LE", - "粉红之都 - 天梯版": "Blackpink LE", - "紀年起源 - 天梯版": "Year Zero LE", - "紅移 - 天梯版": "Redshift LE", - "紅粉不夜城": "Blackpink LE", - "紐科克管轄區 - 聯賽版(虛空之遺)": "Newkirk Precinct TE (Void)", - "紫夜-天梯版": "Nightshade LE", - "紫晶浪漫-天梯版": "Romanticide LE", - "綠溢設施": "Overgrown Facility", - "红移-天梯版": "Redshift LE", - "缀蓝秋原-天梯版": "Cerulean Fall LE", - "缘起之地 - 天梯版": "Abiogenesis LE", - "罗斯卡联-天梯版": "Rhoskallian LE", - "羅曼死 - 天梯版": "Romanticide LE", - "自動化 - 天梯版": "Automaton LE", - "艾爾飛升 - 天梯版": "Ascension to Aiur LE", - "荣耀战场 - 天梯版": "Honorgrounds LE", - "荣耀祭坛-天梯版": "Pride of Altaris LE", - "莱瑞拉克之冠": "Lerilak Crest", - 
"蓝移-天梯版": "Blueshift LE", - "藍移 - 天梯版": "Blueshift LE", - "虚拟幻境-天梯版": "Simulacrum LE", - "血戰迪斯可 - 天梯版": "Disco Bloodbath LE", - "血沸 - 天梯版": "Blood Boil LE", - "衛城 - 天梯版": "Acropolis LE", - "貝克基特工業區 - 天梯版": "Beckett Industries LE", - "貝林格蘭 - 天梯版": "Berlingrad LE", - "貝爾石遺跡 - 天梯版(虛空之遺)": "Bel'Shir Vestige LE (Void)", - "贝克特工业-天梯版": "Beckett Industries LE", - "赛博森林-天梯版": "Cyber Forest LE", - "轉瞬即逝 - 天梯版": "Ephemeron LE", - "逐梦之地-天梯版": "Dreamcatcher LE", - "锈化山巅-天梯版": "Oxide LE", - "闪烬平原-天梯版": "Glittering Ashes LE", - "零纪元-天梯版": "Year Zero LE", - "雷鳥 - 天梯版": "Thunderbird LE", - "雷鸟 - 天梯版": "Thunderbird LE", - "雷鸟-天梯版": "Thunderbird LE", - "電子叢林 - 天梯版": "Cyber Forest LE", - "霓虹广场 - 天梯版": "Neon Violet Square LE", - "霓虹紫羅蘭廣場 - 天梯版": "Neon Violet Square LE", - "静滞区 - 天梯版": "Stasis LE", - "靜滯 - 天梯版": "Stasis LE", - "飞升艾尔 - 天梯版": "Ascension to Aiur LE", - "黃金之柱 - 天梯版": "Pillars of Gold LE", - "黃金之牆 - 天梯版": "Golden Wall LE", - "黄金之柱-天梯版": "Pillars of Gold LE", - "黄金墙-天梯版": "Golden Wall LE", - "黑冬隘口 - 天梯版": "Winter's Gate LE", - "黑暗聖殿 - 天梯版": "Darkness Sanctuary LE", - "黑暗避难所-天梯版": "Darkness Sanctuary LE", - "黑色燃烧-天梯版": "Blackburn LE", - "골든 월 - 래더": "Golden Wall LE", - "궤도 조선소": "Orbital Shipyard", - "글리터링 애쉬즈 - 래더": "Glittering Ashes LE", - "나이트쉐이드 - 래더": "Nightshade LE", - "네온 바이올렛 스퀘어 - 래더": "Neon Violet Square LE", - "뉴 리퍼그넌시 - 래더": "New Repugnancy LE", - "뉴커크 재개발 지구 - 토너먼트 (공허의 유산)": "Newkirk Precinct TE (Void)", - "다크니스 생츄어리 - 래더": "Darkness Sanctuary LE", - "데스오라 - 래더": "Deathaura LE", - "돌개바람 - 래더 (공허의 유산)": "Whirlwind LE (Void)", - "드림캐처 - 래더": "Dreamcatcher LE", - "디스코 블러드배스 - 래더": "Disco Bloodbath LE", - "디펜더스 랜딩 - 래더": "Defender's Landing LE", - "라이트쉐이드 - 래더": "Lightshade LE", - "레드쉬프트 - 래더": "Redshift LE", - "레릴락 마루": "Lerilak Crest", - "로맨티사이드 - 래더": "Romanticide LE", - "로스칼리언 - 래더": "Rhoskallian LE", - "로스트 앤 파운드 - 래더": "Lost and Found LE", - "무성한 시설": "Overgrown Facility", - "배틀 온 더 보드워크 - 래더": "Battle on the Boardwalk LE", - "백워터 - 래더": "Backwater LE", - "베를린그라드 - 래더": "Berlingrad LE", - "베케트 인더스트리즈 - 래더": "Beckett Industries LE", - "벨시르 잔재 (공허)": "Bel'Shir Vestige LE (Void)", - "블랙번 - 래더": "Blackburn LE", - "블랙핑크 - 래더": "Blackpink LE", - "블러드 보일 - 래더": "Blood Boil LE", - "블루시프트 - 래더": "Blueshift LE", - "블루시프트 LE": "Blueshift LE", - "사이버 포레스트 - 래더": "Cyber Forest LE", - "서브머린 - 래더": "Submarine LE", - "세라스 폐허": "Ruins of Seras", - "세룰리안 폴 - 래더": "Cerulean Fall LE", - "세룰리안 폴 LE": "Cerulean Fall LE", - "스테이시스 - 래더": "Stasis LE", - "시뮬레이크럼 - 래더": "Simulacrum LE", - "시퀀스 - LE": "Sequencer LE", - "썬더버드 - 래더": "Thunderbird LE", - "아너그라운드 - 래더": "Honorgrounds LE", - "아이스 앤 크롬 - 래더": "Ice and Chrome LE", - "아크로폴리스 - 래더": "Acropolis LE", - "애씨드 플랜트 - 래더": "Acid Plant LE", - "어비설 리프 - 래더": "Abyssal Reef LE", - "어센션 투 아이어 - 래더": "Ascension to Aiur LE", - "어스름 탑": "Dusk Towers", - "에버 드림 - 래더": "Ever Dream LE", - "에이바이오제니시스 - 래더": "Abiogenesis LE", - "오딧세이 - 래더": "Odyssey LE", - "오토메이턴 - 래더": "Automaton LE", - "옥사이드 - 래더": "Oxide LE", - "용암 흐름": "Lava Flow", - "울레나": "Ulrena", - "월드 오브 슬리퍼스 - 래더": "World of Sleepers LE", - "윈터스 게이트 - 래더": "Winter's Gate LE", - "이스트와치 - 래더": "Eastwatch LE", - "이어 제로 - 래더": "Year Zero LE", - "이터널 엠파이어 - 래더": "Eternal Empire LE", - "이페머론 - 래더": "Ephemeron LE", - "자가나타 - 래더": "Jagannatha LE", - "젠 - 래더": "Zen LE", - "중앙 프로토콜": "Central Protocol", - "카이로스 정션 - 래더": "Kairos Junction LE", - "카탈리스트 - 래더": "Catalyst LE", - "캑터스 밸리 - 래더 (공허)": "Cactus Valley LE (Void)", - "콩코드 - 래더": "Concord LE", - "큐리어스 마인즈 - 래더": "Curious Minds LE", - "킹스 코브 - 래더": "King's Cove LE", - 
"터보 크루즈 '84 - 래더": "Turbo Cruise '84 LE", - "트라이튼 - 래더": "Triton LE", - "파라 사이트 - 래더": "Para Site LE", - "파라 사이트 LE": "Para Site LE", - "포트 알렉산더 - 래더": "Port Aleksander LE", - "퓨리티 앤 인더스트리 - 래더": "Purity and Industry LE", - "프라이드 오브 알타리스 - 래더": "Pride of Altaris LE", - "프랙처 LE": "Fracture LE", - "프록시마 정거장 - 래더": "Proxima Station LE", - "프리온 단구": "Prion Terraces", - "필러스 오브 골드 - 래더": "Pillars of Gold LE", - "하드와이어 - 래더": "Hardwire LE" -} diff --git a/processing/json_merger/new_maps_processed.json b/processing/json_merger/new_maps_processed.json deleted file mode 100644 index 282ef4c..0000000 --- a/processing/json_merger/new_maps_processed.json +++ /dev/null @@ -1,996 +0,0 @@ -{ - "16 Bits LE": "16-Bit LE", - "16 bits EC": "16-Bit LE", - "16 bits EE": "16-Bit LE", - "16 bits EJ": "16-Bit LE", - "16 bitów ER": "16-Bit LE", - "16 бит РВ": "16-Bit LE", - "16-Bit LE": "16-Bit LE", - "16位-天梯版": "16-Bit LE", - "16位元 - 天梯版": "16-Bit LE", - "16비트 - 래더": "16-Bit LE", - "Abiogenese LE": "Abiogenesis LE", - "Abiogenesi LE": "Abiogenesis LE", - "Abiogenesis LE": "Abiogenesis LE", - "Abiogeneza ER": "Abiogenesis LE", - "Abiogenèse EC": "Abiogenesis LE", - "Abiogénesis EE": "Abiogenesis LE", - "Abiogénesis EJ": "Abiogenesis LE", - "Abiogênese LE": "Abiogenesis LE", - "Abyssal Reef": "Abyssal Reef", - "Abyssal Reef LE": "Abyssal Reef LE", - "Acchiappasogni LE": "Dreamcatcher LE", - "Achados e Perdidos LE": "Lost and Found LE", - "Acid Plant LE": "Acid Plant LE", - "Acropoli LE": "Acropolis LE", - "Acropolis EC": "Acropolis LE", - "Acropolis LE": "Acropolis LE", - "Acrópole LE": "Acropolis LE", - "Acrópolis EE": "Acropolis LE", - "Acrópolis EJ": "Acropolis LE", - "Agora mauve EC": "Neon Violet Square LE", - "Aire du défenseur EC": "Defender's Landing LE", - "Akropol ER": "Acropolis LE", - "Akropolis LE": "Acropolis LE", - "Alto de Lerilak": "Lerilak Crest", - "Apanhador de Sonhos LE": "Dreamcatcher LE", - "Arrecife Abisal EJ": "Abyssal Reef LE", - "Arrecife abisal EE": "Abyssal Reef LE", - "Ascension to Aiur LE": "Ascension to Aiur LE", - "Ascension vers Aïur EC": "Ascension to Aiur LE", - "Ascensione ad Aiur LE": "Ascension to Aiur LE", - "Ascensión a Aiur EE": "Ascension to Aiur LE", - "Ascensión a Aiur EJ": "Ascension to Aiur LE", - "Ascensão para Aiur LE": "Ascension to Aiur LE", - "Astillero orbital": "Orbital Shipyard", - "Atrapasueños EE": "Dreamcatcher LE", - "Atrapasueños EJ": "Dreamcatcher LE", - "Attrape-rêves EC": "Dreamcatcher LE", - "Automa LE": "Automaton LE", - "Automaton ER": "Automaton LE", - "Automaton LE": "Automaton LE", - "Automne céruléen EC": "Cerulean Fall LE", - "Autómata EE": "Automaton LE", - "Autómata EJ": "Automaton LE", - "Autômato LE": "Automaton LE", - "Azurfälle LE": "Cerulean Fall LE", - "Backwater LE": "Backwater LE", - "Baie du Roi EC": "King's Cove LE", - "Barriera sommersa LE": "Abyssal Reef LE", - "Bataille sur les planches EC": "Battle on the Boardwalk LE", - "Batalha na Prancha LE": "Battle on the Boardwalk LE", - "Batalla en el paso EJ": "Battle on the Boardwalk LE", - "Batalla en la rambla EE": "Battle on the Boardwalk LE", - "Battaglia sul lungomare LE": "Battle on the Boardwalk LE", - "Battle on the Boardwalk LE": "Battle on the Boardwalk LE", - "Baía do Rei LE": "King's Cove LE", - "Bel'Shir Vestige LE (Void)": "Bel'Shir Vestige LE", - "Bel'Shir-Ruinen LE (Void)": "Bel'Shir Vestige LE", - "Bitwa na Promenadzie ER": "Battle on the Boardwalk LE", - "Blackpink EC": "Blackpink LE", - "Blackpink EE": "Blackpink LE", - "Blackpink EJ": "Blackpink LE", - "Blackpink 
ER": "Blackpink LE", - "Blackpink LE": "Blackpink LE", - "Blackpink РВ": "Blackpink LE", - "Blaustich LE": "Blueshift LE", - "Blood Boil LE": "Blood Boil LE", - "Blueshift EE": "Blueshift LE", - "Blueshift LE": "Blueshift LE", - "Cactus Valley LE (Void)": "Cactus Valley LE", - "Caduta cerulea LE": "Cerulean Fall LE", - "Cala del Rey EE": "King's Cove LE", - "Cala real EJ": "King's Cove LE", - "Campi dell'onore LE": "Honorgrounds LE", - "Campo del honor EE": "Honorgrounds LE", - "Campos de Honor EJ": "Honorgrounds LE", - "Catalisador LE": "Catalyst LE", - "Catalizador EE": "Catalyst LE", - "Catalizador EJ": "Catalyst LE", - "Catalizzatore LE": "Catalyst LE", - "Catalyseur EC": "Catalyst LE", - "Catalyst LE": "Catalyst LE", - "Caída cerúlea EE": "Cerulean Fall LE", - "Central Protocol": "Central Protocol", - "Cerulean Fall LE": "Cerulean Fall LE", - "Ceruleum ER": "Cerulean Fall LE", - "Champ de stase EC": "Stasis LE", - "Champ d’honneur EC": "Honorgrounds LE", - "Chantier naval orbital": "Orbital Shipyard", - "Chemiewerk LE": "Acid Plant LE", - "Ciberbosque EE": "Cyber Forest LE", - "Ciberbosque EJ": "Cyber Forest LE", - "Ciénaga EJ": "Backwater LE", - "Complejo desproporcionado": "Overgrown Facility", - "Complesso abbandonato": "Overgrown Facility", - "Complexe luxuriant": "Overgrown Facility", - "Corriente de lava": "Lava Flow", - "Corrosion EC": "Acid Plant LE", - "Covo del Re LE": "King's Cove LE", - "Cresta di Lerilak": "Lerilak Crest", - "Crista de Lerilak": "Lerilak Crest", - "Croisement de Kairos EC": "Kairos Junction LE", - "Cruce de Kairos EE": "Kairos Junction LE", - "Cruce de Kairos EJ": "Kairos Junction LE", - "Crucero turbo '84 EJ": "Turbo Cruise '84 LE", - "Cruzeiro Turbo '84 LE": "Turbo Cruise '84 LE", - "Crête de Lerilak": "Lerilak Crest", - "Cyber Forest LE": "Cyber Forest LE", - "Cyberlas ER": "Cyber Forest LE", - "Cyberwald LE": "Cyber Forest LE", - "Cúspide de Lerilak": "Lerilak Crest", - "Darkness Sanctuary LE": "Darkness Sanctuary LE", - "Defender's Landing LE": "Defender's Landing LE", - "Desembarco del defensor EE": "Defender's Landing LE", - "Desembarco del defensor EJ": "Defender's Landing LE", - "Desvio para o Azul LE": "Blueshift LE", - "Desvio para o Vermelho LE": "Redshift LE", - "Discordance EC": "New Repugnancy LE", - "District de Néokirk EC (Void)": "Newkirk Precinct TE (Void)", - "Distrito Newkirk ET (Void)": "Newkirk Precinct TE (Void)", - "Distrito de Newkirk TE (Grande Vazio)": "Newkirk Precinct TE (Void)", - "Dolina kaktusów ER (Void)": "Cactus Valley LE (Void)", - "Donnervogel LE": "Thunderbird LE", - "Dreamcatcher LE": "Dreamcatcher LE", - "Dunkles Refugium LE": "Darkness Sanctuary LE", - "Dusk Towers": "Dusk Towers", - "Dämmertürme": "Dusk Towers", - "Dédale EC": "Lost and Found LE", - "Eastwatch LE": "Eastwatch LE", - "Ehrenstätte LE": "Honorgrounds LE", - "En périphérie de Nulpar EC": "Backwater LE", - "Erneuter Aufruhr LE": "New Repugnancy LE", - "Estación Próxima EE": "Proxima Station LE", - "Estación Próxima EJ": "Proxima Station LE", - "Estaleiro Orbital": "Orbital Shipyard", - "Estase LE": "Stasis LE", - "Estasis EE": "Stasis LE", - "Estasis EJ": "Stasis LE", - "Estação Proxima LE": "Proxima Station LE", - "Fenda LE": "Fracture LE", - "Fleuve de lave": "Lava Flow", - "Floresta Cibernética LE": "Cyber Forest LE", - "Flujo de lava": "Lava Flow", - "Flusso di lava": "Lava Flow", - "Fluxo de lava": "Lava Flow", - "Foresta cibernetica LE": "Cyber Forest LE", - "Fort-Levant EC": "Eastwatch LE", - "Forte orientale LE": "Eastwatch LE", - "Forêt 
cybernétique EC": "Cyber Forest LE", - "Fractura EE": "Fracture LE", - "Fractura EJ": "Fracture LE", - "Fracture EC": "Fracture LE", - "Fracture LE": "Fracture LE", - "Fraktur LE": "Fracture LE", - "Frattura LE": "Fracture LE", - "Fundgrube LE": "Lost and Found LE", - "Grań Lerilaka": "Lerilak Crest", - "Guardiaoriente EE": "Eastwatch LE", - "Hervidero de sangre EE": "Blood Boil LE", - "Hervor de sangre EJ": "Blood Boil LE", - "Honorgrounds LE": "Honorgrounds LE", - "Im Lavastrom": "Lava Flow", - "Impianto chimico LE": "Acid Plant LE", - "Instalación frondosa": "Overgrown Facility", - "Instalações Abandonadas": "Overgrown Facility", - "Junção Kairos LE": "Kairos Junction LE", - "Kairos Junction LE": "Kairos Junction LE", - "Kairoskreuz LE": "Kairos Junction LE", - "Kaktustal LE (Void)": "Cactus Valley LE (Void)", - "Katalizator ER": "Catalyst LE", - "Katalyse LE": "Catalyst LE", - "King's Cove LE": "King's Cove LE", - "Kochendes Blut LE": "Blood Boil LE", - "Królewska Zatoka ER": "King's Cove LE", - "Königsstrand LE": "King's Cove LE", - "La odisea EE": "Odyssey LE", - "Laberinto EJ": "Lost and Found LE", - "Landung des Verteidigers LE": "Defender's Landing LE", - "Lava Flow": "Lava Flow", - "Lerilak Crest": "Lerilak Crest", - "Lerilakgipfel": "Lerilak Crest", - "Lost and Found LE": "Lost and Found LE", - "Lądowisko Obrońców ER": "Defender's Landing LE", - "Mécanisation EC": "Automaton LE", - "Napływ lawy": "Lava Flow", - "Neon Violet Square LE": "Neon Violet Square LE", - "Neonlichtdistrikt LE": "Neon Violet Square LE", - "Neukirk TE (Void)": "Newkirk Precinct TE (Void)", - "Neve insanguinata LE": "Blood Boil LE", - "New Repugnancy LE": "New Repugnancy LE", - "Newkirk Precinct TE (Void)": "Newkirk Precinct TE (Void)", - "Nova Repugnância LE": "New Repugnancy LE", - "Nowa Repugnacja ER": "New Repugnancy LE", - "Nuances de rouge EC": "Redshift LE", - "Nueva Incongruencia EJ": "New Repugnancy LE", - "Nueva Repugnancia EE": "New Repugnancy LE", - "Nuova avversione LE": "New Repugnancy LE", - "Objetos perdidos EE": "Lost and Found LE", - "Odisea EJ": "Odyssey LE", - "Odissea LE": "Odyssey LE", - "Odisseia LE": "Odyssey LE", - "Odyseja ER": "Odyssey LE", - "Odyssee LE": "Odyssey LE", - "Odyssey LE": "Odyssey LE", - "Odyssée EC": "Odyssey LE", - "Oiseau de foudre EC": "Thunderbird LE", - "Ombre blu LE": "Blueshift LE", - "Ombre rosse LE": "Redshift LE", - "Orbital Shipyard": "Orbital Shipyard", - "Orbitalwerft": "Orbital Shipyard", - "Ostwacht LE": "Eastwatch LE", - "Otoño cerúleo EJ": "Cerulean Fall LE", - "Overgrown Facility": "Overgrown Facility", - "Para Site LE": "Para Site LE", - "Para Sito EE": "Para Site LE", - "Para-cité EC": "Para Site LE", - "Para-serres EC": "Para Site LE", - "Parasitenlabor LE": "Para Site LE", - "Parasitismo LE": "Para Site LE", - "Pasożyt ER": "Para Site LE", - "Piazza Viola Neon LE": "Neon Violet Square LE", - "Plac Neonów ER": "Neon Violet Square LE", - "Planta ácida EE": "Acid Plant LE", - "Planta ácida EJ": "Acid Plant LE", - "Platformy prionowe": "Prion Terraces", - "Plaza violeta iluminada EJ": "Neon Violet Square LE", - "Plaza violeta neón EE": "Neon Violet Square LE", - "Poczerwienienie ER": "Redshift LE", - "Pogranicze ER": "Backwater LE", - "Port Aleksander EC": "Port Aleksander LE", - "Port Aleksander ER": "Port Aleksander LE", - "Port Aleksander LE": "Port Aleksander LE", - "Porto Aleksander LE": "Port Aleksander LE", - "Posterunek Newkirk ET (Void)": "Newkirk Precinct TE (Void)", - "Pouso dos Defensores LE": "Defender's Landing LE", - "Praça Neon 
Violeta LE": "Neon Violet Square LE", - "Prion Terraces": "Prion Terraces", - "Procolo Central": "Central Protocol", - "Protocole central": "Central Protocol", - "Protocollo centrale": "Central Protocol", - "Protocolo central": "Central Protocol", - "Protokół centralny": "Central Protocol", - "Proxima Station LE": "Proxima Station LE", - "Proxima-Station LE": "Proxima Station LE", - "Prüfung von Aiur LE": "Ascension to Aiur LE", - "Ptak Gromu ER": "Thunderbird LE", - "Puerto Aleksander EE": "Port Aleksander LE", - "Puerto Aleksander EJ": "Port Aleksander LE", - "Puesto este EJ": "Eastwatch LE", - "Pájaro de trueno EJ": "Thunderbird LE", - "Pássaro Trovejante LE": "Thunderbird LE", - "Queda Cerúlea LE": "Cerulean Fall LE", - "Rafa otchłani ER": "Abyssal Reef LE", - "Recife Abissal LE": "Abyssal Reef LE", - "Redemoinho LE (Grande Vazio)": "Whirlwind LE (Void)", - "Redshift EE": "Redshift LE", - "Redshift LE": "Redshift LE", - "Remanso LE": "Backwater LE", - "Remolino EJ (Void)": "Whirlwind LE (Void)", - "Rivages bleus EC": "Blueshift LE", - "Rojo amanecer EJ": "Redshift LE", - "Rosanegro LE": "Blackpink LE", - "Rotstich LE": "Redshift LE", - "Rovine di Seras": "Ruins of Seras", - "Rozdroże Kairosa ER": "Kairos Junction LE", - "Ruinas de Seras": "Ruins of Seras", - "Ruinen von Seras": "Ruins of Seras", - "Ruines de Seras": "Ruins of Seras", - "Ruins of Seras": "Ruins of Seras", - "Ruiny Serasu": "Ruins of Seras", - "Ruiny na Bel'Shir ER (Void)": "Bel'Shir Vestige LE (Void)", - "Ruínas de Seras": "Ruins of Seras", - "Récif abyssal EC": "Abyssal Reef LE", - "Sanctuaire des ténèbres EC": "Darkness Sanctuary LE", - "Sang chaud EC": "Blood Boil LE", - "Sangue Fervente LE": "Blood Boil LE", - "Sanktuarium Mroku ER": "Darkness Sanctuary LE", - "Santuario de la oscuridad EE": "Darkness Sanctuary LE", - "Santuario de la oscuridad EJ": "Darkness Sanctuary LE", - "Santuario dell'Oscurità LE": "Darkness Sanctuary LE", - "Santuário das Trevas LE": "Darkness Sanctuary LE", - "Scalo dei difensori LE": "Defender's Landing LE", - "Scalo orbitale": "Orbital Shipyard", - "Schlacht am Pier LE": "Battle on the Boardwalk LE", - "Secuenciador EE": "Sequencer LE", - "Secuenciador EJ": "Sequencer LE", - "Sekwencer ER": "Sequencer LE", - "Sequencer LE": "Sequencer LE", - "Sequenciador LE": "Sequencer LE", - "Sequenz LE": "Sequencer LE", - "Sequenza LE": "Sequencer LE", - "Sitio de investigación EJ": "Para Site LE", - "Sito biotecnologico LE": "Para Site LE", - "Stacja Proxima ER": "Proxima Station LE", - "Stasi LE": "Stasis LE", - "Stasis LE": "Stasis LE", - "Station Proxima EC": "Proxima Station LE", - "Stazione Kairos LE": "Kairos Junction LE", - "Stazione Proxima LE": "Proxima Station LE", - "Stocznia orbitalna": "Orbital Shipyard", - "Strade perdute LE": "Lost and Found LE", - "Super turbo 84 LE": "Turbo Cruise '84 LE", - "Szczelina ER": "Fracture LE", - "Séquenceur EC": "Sequencer LE", - "Terraplenes de priones": "Prion Terraces", - "Terras da Honra LE": "Honorgrounds LE", - "Terrassen von Prion": "Prion Terraces", - "Terrasses de Prion": "Prion Terraces", - "Terrazas de Prion": "Prion Terraces", - "Terrazze Prion": "Prion Terraces", - "Terraços Prion": "Prion Terraces", - "Territorio di Newkirk TE (Void)": "Newkirk Precinct TE (Void)", - "Thunderbird EE": "Thunderbird LE", - "Thunderbird LE": "Thunderbird LE", - "Tiefseeriff LE": "Abyssal Reef LE", - "Tierra azul EJ": "Blueshift LE", - "Torbellino EE (Void)": "Whirlwind LE (Void)", - "Torres Crepusculares": "Dusk Towers", - "Torres crepusculares": "Dusk Towers", 
- "Torres del ocaso": "Dusk Towers", - "Torri del crepuscolo": "Dusk Towers", - "Tourbillon EC (Void)": "Whirlwind LE (Void)", - "Tours du crépuscule": "Dusk Towers", - "Traumfänger LE": "Dreamcatcher LE", - "Turbo Cruise '84 EE": "Turbo Cruise '84 LE", - "Turbo Cruise '84 LE": "Turbo Cruise '84 LE", - "Turbodrome ’84 EC": "Turbo Cruise '84 LE", - "Turbojazda 84 ER": "Turbo Cruise '84 LE", - "Uccello del tuono LE": "Thunderbird LE", - "Ulrena": "Ulrena", - "Usina Ácida LE": "Acid Plant LE", - "Vado EE": "Backwater LE", - "Vale dos Cactos ER (Void)": "Cactus Valley LE (Void)", - "Valle de cactus EE (Void)": "Cactus Valley LE (Void)", - "Valle de los cactus EJ (Void)": "Cactus Valley LE (Void)", - "Valle dei cactus LE (Void)": "Cactus Valley LE (Void)", - "Vallée des cactus EC (Void)": "Cactus Valley LE (Void)", - "Vestiges de Bel’Shir EC (Void)": "Bel'Shir Vestige LE (Void)", - "Vestigia di Bel'Shir LE (Void)": "Bel'Shir Vestige LE (Void)", - "Vestigio de Bel'Shir EJ (Void)": "Bel'Shir Vestige LE (Void)", - "Vestigio de Bel’Shir EE (Void)": "Bel'Shir Vestige LE (Void)", - "Vestígio Bel'Shir LE (Void)": "Bel'Shir Vestige LE (Void)", - "Vigília Leste LE": "Eastwatch LE", - "Vortice LE (Void)": "Whirlwind LE (Void)", - "Whirlwind LE (Void)": "Whirlwind LE (Void)", - "Wielki Błękit ER": "Blueshift LE", - "Wieże Zmierzchu": "Dusk Towers", - "Wir ER (Void)": "Whirlwind LE (Void)", - "Wirbelwind LE (Void)": "Whirlwind LE (Void)", - "Wschodnia Strażnica ER": "Eastwatch LE", - "Wyniesienie na Aiur ER": "Ascension to Aiur LE", - "Wytwórnia Kwasu ER": "Acid Plant LE", - "Zagubione Drogi ER": "Lost and Found LE", - "Zarośnięta placówka": "Overgrown Facility", - "Zastój ER": "Stasis LE", - "Zentralprotokoll": "Central Protocol", - "Zimna krew ER": "Blood Boil LE", - "Área de Newkirk ET (Void)": "Newkirk Precinct TE (Void)", - "Überwucherte Anlage": "Overgrown Facility", - "Łapacz Snów ER": "Dreamcatcher LE", - "Święta ziemia ER": "Honorgrounds LE", - "Абиогенез РВ": "Abiogenesis LE", - "Автоматон РВ": "Automaton LE", - "Акрополь РВ": "Acropolis LE", - "Арена чести РВ": "Honorgrounds LE", - "Битва на променаде РВ": "Battle on the Boardwalk LE", - "Буревестник РВ": "Thunderbird LE", - "Бюро находок РВ": "Lost and Found LE", - "Вихрь РВ (Void)": "Whirlwind LE (Void)", - "Восточный дозор РВ": "Eastwatch LE", - "Высадка защитников РВ": "Defender's Landing LE", - "Глубоководный риф РВ": "Abyssal Reef LE", - "Гребень Лерилак": "Lerilak Crest", - "Долина кактусов РВ (Void)": "Cactus Valley LE (Void)", - "Заросший комплекс": "Overgrown Facility", - "Затон РВ": "Backwater LE", - "Зона высадки РВ": "Para Site LE", - "Катализатор РВ": "Catalyst LE", - "Киберлес РВ": "Cyber Forest LE", - "Кипящая кровь РВ": "Blood Boil LE", - "Кислотный завод РВ": "Acid Plant LE", - "Королевская бухта РВ": "King's Cove LE", - "Красное смещение РВ": "Redshift LE", - "Лазурные утесы РВ": "Cerulean Fall LE", - "Ловец снов РВ": "Dreamcatcher LE", - "Неоновая площадь РВ": "Neon Violet Square LE", - "Новое противостояние РВ": "New Repugnancy LE", - "Одиссея РВ": "Odyssey LE", - "Окраины Ньюкирка РВ (Void)": "Newkirk Precinct TE (Void)", - "Орбитальная верфь": "Orbital Shipyard", - "Порт Александр РВ": "Port Aleksander LE", - "Поток лавы": "Lava Flow", - "Путь на Айур РВ": "Ascension to Aiur LE", - "Развалины Бел-Шира РВ (Void)": "Bel'Shir Vestige LE (Void)", - "Раскол РВ": "Fracture LE", - "Руины Сераса": "Ruins of Seras", - "Святилище тьмы РВ": "Darkness Sanctuary LE", - "Секвенсор РВ": "Sequencer LE", - "Синее смещение РВ": "Blueshift 
LE", - "Стазис РВ": "Stasis LE", - "Станция Кайрос РВ": "Kairos Junction LE", - "Станция Проксима РВ": "Proxima Station LE", - "Сумеречные башни": "Dusk Towers", - "Террасы Приона": "Prion Terraces", - "Турбокруиз-84 РВ": "Turbo Cruise '84 LE", - "Ульрена": "Ulrena", - "Центральный протокол": "Central Protocol", - "东部哨塔 - 天梯版": "Eastwatch LE", - "中央管制平台": "Central Protocol", - "中心协议": "Central Protocol", - "乌雷纳": "Ulrena", - "亚历山大港 - 天梯版": "Port Aleksander LE", - "亞歷克桑德港 - 天梯版": "Port Aleksander LE", - "仙人掌谷地 - 天梯版(虛空之遺)": "Cactus Valley LE (Void)", - "保卫者登陆场 - 天梯版": "Defender's Landing LE", - "冰棱裂境-天梯版": "Fracture LE", - "凯罗斯中转站-天梯版": "Kairos Junction LE", - "凱羅斯交會點 - 天梯版": "Kairos Junction LE", - "刺激 - 天梯版": "Catalyst LE", - "勒瑞萊克山巔": "Lerilak Crest", - "動力巡航 '84 - 天梯版": "Turbo Cruise '84 LE", - "匹昂台地": "Prion Terraces", - "十六宫格 - 天梯版": "Sequencer LE", - "国王藏宝地-天梯版": "King's Cove LE", - "國王海灣 - 天梯版": "King's Cove LE", - "地龙岛-天梯版": "Para Site LE", - "天藍降臨 - 天梯版": "Cerulean Fall LE", - "失物招領 - 天梯版": "Lost and Found LE", - "失而复得-天梯版": "Lost and Found LE", - "奥德赛 - 天梯版": "Odyssey LE", - "奧德賽 - 天梯版": "Odyssey LE", - "宿怨战场-天梯版": "New Repugnancy LE", - "密林基地": "Overgrown Facility", - "寒血灼地 - 天梯版": "Blood Boil LE", - "岩浆浮台": "Lava Flow", - "幽暗之塔": "Dusk Towers", - "強酸工廠 - 天梯版": "Acid Plant LE", - "强酸工厂 - 天梯版": "Acid Plant LE", - "截流窪地 - 天梯版": "Backwater LE", - "戰意高昂之境 - 天梯版": "Catalyst LE", - "拜舍尔遗迹 - 天梯版(虚空)": "Bel'Shir Vestige LE (Void)", - "捕夢網 - 天梯版": "Dreamcatcher LE", - "新柯尔克辖区 - 锦标赛版 (虚空)": "Newkirk Precinct TE (Void)", - "新生衝突點 - 天梯版": "New Repugnancy LE", - "旋風之境 - 天梯版(虛空之遺)": "Whirlwind LE (Void)", - "旋风平台 - 天梯版 (虚空)": "Whirlwind LE (Void)", - "星軌船廠": "Orbital Shipyard", - "普列昂台地": "Prion Terraces", - "普罗西玛空间站 - 天梯版": "Proxima Station LE", - "普羅希瑪太空站 - 天梯版": "Proxima Station LE", - "暮色之塔": "Dusk Towers", - "木板小路之戰 - 天梯版": "Battle on the Boardwalk LE", - "木路围沙 - 天梯版": "Battle on the Boardwalk LE", - "机械城 - 天梯版": "Automaton LE", - "東方守望 - 天梯版": "Eastwatch LE", - "榮耀戰地 - 天梯版": "Honorgrounds LE", - "死水之地 - 天梯版": "Backwater LE", - "汇龙岛 - 天梯版": "Catalyst LE", - "涡轮巡飞城-天梯版": "Turbo Cruise '84 LE", - "深海暗礁 - 天梯版": "Abyssal Reef LE", - "深海礁岩 - 天梯版": "Abyssal Reef LE", - "滨海卫城-天梯版": "Acropolis LE", - "烏瑞那": "Ulrena", - "熔漿岩地": "Lava Flow", - "环轨星港": "Orbital Shipyard", - "瑟拉斯废墟": "Ruins of Seras", - "瑟拉斯遺跡": "Ruins of Seras", - "生命之源 - 天梯版": "Abiogenesis LE", - "破碎之地 - 天梯版": "Fracture LE", - "程序裝置 - 天梯版": "Sequencer LE", - "空降地點 - 天梯版": "Para Site LE", - "空降守軍 - 天梯版": "Defender's Landing LE", - "粉紅黑都": "Blackpink LE", - "粉红之都 - 天梯版": "Blackpink LE", - "紅移 - 天梯版": "Redshift LE", - "紅粉不夜城": "Blackpink LE", - "紐科克管轄區 - 聯賽版(虛空之遺)": "Newkirk Precinct TE (Void)", - "綠溢設施": "Overgrown Facility", - "红移-天梯版": "Redshift LE", - "缀蓝秋原-天梯版": "Cerulean Fall LE", - "缘起之地 - 天梯版": "Abiogenesis LE", - "自動化 - 天梯版": "Automaton LE", - "艾爾飛升 - 天梯版": "Ascension to Aiur LE", - "荣耀战场 - 天梯版": "Honorgrounds LE", - "莱瑞拉克之冠": "Lerilak Crest", - "蓝移-天梯版": "Blueshift LE", - "藍移 - 天梯版": "Blueshift LE", - "血沸 - 天梯版": "Blood Boil LE", - "衛城 - 天梯版": "Acropolis LE", - "貝爾石遺跡 - 天梯版(虛空之遺)": "Bel'Shir Vestige LE", - "赛博森林-天梯版": "Cyber Forest LE", - "逐梦之地-天梯版": "Dreamcatcher LE", - "雷鳥 - 天梯版": "Thunderbird LE", - "雷鸟-天梯版": "Thunderbird LE", - "電子叢林 - 天梯版": "Cyber Forest LE", - "霓虹广场 - 天梯版": "Neon Violet Square LE", - "霓虹紫羅蘭廣場 - 天梯版": "Neon Violet Square LE", - "静滞区 - 天梯版": "Stasis LE", - "靜滯 - 天梯版": "Stasis LE", - "飞升艾尔 - 天梯版": "Ascension to Aiur LE", - "黑暗聖殿 - 天梯版": "Darkness Sanctuary LE", - "黑暗避难所-天梯版": "Darkness Sanctuary LE", - 
"궤도 조선소": "Orbital Shipyard", - "네온 바이올렛 스퀘어 - 래더": "Neon Violet Square LE", - "뉴 리퍼그넌시 - 래더": "New Repugnancy LE", - "뉴커크 재개발 지구 - 토너먼트 (공허의 유산)": "Newkirk Precinct TE (Void)", - "다크니스 생츄어리 - 래더": "Darkness Sanctuary LE", - "돌개바람 - 래더 (공허의 유산)": "Whirlwind LE", - "드림캐처 - 래더": "Dreamcatcher LE", - "디펜더스 랜딩 - 래더": "Defender's Landing LE", - "레드쉬프트 - 래더": "Redshift LE", - "레릴락 마루": "Lerilak Crest", - "로스트 앤 파운드 - 래더": "Lost and Found LE", - "무성한 시설": "Overgrown Facility", - "배틀 온 더 보드워크 - 래더": "Battle on the Boardwalk LE", - "백워터 - 래더": "Backwater LE", - "벨시르 잔재 (공허)": "Bel'Shir Vestige LE", - "블랙핑크 - 래더": "Blackpink LE", - "블러드 보일 - 래더": "Blood Boil LE", - "블루시프트 - 래더": "Blueshift LE", - "블루시프트 LE": "Blueshift LE", - "사이버 포레스트 - 래더": "Cyber Forest LE", - "세라스 폐허": "Ruins of Seras", - "세룰리안 폴 - 래더": "Cerulean Fall LE", - "세룰리안 폴 LE": "Cerulean Fall LE", - "스테이시스 - 래더": "Stasis LE", - "시퀀스 - LE": "Sequencer LE", - "썬더버드 - 래더": "Thunderbird LE", - "아너그라운드 - 래더": "Honorgrounds LE", - "아크로폴리스 - 래더": "Acropolis LE", - "애씨드 플랜트 - 래더": "Acid Plant LE", - "어비설 리프 - 래더": "Abyssal Reef LE", - "어센션 투 아이어 - 래더": "Ascension to Aiur LE", - "어스름 탑": "Dusk Towers", - "에이바이오제니시스 - 래더": "Abiogenesis LE", - "오딧세이 - 래더": "Odyssey LE", - "오토메이턴 - 래더": "Automaton LE", - "용암 흐름": "Lava Flow", - "울레나": "Ulrena", - "이스트와치 - 래더": "Eastwatch LE", - "중앙 프로토콜": "Central Protocol", - "카이로스 정션 - 래더": "Kairos Junction LE", - "카탈리스트 - 래더": "Catalyst LE", - "캑터스 밸리 - 래더 (공허)": "Cactus Valley LE", - "킹스 코브 - 래더": "King's Cove LE", - "터보 크루즈 '84 - 래더": "Turbo Cruise '84 LE", - "파라 사이트 - 래더": "Para Site LE", - "파라 사이트 LE": "Para Site LE", - "포트 알렉산더 - 래더": "Port Aleksander LE", - "프랙처 LE": "Fracture LE", - "프록시마 정거장 - 래더": "Proxima Station LE", - "프리온 단구": "Prion Terraces", - "Jasny Cie\u0144 ER": "Lightshade LE", - "Luces y sombras EE": "Lightshade LE", - "Lueur nocturne EC": "Lightshade LE", - "Nuance LE": "Lightshade LE", - "Schattenlicht LE": "Lightshade LE", - "Sombraluz EJ": "Lightshade LE", - "\u5149\u5f71\u4ea4\u9519-\u5929\u68af\u7248": "Lightshade LE", - "\u5149\u8207\u5f71 - \u5929\u68af\u7248": "Lightshade LE", - "\ub77c\uc774\ud2b8\uc250\uc774\ub4dc - \ub798\ub354": "Lightshade LE", - "Lightshade LE": "Lightshade LE", - "Aura da Morte LE": "Deathaura LE", - "Aura de mort EC": "Deathaura LE", - "Aura letal EJ": "Deathaura LE", - "Aura mortal EE": "Deathaura LE", - "Aura mortale LE": "Deathaura LE", - "Aura Śmierci ER": "Deathaura LE", - "Deathaura LE": "Deathaura LE", - "Goldene Säulen LE": "Pillars of Gold LE", - "Jagannatha": "Jagannatha LE", - "Jagannatha EC": "Jagannatha LE", - "Jagannatha EE": "Jagannatha LE", - "Jagannatha EJ": "Jagannatha LE", - "Jagannatha ER": "Jagannatha LE", - "Jagannatha LE": "Jagannatha LE", - "Oksydacja ER": "Oxide LE", - "Ossido LE": "Oxide LE", - "Oxid LE": "Oxide LE", - "Oxide LE": "Oxide LE", - "Oxyde EC": "Oxide LE", - "Pilares de Ouro LE": "Pillars of Gold LE", - "Pilares de oro EE": "Pillars of Gold LE", - "Pilares de oro EJ": "Pillars of Gold LE", - "Pilastri dorati LE": "Pillars of Gold LE", - "Piliers d'or EC": "Pillars of Gold LE", - "Pillars of Gold LE": "Pillars of Gold LE", - "Podwodny Świat ER": "Submarine LE", - "Romanticide EC": "Romanticide LE", - "Romanticide LE": "Romanticide LE", - "Romanticidio EE": "Romanticide LE", - "Romanticidio EJ": "Romanticide LE", - "Romanticidio LE": "Romanticide LE", - "Romanticídio LE": "Romanticide LE", - "Romantizid LE": "Romanticide LE", - "Romantyzm ER": "Romanticide LE", - "Sottomarina LE": "Submarine LE", - "Sous-marin EC": 
"Submarine LE", - "Submarine LE": "Submarine LE", - "Submarino EE": "Submarine LE", - "Submarino EJ": "Submarine LE", - "Submarino LE": "Submarine LE", - "Todesaura LE": "Deathaura LE", - "U-Boot LE": "Submarine LE", - "Złote Filary ER": "Pillars of Gold LE", - "Óxido EE": "Oxide LE", - "Óxido EJ": "Oxide LE", - "Óxido LE": "Oxide LE", - "Аура смерти РВ": "Deathaura LE", - "Золотые столпы РВ": "Pillars of Gold LE", - "Оксид РВ": "Oxide LE", - "Подводный мир РВ": "Submarine LE", - "Романтицид РВ": "Romanticide LE", - "Яганната РВ": "Jagannatha LE", - "世界主宰-天梯版": "Jagannatha LE", - "札格納特 - 天梯版": "Jagannatha LE", - "死亡光环-天梯版": "Deathaura LE", - "死亡光環 - 天梯版": "Deathaura LE", - "氧化物質 - 天梯版": "Oxide LE", - "潛水艇 - 天梯版": "Submarine LE", - "潜水艇-天梯版": "Submarine LE", - "紫晶浪漫-天梯版": "Romanticide LE", - "羅曼死 - 天梯版": "Romanticide LE", - "锈化山巅-天梯版": "Oxide LE", - "黃金之柱 - 天梯版": "Pillars of Gold LE", - "黄金之柱-天梯版": "Pillars of Gold LE", - "데스오라 - 래더": "Deathaura LE", - "로맨티사이드 - 래더": "Romanticide LE", - "서브머린 - 래더": "Submarine LE", - "옥사이드 - 래더": "Oxide LE", - "자가나타 - 래더": "Jagannatha LE", - "필러스 오브 골드 - 래더": "Pillars of Gold LE", - "2.000 Atmosferas LE": "2000 Atmospheres LE", - "2.000 Atmosfere LE": "2000 Atmospheres LE", - "2000 Atmospheres LE": "2000 Atmospheres LE", - "2000 Atmosphären LE": "2000 Atmospheres LE", - "2000 Atmósferas EE": "2000 Atmospheres LE", - "2000 atmosfer ER": "2000 Atmospheres LE", - "2000 atmósferas EJ": "2000 Atmospheres LE", - "2000 атмосфер РВ": "2000 Atmospheres LE", - "2000 애트모스피어 - 래더": "2000 Atmospheres LE", - "2000大氣壓力 - 天梯版": "2000 Atmospheres LE", - "Atmosphère 2 000 EC": "2000 Atmospheres LE", - "Beckett Industries ER": "Beckett Industries LE", - "Beckett Industries LE": "Beckett Industries LE", - "Blackburn EC": "Blackburn LE", - "Blackburn EE": "Blackburn LE", - "Blackburn LE": "Blackburn LE", - "Czarny Żar ER": "Blackburn LE", - "Industrias Beckett EE": "Beckett Industries LE", - "Industrias Beckett EJ": "Beckett Industries LE", - "Industrie Beckett LE": "Beckett Industries LE", - "Industries Beckett EC": "Beckett Industries LE", - "Quemadura negra EJ": "Blackburn LE", - "Schwarzbrand LE": "Blackburn LE", - "Ustione LE": "Blackburn LE", - "Беккетт Индастриз РВ": "Beckett Industries LE", - "Блэкберн РВ": "Blackburn LE", - "大气2000-天梯版": "2000 Atmospheres LE", - "布萊克本 - 天梯版": "Blackburn LE", - "貝克基特工業區 - 天梯版": "Beckett Industries LE", - "贝克特工业-天梯版": "Beckett Industries LE", - "黑色燃烧-天梯版": "Blackburn LE", - "베케트 인더스트리즈 - 래더": "Beckett Industries LE", - "블랙번 - 래더": "Blackburn LE", - "Atropo LE": "Nightshade LE", - "Beladona LE": "Nightshade LE", - "Belladona EE": "Nightshade LE", - "Belladona EJ": "Nightshade LE", - "Belladone EC": "Nightshade LE", - "Czyściciele i przemysłowcy ER": "Purity and Industry LE", - "Empire éternel EC": "Eternal Empire LE", - "Eternal Empire LE": "Eternal Empire LE", - "Ever Dream LE": "Ever Dream LE", - "Ewiger Traum LE": "Ever Dream LE", - "Ewiges Imperium LE": "Eternal Empire LE", - "Golden Wall LE": "Golden Wall LE", - "Goldener Wall LE": "Golden Wall LE", - "Imperio eterno EE": "Eternal Empire LE", - "Imperio eterno EJ": "Eternal Empire LE", - "Impero eterno LE": "Eternal Empire LE", - "Império Eterno LE": "Eternal Empire LE", - "Mur doré EC": "Golden Wall LE", - "Muraglia dorata LE": "Golden Wall LE", - "Muralha Dourada LE": "Golden Wall LE", - "Muralla dorada EJ": "Golden Wall LE", - "Muro dorado EE": "Golden Wall LE", - "Nachtschatten LE": "Nightshade LE", - "Nightshade LE": "Nightshade LE", - "Nocny Mrok ER": "Nightshade LE", - "Pureté 
et industrie EC": "Purity and Industry LE", - "Pureza e Engenho LE": "Purity and Industry LE", - "Pureza e industria EE": "Purity and Industry LE", - "Pureza e industria EJ": "Purity and Industry LE", - "Purezza e industria LE": "Purity and Industry LE", - "Purity and Industry LE": "Purity and Industry LE", - "Reinheit und Industrie LE": "Purity and Industry LE", - "Rhoskallia EC": "Rhoskallian LE", - "Rhoskallian EE": "Rhoskallian LE", - "Rhoskallian EJ": "Rhoskallian LE", - "Rhoskallian LE": "Rhoskallian LE", - "Rhoskallianas LE": "Rhoskallian LE", - "Roskalian ER": "Rhoskallian LE", - "Rêve éternel EC": "Ever Dream LE", - "Simulacre EC": "Simulacrum LE", - "Simulacro EE": "Simulacrum LE", - "Simulacro EJ": "Simulacrum LE", - "Simulacro LE": "Simulacrum LE", - "Simulacrum LE": "Simulacrum LE", - "Sogno eterno LE": "Ever Dream LE", - "Sonho Sempiterno LE": "Ever Dream LE", - "Sueño eterno EE": "Ever Dream LE", - "Sueño eterno EJ": "Ever Dream LE", - "Symulakrum ER": "Simulacrum LE", - "Wieczne Imperium ER": "Eternal Empire LE", - "Wieczny Sen ER": "Ever Dream LE", - "Zen EC": "Zen LE", - "Zen EE": "Zen LE", - "Zen EJ": "Zen LE", - "Zen ER": "Zen LE", - "Zen LE": "Zen LE", - "Złoty Mur ER": "Golden Wall LE", - "Вечная империя РВ": "Eternal Empire LE", - "Дзен РВ": "Zen LE", - "Золотая стена РВ": "Golden Wall LE", - "Ночной сумрак РВ": "Nightshade LE", - "Помечтай РВ": "Ever Dream LE", - "Роскаллиан РВ": "Rhoskallian LE", - "Симулякр РВ": "Simulacrum LE", - "Чистота и производство РВ": "Purity and Industry LE", - "天然與工業 - 天梯版": "Purity and Industry LE", - "工业与净化-天梯版": "Purity and Industry LE", - "擬像之城 - 天梯版": "Simulacrum LE", - "毒茄樹叢 - 天梯版": "Nightshade LE", - "永恆之夢 - 天梯版": "Ever Dream LE", - "永恆帝國 - 天梯版": "Eternal Empire LE", - "永恒帝国-天梯版": "Eternal Empire LE", - "永恒梦境-天梯版": "Ever Dream LE", - "洛斯卡利安 - 天梯版": "Rhoskallian LE", - "禅园-天梯版": "Zen LE", - "禪 - 天梯版": "Zen LE", - "紫夜-天梯版": "Nightshade LE", - "罗斯卡联-天梯版": "Rhoskallian LE", - "虚拟幻境-天梯版": "Simulacrum LE", - "黃金之牆 - 天梯版": "Golden Wall LE", - "黄金墙-天梯版": "Golden Wall LE", - "골든 월 - 래더": "Golden Wall LE", - "나이트쉐이드 - 래더": "Nightshade LE", - "로스칼리언 - 래더": "Rhoskallian LE", - "시뮬레이크럼 - 래더": "Simulacrum LE", - "에버 드림 - 래더": "Ever Dream LE", - "이터널 엠파이어 - 래더": "Eternal Empire LE", - "젠 - 래더": "Zen LE", - "퓨리티 앤 인더스트리 - 래더": "Purity and Industry LE", - "Concord LE": "Concord LE", - "Concorde EC": "Concord LE", - "Concordia EE": "Concord LE", - "Concordia EJ": "Concord LE", - "Concordia LE": "Concord LE", - "Konsonans ER": "Concord LE", - "Согласие РВ": "Concord LE", - "協同合作 - 天梯版": "Concord LE", - "康科德-天梯版": "Concord LE", - "콩코드 - 래더": "Concord LE", - "Eis und Chrom LE": "Ice and Chrome LE", - "Gelo e Cromo LE": "Ice and Chrome LE", - "Ghiacciocromo LE": "Ice and Chrome LE", - "Glace et chrome EC": "Ice and Chrome LE", - "Hielo y cromo EE": "Ice and Chrome LE", - "Hielo y cromo EJ": "Ice and Chrome LE", - "Ice and Chrome LE": "Ice and Chrome LE", - "Lód i Chrom ER": "Ice and Chrome LE", - "Лед и хром РВ": "Ice and Chrome LE", - "冰雪合金-天梯版": "Ice and Chrome LE", - "寒冰與金屬 - 天梯版": "Ice and Chrome LE", - "아이스 앤 크롬 - 래더": "Ice and Chrome LE", - "Domaine des dormeurs EC": "World of Sleepers LE", - "Efemeron LE": "Ephemeron LE", - "Efemeryda ER": "Ephemeron LE", - "Efeméride LE": "Ephemeron LE", - "Efímero EE": "Ephemeron LE", - "Efímero EJ": "Ephemeron LE", - "Ephemeron LE": "Ephemeron LE", - "Mondo dei sogni LE": "World of Sleepers LE", - "Mundo Adormecido LE": "World of Sleepers LE", - "Mundo de durmientes EE": "World of Sleepers LE", - "Mundo de 
durmientes EJ": "World of Sleepers LE", - "Triton EC": "Triton LE", - "Triton LE": "Triton LE", - "Tritone LE": "Triton LE", - "Tritão LE": "Triton LE", - "Tritón EE": "Triton LE", - "Tritón EJ": "Triton LE", - "Tryton ER": "Triton LE", - "Welt der Schläfer LE": "World of Sleepers LE", - "World of Sleepers LE": "World of Sleepers LE", - "Éphémèrion EC": "Ephemeron LE", - "Świat Śniących ER": "World of Sleepers LE", - "Мир спящих РВ": "World of Sleepers LE", - "Тритон РВ": "Triton LE", - "Эфемерон РВ": "Ephemeron LE", - "伊菲莫隆-天梯版": "Ephemeron LE", - "休眠者之境 - 天梯版": "World of Sleepers LE", - "梦境世界-天梯版": "World of Sleepers LE", - "海神信使 - 天梯版": "Triton LE", - "特里同-天梯版": "Triton LE", - "轉瞬即逝 - 天梯版": "Ephemeron LE", - "월드 오브 슬리퍼스 - 래더": "World of Sleepers LE", - "이페머론 - 래더": "Ephemeron LE", - "트라이튼 - 래더": "Triton LE", - "Anbeginn LE": "Year Zero LE", - "Anno zero LE": "Year Zero LE", - "Année zéro EC": "Year Zero LE", - "Ano Zero LE": "Year Zero LE", - "Año Cero EE": "Year Zero LE", - "Año cero EJ": "Year Zero LE", - "Rok Zero ER": "Year Zero LE", - "Year Zero LE": "Year Zero LE", - "Год нулевой РВ": "Year Zero LE", - "紀年起源 - 天梯版": "Year Zero LE", - "零纪元-天梯版": "Year Zero LE", - "이어 제로 - 래더": "Year Zero LE", - "Barreira do Inverno LE": "Winter's Gate LE", - "Baño de sangre en la disco EE": "Disco Bloodbath LE", - "Carnage disco EC": "Disco Bloodbath LE", - "Disco Bloodbath LE": "Disco Bloodbath LE", - "Disco-Massaker LE": "Disco Bloodbath LE", - "Discomassacro LE": "Disco Bloodbath LE", - "Masacre disco EJ": "Disco Bloodbath LE", - "Massacre na Discoteca LE": "Disco Bloodbath LE", - "Porte dell'inverno LE": "Winter's Gate LE", - "Portes de l’hiver EC": "Winter's Gate LE", - "Puerta del invierno EE": "Winter's Gate LE", - "Puerta del invierno EJ": "Winter's Gate LE", - "Rzeźnia Disco ER": "Disco Bloodbath LE", - "Winter's Gate LE": "Winter's Gate LE", - "Wintertor LE": "Winter's Gate LE", - "Zimowe Wrota ER": "Winter's Gate LE", - "Зимние врата РВ": "Winter's Gate LE", - "Кровавое диско РВ": "Disco Bloodbath LE", - "寒冬之門 - 天梯版": "Winter's Gate LE", - "浴血迪斯科-天梯版": "Disco Bloodbath LE", - "血戰迪斯可 - 天梯版": "Disco Bloodbath LE", - "黑冬隘口-天梯版": "Winter's Gate LE", - "디스코 블러드배스 - 래더": "Disco Bloodbath LE", - "윈터스 게이트 - 래더": "Winter's Gate LE", - "Accolita LE": "Acolyte LE", - "Acolyte LE": "Acolyte LE", - "Acólito EE": "Acolyte LE", - "Acólito EJ": "Acolyte LE", - "Acólito LE": "Acolyte LE", - "Akolita ER": "Acolyte LE", - "Akolyth LE": "Acolyte LE", - "Baie de réparation EC": "Mech Depot LE", - "Baza mechów ER": "Mech Depot LE", - "Deposito mech LE": "Mech Depot LE", - "Depósito de Robôs LE": "Mech Depot LE", - "Eindringling LE": "Interloper LE", - "Interloper LE": "Interloper LE", - "Intrusione LE": "Interloper LE", - "Intruso EE": "Interloper LE", - "Intruso EJ": "Interloper LE", - "Intruso ER": "Interloper LE", - "Intruz ER": "Interloper LE", - "Mech Depot LE": "Mech Depot LE", - "Mech-Depot LE": "Mech Depot LE", - "Retraite de l’acolyte EC": "Acolyte LE", - "Taller mecánico EE": "Mech Depot LE", - "Taller mecánico EJ": "Mech Depot LE", - "Île des contrebandiers EC": "Interloper LE", - "Аколит РВ": "Acolyte LE", - "Меха-депо РВ": "Mech Depot LE", - "Незваный гость РВ": "Interloper LE", - "机甲维护站 - 天梯版": "Mech Depot LE", - "機電廠 - 天梯版": "Mech Depot LE", - "紫晶卫 - 天梯版": "Acolyte LE", - "闖入禁地 - 天梯版": "Interloper LE", - "闯星 - 天梯版": "Interloper LE", - "隨從 - 天梯版": "Acolyte LE", - "메크 디포 - 래더": "Mech Depot LE", - "애컬라이트 - 래더": "Acolyte LE", - "인터로퍼 - 래더": "Interloper LE", - "Bel'Shir Vestige LE": "Bel'Shir 
Vestige LE", - "Bel'Shir-Ruinen LE": "Bel'Shir Vestige LE", - "Paladino Terminal LE": "Paladino Terminal LE", - "Paladino-Terminal LE": "Paladino Terminal LE", - "Ruiny na Bel'Shir ER": "Bel'Shir Vestige LE", - "Terminal Paladino EC": "Paladino Terminal LE", - "Terminal Paladino EE": "Paladino Terminal LE", - "Terminal Paladino EJ": "Paladino Terminal LE", - "Terminal Paladino ER": "Paladino Terminal LE", - "Terminal Paladino LE": "Paladino Terminal LE", - "Vestiges de Bel’Shir EC": "Bel'Shir Vestige LE", - "Vestigia di Bel'Shir LE": "Bel'Shir Vestige LE", - "Vestigio de Bel'Shir EJ": "Bel'Shir Vestige LE", - "Vestigios de Bel'Shir LE": "Bel'Shir Vestige LE", - "Vestígio Bel'Shir ER": "Bel'Shir Vestige LE", - "Прерии Бел-Шира РВ": "Bel'Shir Vestige LE", - "Терминал Паладино РВ": "Paladino Terminal LE", - "帕拉蒂诺中转站 - 天梯版": "Paladino Terminal LE", - "帕拉迪諾站 - 天梯版": "Paladino Terminal LE", - "拜舍尔遗迹 - 天梯版": "Bel'Shir Vestige LE", - "貝爾石遺跡 - 天梯版": "Bel'Shir Vestige LE", - "벨시르 잔재 - 래더": "Bel'Shir Vestige LE", - "팔라디노 터미널 - 래더": "Paladino Terminal LE", - "Apoteose ER": "Apotheosis LE", - "Apoteosis EE": "Apotheosis LE", - "Apoteosis EJ": "Apotheosis LE", - "Apoteoza ER": "Apotheosis LE", - "Apotheose LE": "Apotheosis LE", - "Apotheosis LE": "Apotheosis LE", - "Apothéose EC": "Apotheosis LE", - "Coltre di ghiaccio LE": "Frost LE", - "Congelamento ER": "Frost LE", - "Dasan Station LE": "Dasan Station LE", - "Dasan-Forschungsstation LE": "Dasan Station LE", - "Escarcha EE": "Frost LE", - "Estación Dasán EE": "Dasan Station LE", - "Estación de Dasan EJ": "Dasan Station LE", - "Estação Dasan ER": "Dasan Station LE", - "Frost LE": "Frost LE", - "Frozen Temple": "Frozen Temple", - "Galactic Process LE": "Galactic Process LE", - "Galaktischer Prozess LE": "Galactic Process LE", - "Galaktyczna przetwórnia ER": "Galactic Process LE", - "Givre EC": "Frost LE", - "Helada EJ": "Frost LE", - "Mróz ER": "Frost LE", - "Neu-Gettysburg LE": "New Gettysburg LE", - "New Gettysburg EJ": "New Gettysburg LE", - "New Gettysburg LE": "New Gettysburg LE", - "Nova Gettysburg ER": "New Gettysburg LE", - "Nowy Gettysburg ER": "New Gettysburg LE", - "Nueva Getisburgo EE": "New Gettysburg LE", - "Néo-Gettysburg EC": "New Gettysburg LE", - "Opérations galactiques EC": "Galactic Process LE", - "Piattaforma galattica LE": "Galactic Process LE", - "Proceso galáctico EE": "Galactic Process LE", - "Proceso galáctico EJ": "Galactic Process LE", - "Processo Galáctico ER": "Galactic Process LE", - "Stacja Dasan ER": "Dasan Station LE", - "Station de Dasan EC": "Dasan Station LE", - "Stazione Dasan LE": "Dasan Station LE", - "Tempio ghiacciato": "Frozen Temple", - "Temple gelé": "Frozen Temple", - "Templo Congelado": "Frozen Temple", - "Templo congelado": "Frozen Temple", - "Templo helado": "Frozen Temple", - "Vereister Tempel": "Frozen Temple", - "Zamarznięta Świątynia": "Frozen Temple", - "King Sejong Station LE": "King Sejong Station LE", - "세종과학기지": "King Sejong Station LE", - "Апофеоз РВ": "Apotheosis LE", - "Галактический процесс РВ": "Galactic Process LE", - "Мороз РВ": "Frost LE", - "Новый Геттисберг РВ": "New Gettysburg LE", - "Станция \"Дасан\" РВ": "Dasan Station LE", - "Храм во льдах": "Frozen Temple", - "冰凍神殿": "Frozen Temple", - "冰封神殿": "Frozen Temple", - "冰霜之地 - 天梯版": "Frost LE", - "封神之地 - 天梯版": "Apotheosis LE", - "新盖茨堡 - 天梯版": "New Gettysburg LE", - "新蓋茨堡 - 天梯版": "New Gettysburg LE", - "神性之地 - 天梯版": "Apotheosis LE", - "茶山科學研究站 - 天梯版": "Dasan Station LE", - "茶山科考站 - 天梯版": "Dasan Station LE", - "螺旋走廊 - 天梯版": "Galactic 
Process LE", - "銀河天堂路 - 天梯版": "Galactic Process LE", - "霜寒之地 - 天梯版": "Frost LE", - "갤럭틱 프로세스 - 래더": "Galactic Process LE", - "뉴 게티즈버그 - 래더": "New Gettysburg LE", - "다산과학기지 - 래더": "Dasan Station LE", - "아포테오시스 - 래더": "Apotheosis LE", - "얼어붙은 사원": "Frozen Temple", - "프로스트 - 래더": "Frost LE", - "Catallena EC (Void)": "Catallena LE (Void)", - "Catallena EE (Void)": "Catallena LE (Void)", - "Catallena EJ (Void)": "Catallena LE (Void)", - "Catallena ER (Otchłań)": "Catallena LE (Void)", - "Catallena LE (Void)": "Catallena LE (Void)", - "Каталлена РВ (Void)": "Catallena LE (Void)", - "卡塔莉娜 - 天梯版 (虚空)": "Catallena LE (Void)", - "卡塔雷娜 - 天梯版 (虛空之遺)": "Catallena LE (Void)", - "까탈레나 - 래더 (공허)": "Catallena LE (Void)" -} diff --git a/processing/sc2_map_downloader/output/.gitkeep b/processing/sc2_map_downloader/output/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/processing/sc2egset_replaypack_processor/output/.gitkeep b/processing/sc2egset_replaypack_processor/output/.gitkeep deleted file mode 100644 index e69de29..0000000 From 64458f7baea2f3865c5e50bf711871b8308a1562 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Wed, 8 Jan 2025 21:16:50 +0100 Subject: [PATCH 84/92] fix: manually tested directory_packager, working version --- .../directory_flattener.py | 2 +- .../directory_packager/directory_packager.py | 41 +++++++++++++------ 2 files changed, 29 insertions(+), 14 deletions(-) diff --git a/src/datasetpreparator/directory_flattener/directory_flattener.py b/src/datasetpreparator/directory_flattener/directory_flattener.py index 3c7df54..cc676a7 100644 --- a/src/datasetpreparator/directory_flattener/directory_flattener.py +++ b/src/datasetpreparator/directory_flattener/directory_flattener.py @@ -170,7 +170,7 @@ def multiple_directory_flattener( output_directories = [] # Iterate over directories: - for item in tqdm(input_path.iterdir()): + for item in input_path.iterdir(): maybe_dir = Path(input_path, item).resolve() if not maybe_dir.is_dir(): logging.debug(f"Skipping {str(maybe_dir)}, not a directory.") diff --git a/src/datasetpreparator/directory_packager/directory_packager.py b/src/datasetpreparator/directory_packager/directory_packager.py index 28cb373..66ca93e 100644 --- a/src/datasetpreparator/directory_packager/directory_packager.py +++ b/src/datasetpreparator/directory_packager/directory_packager.py @@ -1,5 +1,4 @@ import logging -import os from pathlib import Path from typing import List from zipfile import ZipFile, ZIP_BZIP2 @@ -10,14 +9,14 @@ from datasetpreparator.utils.user_prompt import user_prompt_overwrite_ok -def multiple_dir_packager(input_path: str, force_overwrite: bool) -> List[Path]: +def multiple_dir_packager(input_path: Path, force_overwrite: bool) -> List[Path]: """ Packages the specified directory into a .zip archive. Parameters ---------- - input_path : str - Specifies the path which will be turned into a .zip archive. + input_path : Path + Specifies the path to a directoryu for which each of its directories will be turned into a .zip archive. 
force_overwrite : bool Specifies if the user wants to overwrite files or directories without being prompted @@ -28,14 +27,18 @@ def multiple_dir_packager(input_path: str, force_overwrite: bool) -> List[Path]: """ output_archives = [] - for directory in os.listdir(path=input_path): - directory_path = Path(input_path, directory).resolve() + for directory in input_path.iterdir(): + logging.debug(f"Processing directory: {str(directory)}") + + directory_path = Path(input_path, directory.name).resolve() if not directory_path.is_dir(): continue - output_archives.append( - dir_packager(directory_path=directory_path, force_overwrite=force_overwrite) + logging.debug(f"Packaging directory: {str(directory_path)}") + processed_path = dir_packager( + directory_path=directory_path, force_overwrite=force_overwrite ) + output_archives.append(processed_path) return output_archives @@ -60,13 +63,19 @@ def dir_packager(directory_path: Path, force_overwrite: bool) -> Path: final_archive_path = directory_path.with_suffix(".zip") - if user_prompt_overwrite_ok(final_archive_path, force_overwrite=force_overwrite): + if user_prompt_overwrite_ok( + path=final_archive_path, force_overwrite=force_overwrite + ): logging.info(f"Set final archive name to: {str(final_archive_path)}") with ZipFile(str(final_archive_path), "w") as zip_file: - for file in directory_path.iterdir(): - abs_filepath = os.path.join(directory_path, file) + for file in directory_path.rglob("*"): + abs_filepath = str(file.resolve()) + + logging.debug(f"Adding file: {abs_filepath}") zip_file.write( - filename=abs_filepath, arcname=file, compress_type=ZIP_BZIP2 + filename=abs_filepath, + arcname=file.relative_to(directory_path), + compress_type=ZIP_BZIP2, ) return final_archive_path @@ -77,7 +86,13 @@ def dir_packager(directory_path: Path, force_overwrite: bool) -> Path: ) @click.option( "--input_path", - type=click.Path(exists=True, dir_okay=True, file_okay=False, resolve_path=True), + type=click.Path( + exists=True, + dir_okay=True, + file_okay=False, + resolve_path=True, + path_type=Path, + ), required=True, help="Input path to the directory containing the dataset that is going to be processed by packaging into .zip archives.", ) From 157bd500c763c9c41b1f729fbdf21af6b9bb4735 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Wed, 8 Jan 2025 21:27:04 +0100 Subject: [PATCH 85/92] feat: (directory_packager.py) added tqdm progress bar --- .../directory_packager/directory_packager.py | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/src/datasetpreparator/directory_packager/directory_packager.py b/src/datasetpreparator/directory_packager/directory_packager.py index 66ca93e..e90d210 100644 --- a/src/datasetpreparator/directory_packager/directory_packager.py +++ b/src/datasetpreparator/directory_packager/directory_packager.py @@ -4,7 +4,8 @@ from zipfile import ZipFile, ZIP_BZIP2 import click - +from tqdm import tqdm +from tqdm.contrib.logging import logging_redirect_tqdm from datasetpreparator.settings import LOGGING_FORMAT from datasetpreparator.utils.user_prompt import user_prompt_overwrite_ok @@ -68,15 +69,20 @@ def dir_packager(directory_path: Path, force_overwrite: bool) -> Path: ): logging.info(f"Set final archive name to: {str(final_archive_path)}") with ZipFile(str(final_archive_path), "w") as zip_file: - for file in directory_path.rglob("*"): - abs_filepath = str(file.resolve()) - - logging.debug(f"Adding file: {abs_filepath}") - zip_file.write( - filename=abs_filepath,
- arcname=file.relative_to(directory_path), - compress_type=ZIP_BZIP2, - ) + with logging_redirect_tqdm(): + for file in tqdm( + list(directory_path.rglob("*")), + desc=f"Packaging {final_archive_path.name}", + unit="files", + ): + abs_filepath = str(file.resolve()) + + logging.debug(f"Adding file: {abs_filepath}") + zip_file.write( + filename=abs_filepath, + arcname=file.relative_to(directory_path), + compress_type=ZIP_BZIP2, + ) return final_archive_path From 8719e8838959bdd54aaebc98af86e69eab40a46f Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Wed, 8 Jan 2025 22:04:25 +0100 Subject: [PATCH 86/92] refactor: command saved to a variable --- .../utils/multiprocess.py | 39 +++++++++---------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/multiprocess.py b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/multiprocess.py index 207ed4e..3279a35 100644 --- a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/multiprocess.py +++ b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/multiprocess.py @@ -57,7 +57,7 @@ def process_single_replaypack(arguments: SC2InfoExtractorGoArguments) -> None: # Copying the mapping file that contains directory tree information: directory_contents = os.listdir(directory) if "processed_mapping.json" in directory_contents: - logging.debug("Found mapping json in %s", directory) + logging.debug(f"Found mapping json in {directory}") mapping_filepath = os.path.join(directory, "processed_mapping.json") output_mapping_filepath = os.path.join( output_directory_filepath, "processed_mapping.json" @@ -65,28 +65,27 @@ def process_single_replaypack(arguments: SC2InfoExtractorGoArguments) -> None: shutil.copy(mapping_filepath, output_mapping_filepath) logging.debug( - "Running subprocess for %s with output to %s", - directory, - output_directory_filepath, + f"Running subprocess for {directory} with output to {output_directory_filepath}", ) # TODO: Check if I can do a pipe from the subprocess to get multiple progress bars: - subprocess.run( - [ - # FIXME hardcoded binary name - str(PATH_TO_SC2INFOEXTRACTORGO), - f"-input={arguments.processing_input}/", - f"-output={arguments.output}/", - f"-perform_integrity_checks={arguments.perform_integrity_checks}", - f"-perform_validity_checks={arguments.perform_validity_checks}", - f"-perform_cleanup={arguments.perform_cleanup}", - f"-perform_chat_anonymization={arguments.perform_chat_anonymization}", - f"-number_of_packages={arguments.number_of_packages}", - f"-max_procs={arguments.max_procs}", - f"-log_level={arguments.log_level}", - f"-log_dir={output_directory_filepath}/", - ] - ) + + command = [ + # FIXME hardcoded binary name + str(PATH_TO_SC2INFOEXTRACTORGO), + f"-input={arguments.processing_input}/", + f"-output={arguments.output}/", + f"-perform_integrity_checks={arguments.perform_integrity_checks}", + f"-perform_validity_checks={arguments.perform_validity_checks}", + f"-perform_cleanup={arguments.perform_cleanup}", + f"-perform_chat_anonymization={arguments.perform_chat_anonymization}", + f"-number_of_packages={arguments.number_of_packages}", + f"-max_procs={arguments.max_procs}", + f"-log_level={arguments.log_level}", + f"-log_dir={output_directory_filepath}/", + ] + + subprocess.run(command) def sc2egset_replaypack_processor( From 579f34578e3fcf36338a12a02f1a6aa348116dcc Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Thu, 9 Jan 
2025 01:22:21 +0100 Subject: [PATCH 87/92] build(makefile): added targets for seeding maps locally --- makefile | 52 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 14 deletions(-) diff --git a/makefile b/makefile index f0d006f..50286aa 100644 --- a/makefile +++ b/makefile @@ -1,11 +1,13 @@ +CURRENT_DIR := $(dir $(abspath $(firstword $(MAKEFILE_LIST)))) + # Docker variables: DOCKER_DIR = ./docker DOCKER_FILE = $(DOCKER_DIR)/Dockerfile DOCKER_FILE_DEV = $(DOCKER_DIR)/Dockerfile.dev # Local devcontainer -DEVCONTAINER_NAME = datasetpreparator:devcontainer -DEV_BRANCH_CONTAINER = datasetpreparator:dev +DEVCONTAINER = datasetpreparator:devcontainer +DEV_BRANCH_CONTAINER = kaszanas/datasetpreparator:dev # Compose variables: TEST_COMPOSE = $(DOCKER_DIR)/docker-test-compose.yml @@ -115,6 +117,28 @@ test: ## Runs the tests using the local environment. ################### #### DOCKER ####### ################### +.PHONY: create_temp_container +create_temp_container: + @echo "Creating a temporary container." + @echo "Using the dev branch Docker image: $(DEVCONTAINER)" + docker create --name temp_container $(DEVCONTAINER) + +.PHONY: remove_temp_container +remove_temp_container: + @echo "Removing the temporary container." + docker rm temp_container + +.PHONY: seed_maps_locally +seed_maps_locally: + @echo "Seeding the maps locally." + @make docker_build_devcontainer + @echo "Using the dev branch Docker image: $(DEVCONTAINER)" + @make create_temp_container + docker cp \ + temp_container:/app/processing/maps \ + $(CURRENT_DIR)processing + @make remove_temp_container + .PHONY: docker_pull docker_pull_dev: ## Pulls the latest image from the Docker Hub. @echo "Pulling the dev branch Docker image: $(DEV_BRANCH_CONTAINER)" @@ -134,23 +158,23 @@ docker_build_devcontainer: ## Builds the development image containing all of the docker build \ --build-arg="PYTHON_VERSION=$(PYTHON_VERSION)" \ -f $(DOCKER_FILE_DEV) . \ - --tag=$(DEVCONTAINER_NAME) + --tag=$(DEVCONTAINER) .PHONY: docker_run_test docker_run_test: ## Runs the test command using Docker. docker run --rm \ - $(DEVCONTAINER_NAME) \ + $(DEVCONTAINER) \ sh -c \ $(TEST_COMMAND) .PHONY: docker_run_dev docker_run_dev: ## Runs the development image containing all of the tools. - @echo "Running the devcontainer image: $(DEVCONTAINER_NAME)" + @echo "Running the devcontainer image: $(DEVCONTAINER)" docker run \ -v ".:/app" \ -it \ -e "TEST_WORKSPACE=/app" \ - $(DEVCONTAINER_NAME) \ + $(DEVCONTAINER) \ bash ################### @@ -170,20 +194,20 @@ doc_build: ## Builds the Mkdocs documentation. docker_doc_build: ## Builds the Mkdocs documentation using Docker. @echo "Building the Mkdocs documentation using Docker." @make docker_build_devcontainer - @echo "Using the devcontainer image: $(DEVCONTAINER_NAME)" + @echo "Using the devcontainer image: $(DEVCONTAINER)" docker run \ -v "./docs:/docs" \ - $(DEVCONTAINER_NAME) \ + $(DEVCONTAINER) \ poetry run mkdocs build .PHONY: docker_doc_build_action docker_doc_build_action: ## Builds the Mkdocs documentation using Docker. @echo "Building the Mkdocs documentation using Docker." @make docker_build_devcontainer - @echo "Using the devcontainer image: $(DEVCONTAINER_NAME)" + @echo "Using the devcontainer image: $(DEVCONTAINER)" docker run \ -v "./docs:/docs" \ - $(DEVCONTAINER_NAME) \ + $(DEVCONTAINER) \ poetry run mkdocs build ################### @@ -193,19 +217,19 @@ docker_doc_build_action: ## Builds the Mkdocs documentation using Docker. 
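The seed_maps_locally recipe above reduces to: create a stopped container from the devcontainer image, docker cp the pre-seeded maps out of it, then remove the container. A rough Python equivalent, assuming the Docker CLI is on PATH and the image tag matches the makefile variables, might look like:

```python
import subprocess
from pathlib import Path

def seed_maps_locally(image: str = "datasetpreparator:devcontainer") -> None:
    # Create (but do not start) a temporary container from the image:
    subprocess.run(["docker", "create", "--name", "temp_container", image], check=True)
    try:
        # Copy the maps baked into the image into the local processing directory:
        Path("processing").mkdir(exist_ok=True)
        subprocess.run(
            ["docker", "cp", "temp_container:/app/processing/maps", "./processing"],
            check=True,
        )
    finally:
        # Remove the temporary container even if the copy fails:
        subprocess.run(["docker", "rm", "temp_container"], check=True)
```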
docker_pre_commit: ## Runs pre-commit hooks using Docker. @echo "Running pre-commit hooks using Docker." @make docker_build_devcontainer - @echo "Using the devcontainer image: $(DEVCONTAINER_NAME)" + @echo "Using the devcontainer image: $(DEVCONTAINER)" docker run \ -v ".:/app" \ - $(DEVCONTAINER_NAME) \ + $(DEVCONTAINER) \ pre-commit run --all-files .PHONY: docker_pre_commit_action docker_pre_commit_action: ## Runs pre-commit hooks using Docker. @echo "Running pre-commit hooks using Docker." @make docker_build_devcontainer - @echo "Using the devcontainer image: $(DEVCONTAINER_NAME)" + @echo "Using the devcontainer image: $(DEVCONTAINER)" docker run \ - $(DEVCONTAINER_NAME) \ + $(DEVCONTAINER) \ pre-commit run --all-files ################### From 14c8cf9923f90552269ec3a929ea9d00f7f45600 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Thu, 9 Jan 2025 01:23:08 +0100 Subject: [PATCH 88/92] build(docker): changed location of the maps directory in docker --- docker/Dockerfile | 2 +- docker/Dockerfile.dev | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 3c6c7d7..30e78d6 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -12,7 +12,7 @@ WORKDIR /app # Copying the replay parsing tool: # sc2egset_replaypack_processor requires the .exe file to be in the same directory as the script: COPY --from=extractor /app/SC2InfoExtractorGo /app/SC2InfoExtractorGo -COPY --from=extractor /app/maps/ /app/maps/ +COPY --from=extractor /app/maps/ /app/processing/maps/ # Ensure the executable has the right permissions RUN chmod +x /app/SC2InfoExtractorGo diff --git a/docker/Dockerfile.dev b/docker/Dockerfile.dev index df4fbab..a86b2e5 100644 --- a/docker/Dockerfile.dev +++ b/docker/Dockerfile.dev @@ -19,7 +19,7 @@ WORKDIR /app # Copying the replay parsing tool: # sc2egset_replaypack_processor requires the .exe file to be in the same directory as the script: COPY --from=extractor /app/SC2InfoExtractorGo /app/SC2InfoExtractorGo -COPY --from=extractor /app/maps/ /app/maps/ +COPY --from=extractor /app/maps/ /app/processing/maps/ # Ensure the executable has the right permissions: RUN chmod +x /app/SC2InfoExtractorGo From 7356c1d63c378766717367ec9681d258b03fee74 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Thu, 9 Jan 2025 03:09:59 +0100 Subject: [PATCH 89/92] feat: ignoring maps directory --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index fbada7e..98a25b2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,8 @@ /.vscode /venv* -/processing +processing/ +maps/ *.SC2Replay *.SC2Map From 9569bc80b83473e7d6c71642cee7f915418f32ee Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Thu, 9 Jan 2025 03:12:13 +0100 Subject: [PATCH 90/92] fix(directory_flattener.py): manually tested flattening directories --- .../directory_flattener/directory_flattener.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/datasetpreparator/directory_flattener/directory_flattener.py b/src/datasetpreparator/directory_flattener/directory_flattener.py index cc676a7..8b2f6db 100644 --- a/src/datasetpreparator/directory_flattener/directory_flattener.py +++ b/src/datasetpreparator/directory_flattener/directory_flattener.py @@ -48,7 +48,7 @@ def calculate_file_hash(file_path: Path) -> str: """ # Open the file, read it in binary mode and calculate the hash: - path_str = 
file_path.as_posix().encode("utf-8") + path_str = str(file_path).encode("utf-8") path_hash = hashlib.md5(path_str).hexdigest() @@ -160,11 +160,7 @@ def multiple_directory_flattener( logging.error(f"Input path must exist! {str(input_path.resolve())}") return (False, [Path()]) - # Output path must be a directory: - if not output_path.is_dir(): - logging.error(f"Output path must be a directory! {str(output_path.resolve())}") - return (False, [Path()]) - + # Output path must be an existing directory: if user_prompt_overwrite_ok(path=output_path, force_overwrite=force_overwrite): output_path.mkdir(exist_ok=True) @@ -224,7 +220,7 @@ def multiple_directory_flattener( @click.option( "--output_path", type=click.Path( - exists=True, + exists=False, dir_okay=True, file_okay=False, resolve_path=True, From 377d838b1ab60919aaad4e0bd5cace0998904228 Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Thu, 9 Jan 2025 03:13:09 +0100 Subject: [PATCH 91/92] feat: separate sc2egset_pipeline and replaypack_processor --- .../sc2egset_pipeline.py | 220 ++++++++++++++++++ .../sc2egset_replaypack_processor.py | 57 +++-- .../utils/download_maps.py | 2 +- .../utils/multiprocess.py | 4 + 4 files changed, 260 insertions(+), 23 deletions(-) create mode 100644 src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_pipeline.py diff --git a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_pipeline.py b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_pipeline.py new file mode 100644 index 0000000..44f0c29 --- /dev/null +++ b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_pipeline.py @@ -0,0 +1,220 @@ +import logging +from pathlib import Path + +import click + +from datasetpreparator.directory_flattener.directory_flattener import ( + multiple_directory_flattener, +) +from datasetpreparator.directory_packager.directory_packager import ( + multiple_dir_packager, +) +from datasetpreparator.file_renamer.file_renamer import file_renamer +from datasetpreparator.processed_mapping_copier.processed_mapping_copier import ( + processed_mapping_copier, +) +from datasetpreparator.sc2.sc2egset_replaypack_processor.utils.download_maps import ( + sc2infoextractorgo_map_download, +) +from datasetpreparator.sc2.sc2egset_replaypack_processor.utils.file_copier import ( + move_files, +) +from datasetpreparator.sc2.sc2egset_replaypack_processor.utils.multiprocess import ( + sc2egset_replaypack_processor, +) +from datasetpreparator.sc2.sc2egset_replaypack_processor.utils.replaypack_processor_args import ( + ReplaypackProcessorArguments, +) +from datasetpreparator.settings import LOGGING_FORMAT +from datasetpreparator.utils.user_prompt import user_prompt_overwrite_ok + + +def prepare_sc2reset( + output_path: Path, + replaypacks_input_path: Path, + n_processes: int, + force_overwrite: bool, + maps_output_path: Path, + directory_flattener_output_path: Path, +) -> None: + # Directory flattener: + + if user_prompt_overwrite_ok( + path=directory_flattener_output_path, force_overwrite=force_overwrite + ): + directory_flattener_output_path.mkdir(exist_ok=True) + + logging.info("Flattening directories...") + multiple_directory_flattener( + input_path=replaypacks_input_path, + output_path=directory_flattener_output_path, + file_extension=".SC2Replay", + ) + + # Separate arguments for map downloading are required because the maps directory should be placed + # ready for the SC2ReSet to be zipped and moved to the output directory: + map_downloader_args = 
ReplaypackProcessorArguments( + input_path=replaypacks_input_path, + output_path=directory_flattener_output_path, + n_processes=n_processes, + maps_directory=maps_output_path, + ) + + # NOTE: Chinese maps need to be pre-seeded so that they can be + # hosted later on. They are also needed for the SC2EGSet to reproduce the results. + # Download all maps for multiprocess, map files are used as a source of truth for + # SC2InfoExtractorGo downloading mechanism: + logging.info("Downloading all maps using SC2InfoExtractorGo...") + sc2infoextractorgo_map_download(arguments=map_downloader_args) + + # Package SC2ReSet and the downloaded maps, move to the output directory: + logging.info("Packaging SC2ReSet and the downloaded maps...") + multiple_dir_packager(input_path=directory_flattener_output_path, force_overwrite=force_overwrite) + + sc2reset_output_path = Path(output_path, "SC2ReSet").resolve() + logging.info("Moving SC2ReSet to the output directory...") + move_files( + input_path=directory_flattener_output_path, + output_path=sc2reset_output_path, + force_overwrite=force_overwrite, + ) + + +def prepare_sc2egset( + replaypacks_input_path: Path, + output_path: Path, + n_processes: int, + maps_output_path: Path, + directory_flattener_output_path: Path, + force_overwrite: bool, +) -> None: + # SC2EGSet Processor: + sc2egset_processor_args = ReplaypackProcessorArguments( + input_path=replaypacks_input_path, + output_path=output_path, + n_processes=n_processes, + maps_directory=maps_output_path, + ) + + # Process SC2EGSet, this will use the same map directory as the previous step: + logging.info("Processing SC2EGSet using SC2InfoExtractorGo...") + sc2egset_replaypack_processor( + arguments=sc2egset_processor_args, force_overwrite=force_overwrite + ) + + # Processed Mapping Copier: + logging.info("Copying processed_mapping.json files...") + processed_mapping_copier( + input_path=directory_flattener_output_path, output_path=output_path + ) + + # File Renamer: + logging.info("Renaming auxiliary (log) files...") + file_renamer(input_path=output_path) + + logging.info("Packaging SC2EGSet...") + multiple_dir_packager(input_path=output_path, force_overwrite=force_overwrite) + + # SC2EGSet should be ready, move it to the final output directory: + sc2egset_output_path = Path(output_path, "SC2EGSet").resolve() + logging.info("Moving SC2EGSet to the output directory...") + move_files( + input_path=output_path, + output_path=sc2egset_output_path, + force_overwrite=force_overwrite, + ) + + +@click.command( + help="Tool used to recreate SC2ReSet and SC2EGSet Dataset. Depends on SC2InfoExtractorGo (https://github.com/Kaszanas/SC2InfoExtractorGo) which is executed on multiple replaypack directories in the process. Entire pipeline for replay processing runs with the command line arguments used to create SC2EGSet. Assists in processing StarCraft 2 (SC2) datasets." +) +@click.option( + "--input_path", + type=click.Path( + exists=True, + dir_okay=True, + file_okay=False, + resolve_path=True, + path_type=Path, + ), + required=True, + help="Input directory containing multiple StarCraft 2 replaypacks. 
These files will be processed exactly the same as SC2ReSet and SC2EGSet datasets.", +) +@click.option( + "--output_path", + type=click.Path( + exists=True, + dir_okay=True, + file_okay=False, + resolve_path=True, + path_type=Path, + ), + required=True, + help="Output path where the tool will place the processed files for SC2ReSet and SC2EGSet dataset as children directories.", +) +@click.option( + "--n_processes", + type=int, + default=4, + required=True, + help="Number of processes to be spawned for the dataset processing with SC2InfoExtractorGo.", +) +@click.option( + "--force_overwrite", + type=bool, + default=False, + required=True, + help="Flag that specifies if the user wants to overwrite files or directories without being prompted.", +) +@click.option( + "--log", + type=click.Choice(["INFO", "DEBUG", "ERROR", "WARN"], case_sensitive=False), + default="WARN", + help="Log level. Default is WARN.", +) +def main( + input_path: Path, + output_path: Path, + n_processes: int, + force_overwrite: bool, + log: str, +) -> None: + numeric_level = getattr(logging, log.upper(), None) + if not isinstance(numeric_level, int): + raise ValueError(f"Invalid log level: {numeric_level}") + logging.basicConfig(format=LOGGING_FORMAT, level=numeric_level) + + # Create output directory if it does not exist: + if user_prompt_overwrite_ok(path=output_path, force_overwrite=force_overwrite): + output_path.mkdir(exist_ok=True) + + # This input will be flattened: + replaypacks_input_path = Path(input_path).resolve() + output_path = Path(output_path).resolve() + + maps_output_path = Path(output_path, "maps").resolve() + directory_flattener_output_path = Path( + output_path, "directory_flattener_output" + ).resolve() + + # TODO: Recreate the entire pipeline for SC2ReSet and SC2EGSet: + prepare_sc2reset( + output_path=output_path, + replaypacks_input_path=replaypacks_input_path, + n_processes=n_processes, + force_overwrite=force_overwrite, + maps_output_path=maps_output_path, + directory_flattener_output_path=directory_flattener_output_path, + ) + + prepare_sc2egset( + replaypacks_input_path=replaypacks_input_path, + output_path=output_path, + n_processes=n_processes, + maps_output_path=maps_output_path, + directory_flattener_output_path=directory_flattener_output_path, + force_overwrite=force_overwrite, + ) + + +if __name__ == "__main__": + main() diff --git a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py index 415592e..d621958 100644 --- a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py +++ b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/sc2egset_replaypack_processor.py @@ -14,7 +14,7 @@ processed_mapping_copier, ) from datasetpreparator.sc2.sc2egset_replaypack_processor.utils.download_maps import ( - sc2info_extractor_go_map_download, + sc2infoextractorgo_map_download, ) from datasetpreparator.sc2.sc2egset_replaypack_processor.utils.file_copier import ( move_files, @@ -65,7 +65,7 @@ def prepare_sc2reset( # Download all maps for multiprocess, map files are used as a source of truth for # SC2InfoExtractorGo downloading mechanism: logging.info("Downloading all maps using SC2InfoExtractorGo...") - sc2info_extractor_go_map_download(arguments=map_downloader_args) + sc2infoextractorgo_map_download(arguments=map_downloader_args) # Package SC2ReSet and the downloaded maps, move to the output directory: logging.info("Packaging SC2ReSet and the downloaded maps...") @@ -141,7 
+141,7 @@ def prepare_sc2egset( @click.option( "--output_path", type=click.Path( - exists=True, + exists=False, dir_okay=True, file_okay=False, resolve_path=True, @@ -150,6 +150,18 @@ def prepare_sc2egset( required=True, help="Output path where the tool will place the processed files for SC2ReSet and SC2EGSet dataset as children directories.", ) +@click.option( + "--maps_path", + type=click.Path( + exists=True, + dir_okay=True, + file_okay=False, + resolve_path=True, + path_type=Path, + ), + required=True, + help="Path to the StarCraft 2 maps that will be used in replay processing. If there are no maps, they will be downloaded.", +) @click.option( "--n_processes", type=int, @@ -173,6 +185,7 @@ def prepare_sc2egset( def main( input_path: Path, output_path: Path, + maps_path: Path, n_processes: int, force_overwrite: bool, log: str, @@ -182,35 +195,35 @@ def main( raise ValueError(f"Invalid log level: {numeric_level}") logging.basicConfig(format=LOGGING_FORMAT, level=numeric_level) + output_path = output_path.resolve() + + replaypacks_input_path = input_path.resolve() + maps_path = maps_path.resolve() + if user_prompt_overwrite_ok(path=maps_path, force_overwrite=force_overwrite): + maps_path.mkdir(exist_ok=True) + # Create output directory if it does not exist: if user_prompt_overwrite_ok(path=output_path, force_overwrite=force_overwrite): output_path.mkdir(exist_ok=True) - # This input will be flattened: - replaypacks_input_path = Path(input_path).resolve() - output_path = Path(output_path).resolve() - - maps_output_path = Path(output_path, "maps").resolve() - directory_flattener_output_path = Path( - output_path, "directory_flattener_output" - ).resolve() - - # TODO: Recreate the entire pipeline for SC2ReSet and SC2EGSet: - prepare_sc2reset( + # Pre-processing, downloading maps and flattening directories: + map_downloader_args = ReplaypackProcessorArguments( + input_path=replaypacks_input_path, output_path=output_path, - replaypacks_input_path=replaypacks_input_path, + maps_directory=maps_path, n_processes=n_processes, - force_overwrite=force_overwrite, - maps_output_path=maps_output_path, - directory_flattener_output_path=directory_flattener_output_path, ) + sc2infoextractorgo_map_download(arguments=map_downloader_args) - prepare_sc2egset( - replaypacks_input_path=replaypacks_input_path, + # Main processing + sc2egset_processor_args = ReplaypackProcessorArguments( + input_path=replaypacks_input_path, output_path=output_path, + maps_directory=maps_path, n_processes=n_processes, - maps_output_path=maps_output_path, - directory_flattener_output_path=directory_flattener_output_path, + ) + sc2egset_replaypack_processor( + arguments=sc2egset_processor_args, force_overwrite=force_overwrite ) diff --git a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/download_maps.py b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/download_maps.py index 1f4db97..b331f52 100644 --- a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/download_maps.py +++ b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/download_maps.py @@ -8,7 +8,7 @@ ) -def sc2info_extractor_go_map_download(arguments: ReplaypackProcessorArguments): +def sc2infoextractorgo_map_download(arguments: ReplaypackProcessorArguments): # Pre-process, download all maps: logging.info("Downloading all maps...") map_download_arguments = SC2InfoExtractorGoArguments.get_download_maps_args( diff --git a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/multiprocess.py 
b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/multiprocess.py index 3279a35..487d432 100644 --- a/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/multiprocess.py +++ b/src/datasetpreparator/sc2/sc2egset_replaypack_processor/utils/multiprocess.py @@ -90,6 +90,7 @@ def process_single_replaypack(arguments: SC2InfoExtractorGoArguments) -> None: def sc2egset_replaypack_processor( arguments: ReplaypackProcessorArguments, + force_overwrite: bool, ): """ Processes multiple StarCraft II replaypacks @@ -99,6 +100,8 @@ def sc2egset_replaypack_processor( ---------- arguments : ReplaypackProcessorArguments Specifies the arguments as per the ReplaypackProcessorArguments class fields. + force_overwrite : bool + Specifies whether the output directory should be overwritten. """ multiprocessing_list = [] @@ -106,6 +109,7 @@ def sc2egset_replaypack_processor( sc2_info_extractor_go_args = define_sc2egset_args( arguments=arguments, maybe_dir=maybe_dir, + force_overwrite=force_overwrite, ) if sc2_info_extractor_go_args is not None: multiprocessing_list.append(sc2_info_extractor_go_args) From af667642240974a6f6661d6cf2d8e799abff2bfe Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Thu, 9 Jan 2025 18:31:40 +0100 Subject: [PATCH 92/92] test: fixing tests after func args change --- tests/test_cases/sc2egset_replaypack_processor_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_cases/sc2egset_replaypack_processor_test.py b/tests/test_cases/sc2egset_replaypack_processor_test.py index 4739d38..a6923ed 100644 --- a/tests/test_cases/sc2egset_replaypack_processor_test.py +++ b/tests/test_cases/sc2egset_replaypack_processor_test.py @@ -57,7 +57,7 @@ def test_sc2_replaypack_processor(self): n_processes=1, ) - sc2egset_replaypack_processor(arguments=arguments) + sc2egset_replaypack_processor(arguments=arguments, force_overwrite=True) # TODO: Check if output contains the same directories as for input. # TODO: Check if outputs contain extracted JSON files with valid fields.
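Taken together, the command-list refactor in patch 86 and the force_overwrite signature change exercised by this test reduce the processing core to: build one SC2InfoExtractorGo command list per replaypack, then fan the invocations out over a process pool. A condensed sketch of that dispatch pattern, with the argument plumbing simplified and the flag values purely illustrative:

```python
import subprocess
from multiprocessing import Pool
from typing import List

def run_extractor(command: List[str]) -> int:
    # One worker runs one SC2InfoExtractorGo subprocess per replaypack:
    return subprocess.run(command).returncode

def process_replaypacks(commands: List[List[str]], n_processes: int) -> List[int]:
    # Fan the per-replaypack commands out over a pool of worker processes:
    with Pool(processes=n_processes) as pool:
        return pool.map(run_extractor, commands)

if __name__ == "__main__":
    commands = [
        ["./SC2InfoExtractorGo", "-input=./replaypack_a/", "-output=./output_a/"],
        ["./SC2InfoExtractorGo", "-input=./replaypack_b/", "-output=./output_b/"],
    ]
    print(process_replaypacks(commands, n_processes=2))
```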