Merge pull request #799 from Avaiga/fix/migrate-cli-config

fix: add global config migration
Avaiga · Oct 19, 2023 · 5c56da2 · 5c56da2
2 parents 61cea4d + 15d9617
commit 5c56da2
Show file tree

Hide file tree

Showing 2 changed files with 41 additions and 21 deletions.
diff --git a/src/taipy/core/_entity/_migrate/_utils.py b/src/taipy/core/_entity/_migrate/_utils.py
@@ -115,16 +115,17 @@ def __is_cacheable(task: Dict, data: Dict) -> bool:
     return True
 
 
-def __migrate_task(task: Dict, data: Dict) -> Dict:
-    # parent_id has been renamed to owner_id
-    try:
-        task["owner_id"] = task["parent_id"]
-        del task["parent_id"]
-    except KeyError:
-        pass
-
-    # properties was not present in 2.0
-    task["properties"] = task.get("properties", {})
+def __migrate_task(task: Dict, data: Dict, is_entity: bool = True) -> Dict:
+    if is_entity:
+        # parent_id has been renamed to owner_id
+        try:
+            task["owner_id"] = task["parent_id"]
+            del task["parent_id"]
+        except KeyError:
+            pass
+
+        # properties was not present in 2.0
+        task["properties"] = task.get("properties", {})
 
     # skippable was not present in 2.0
     task["skippable"] = task.get("skippable", False) or __is_cacheable(task, data)
@@ -137,9 +138,8 @@ def __migrate_task_entity(task: Dict, data: Dict) -> Dict:
     return __migrate_task(task, data)
 
 
-def __migrate_task_config(id: str, task: Dict, config: Dict) -> Dict:
-    task = __update_config_parent_ids(id, task, "TASK", config)
-    task = __migrate_task(task, config["DATA_NODE"])
+def __migrate_task_config(task: Dict, config: Dict) -> Dict:
+    task = __migrate_task(task, config["DATA_NODE"], False)
 
     # Convert the skippable boolean to a string if needed
     if isinstance(task.get("skippable"), bool):
@@ -205,10 +205,10 @@ def __migrate_datanode_entity(datanode: Dict, data: Dict) -> Dict:
     return __migrate_datanode(datanode)
 
 
-def __migrate_datanode_config(id: str, datanode: Dict, config: Dict) -> Dict:
-    datanode_cfg = __update_config_parent_ids(id, datanode, "DATA_NODE", config)
-    datanode_cfg = __migrate_datanode(datanode_cfg)
-    return datanode_cfg
+def __migrate_datanode_config(datanode: Dict) -> Dict:  # mutates and returns the given config dict
+    if datanode.get("storage_type") in ("csv", "json"):  # a config without a storage type is left as is
+        datanode.setdefault("encoding", "utf-8")  # an encoding that is already set is kept
+    return datanode
 
 
 def __migrate_job(job: Dict) -> Dict:
@@ -220,13 +220,34 @@ def __migrate_job(job: Dict) -> Dict:
     return job
 
 
+def __migrate_global_config(config: Dict):
+    """Drop obsolete TAIPY fields and move the global ones to CORE; mutates and returns config."""
+    fields_to_remove = ["clean_entities_enabled"]
+    fields_to_move = ["root_folder", "storage_folder", "repository_type", "read_entity_retry"]
+    taipy_section = config.get("TAIPY", {})
+
+    for field in fields_to_remove:
+        taipy_section.pop(field, None)
+
+    for field in fields_to_move:
+        # Handle each field independently: a field absent from TAIPY must not stop
+        # the fields after it from being moved. Values already in CORE are kept.
+        if field in taipy_section and field not in config.setdefault("CORE", {}):
+            config["CORE"][field] = taipy_section.pop(field)
+
+    return config
+
+
 def __migrate_version(version: Dict) -> Dict:
     config_str = version["config"]
 
     # Remove PIPELINE scope
     config_str = config_str.replace("PIPELINE:SCOPE", "SCENARIO:SCOPE")
     config = json.loads(config_str)
 
+    # remove unused fields and move others from TAIPY to CORE section
+    config = __migrate_global_config(config)
+
     # replace pipelines for tasks
     pipelines_section = config["PIPELINE"]
     for id, content in config["SCENARIO"].items():
@@ -238,11 +259,10 @@ def __migrate_version(version: Dict) -> Dict:
         del config["SCENARIO"][id]["pipelines"]
 
     for id, content in config["TASK"].items():
-        config["TASK"][id] = __migrate_task_config(id, content, config)
+        config["TASK"][id] = __migrate_task_config(content, config)
 
-    config["JOB"] = __migrate_job(config["JOB"])
     for id, content in config["DATA_NODE"].items():
-        config["DATA_NODE"][id] = __migrate_datanode_config(id, content, config)
+        config["DATA_NODE"][id] = __migrate_datanode_config(content)
 
     del config["PIPELINE"]
 

diff --git a/tests/core/_entity/data_sample_migrated/version/b11ea9f9-b2d7-4b58-a1c2-c6b1700bed97.json b/tests/core/_entity/data_sample_migrated/version/b11ea9f9-b2d7-4b58-a1c2-c6b1700bed97.json
@@ -1,5 +1,5 @@
 {
 "id": "b11ea9f9-b2d7-4b58-a1c2-c6b1700bed97",
-"config": "{\n\"TAIPY\": {\n\"root_folder\": \"./taipy/\",\n\"storage_folder\": \".data/\",\n\"clean_entities_enabled\": \"ENV[TAIPY_CLEAN_ENTITIES_ENABLED]\",\n\"repository_type\": \"filesystem\",\n\"read_entity_retry\": \"3:int\"\n},\n\"gui\": {},\n\"JOB\": {\n\"mode\": \"development\",\n\"max_nb_of_workers\": \"1:int\",\n\"submit_entity_id\": null\n},\n\"CORE\": {\n\"mode\": \"development\",\n\"version_number\": \"\",\n\"force\": \"False:bool\",\n\"clean_entities\": \"False:bool\"\n},\n\"DATA_NODE\": {\n\"default\": {\n\"storage_type\": \"pickle\",\n\"scope\": \"SCENARIO:SCOPE\",\n\"parent_ids\": [],\n\"last_edit_date\": null,\n\"edit_in_progress\": null\n},\n\"initial_dataset\": {\n\"storage_type\": \"csv\",\n\"scope\": \"GLOBAL:SCOPE\",\n\"path\": \"data/dataset.csv\",\n\"has_header\": \"True:bool\",\n\"exposed_type\": \"pandas\",\n\"parent_ids\": [],\n\"last_edit_date\": null,\n\"edit_in_progress\": null\n},\n\"day\": {\n\"storage_type\": \"pickle\",\n\"scope\": \"SCENARIO:SCOPE\",\n\"default_data\": \"2021-07-26T00:00:00:datetime\",\n\"parent_ids\": [],\n\"last_edit_date\": null,\n\"edit_in_progress\": null\n},\n\"cleaned_dataset\": {\n\"storage_type\": \"parquet\",\n\"scope\": \"GLOBAL:SCOPE\",\n\"engine\": \"pyarrow\",\n\"compression\": \"snappy\",\n\"exposed_type\": \"pandas\",\n\"parent_ids\": [],\n\"last_edit_date\": null,\n\"edit_in_progress\": null\n},\n\"predictions\": {\n\"storage_type\": \"pickle\",\n\"scope\": \"SCENARIO:SCOPE\",\n\"parent_ids\": [],\n\"last_edit_date\": null,\n\"edit_in_progress\": null\n},\n\"evaluation\": {\n\"storage_type\": \"pickle\",\n\"scope\": \"SCENARIO:SCOPE\",\n\"parent_ids\": [],\n\"last_edit_date\": null,\n\"edit_in_progress\": null\n}\n},\n\"TASK\": {\n\"default\": {\n\"function\": null,\n\"inputs\": [],\n\"outputs\": [],\n\"skippable\": \"False:bool\",\n\"parent_ids\": [],\n\"properties\": {}\n},\n\"clean_data\": {\n\"function\": \"algos.algos.clean_data:function\",\n\"inputs\": 
[\n\"initial_dataset:SECTION\"\n],\n\"outputs\": [\n\"cleaned_dataset:SECTION\"\n],\n\"parent_ids\": [\n\"clean_data:SECTION\"\n],\n\"properties\": {},\n\"skippable\": \"True:bool\"\n},\n\"predict\": {\n\"function\": \"algos.algos.predict:function\",\n\"inputs\": [\n\"cleaned_dataset:SECTION\",\n\"day:SECTION\"\n],\n\"outputs\": [\n\"predictions:SECTION\"\n],\n\"skippable\": \"True:bool\",\n\"parent_ids\": [\n\"predict:SECTION\"\n],\n\"properties\": {}\n},\n\"evaluate\": {\n\"function\": \"algos.algos.evaluate:function\",\n\"inputs\": [\n\"predictions:SECTION\",\n\"cleaned_dataset:SECTION\",\n\"day:SECTION\"\n],\n\"outputs\": [\n\"evaluation:SECTION\"\n],\n\"skippable\": \"False:bool\",\n\"parent_ids\": [\n\"evaluate:SECTION\"\n],\n\"properties\": {}\n}\n},\n\"SCENARIO\": {\n\"default\": {\n\"comparators\": {},\n\"frequency\": null,\n\"tasks\": []\n},\n\"scenario\": {\n\"comparators\": {},\n\"frequency\": \"MONTHLY:FREQUENCY\",\n\"tasks\": [\n\"clean_data:SECTION\",\n\"predict:SECTION\",\n\"evaluate:SECTION\"\n]\n}\n}\n}",
+"config": "{\n\"TAIPY\": {},\n\"gui\": {},\n\"JOB\": {\n\"mode\": \"development\",\n\"max_nb_of_workers\": \"1:int\"\n},\n\"CORE\": {\n\"mode\": \"development\",\n\"version_number\": \"\",\n\"force\": \"False:bool\",\n\"clean_entities\": \"False:bool\",\n\"root_folder\": \"./taipy/\",\n\"storage_folder\": \".data/\",\n\"repository_type\": \"filesystem\",\n\"read_entity_retry\": \"3:int\"\n},\n\"DATA_NODE\": {\n\"default\": {\n\"storage_type\": \"pickle\",\n\"scope\": \"SCENARIO:SCOPE\"\n},\n\"initial_dataset\": {\n\"storage_type\": \"csv\",\n\"scope\": \"GLOBAL:SCOPE\",\n\"path\": \"data/dataset.csv\",\n\"has_header\": \"True:bool\",\n\"exposed_type\": \"pandas\",\n\"encoding\": \"utf-8\"\n},\n\"day\": {\n\"storage_type\": \"pickle\",\n\"scope\": \"SCENARIO:SCOPE\",\n\"default_data\": \"2021-07-26T00:00:00:datetime\"\n},\n\"cleaned_dataset\": {\n\"storage_type\": \"parquet\",\n\"scope\": \"GLOBAL:SCOPE\",\n\"cacheable\": \"True:bool\",\n\"engine\": \"pyarrow\",\n\"compression\": \"snappy\",\n\"exposed_type\": \"pandas\"\n},\n\"predictions\": {\n\"storage_type\": \"pickle\",\n\"scope\": \"SCENARIO:SCOPE\"\n},\n\"evaluation\": {\n\"storage_type\": \"pickle\",\n\"scope\": \"SCENARIO:SCOPE\"\n}\n},\n\"TASK\": {\n\"default\": {\n\"function\": null,\n\"inputs\": [],\n\"outputs\": [],\n\"skippable\": \"False:bool\"\n},\n\"clean_data\": {\n\"function\": \"algos.algos.clean_data:function\",\n\"inputs\": [\n\"initial_dataset:SECTION\"\n],\n\"outputs\": [\n\"cleaned_dataset:SECTION\"\n],\n\"skippable\": \"True:bool\"\n},\n\"predict\": {\n\"function\": \"algos.algos.predict:function\",\n\"inputs\": [\n\"cleaned_dataset:SECTION\",\n\"day:SECTION\"\n],\n\"outputs\": [\n\"predictions:SECTION\"\n],\n\"skippable\": \"True:bool\"\n},\n\"evaluate\": {\n\"function\": \"algos.algos.evaluate:function\",\n\"inputs\": [\n\"predictions:SECTION\",\n\"cleaned_dataset:SECTION\",\n\"day:SECTION\"\n],\n\"outputs\": [\n\"evaluation:SECTION\"\n],\n\"skippable\": 
\"False:bool\"\n}\n},\n\"SCENARIO\": {\n\"default\": {\n\"comparators\": {},\n\"frequency\": null,\n\"tasks\": []\n},\n\"scenario\": {\n\"comparators\": {},\n\"frequency\": \"MONTHLY:FREQUENCY\",\n\"tasks\": [\n\"clean_data:SECTION\",\n\"predict:SECTION\",\n\"evaluate:SECTION\"\n]\n}\n}\n}",
 "creation_date": "2023-10-08T21:12:32.925153"
 }