PnX-SI · andriacap · Nov 20, 2024 · Nov 21, 2024 · lpofredc · Nov 28, 2024
diff --git a/backend/gncitizen/utils/taxonomy.py b/backend/gncitizen/utils/taxonomy.py
@@ -3,6 +3,7 @@
 """A module to manage taxonomy"""
 
 from threading import Thread
+import unicodedata
 from typing import Dict, List, Optional, Union
 
 import requests
@@ -39,11 +40,14 @@
 
 
 def taxhub_rest_get_taxon_list(taxhub_list_id: int) -> Dict:
-    url = f"{TAXHUB_API}biblistes/taxons/{taxhub_list_id}"
+    url = f"{TAXHUB_API}taxref"
     params = {
+        "id_liste": taxhub_list_id,
+        "fields": "medias,medias.types,attributs,attributs.bib_attribut",
         "existing": "true",
         "order": "asc",
-        "orderby": "taxref.nom_complet",
+        "orderby": "nom_complet",
+        "limit": 1000,
     }
     res = session.get(
         url,
@@ -58,7 +62,7 @@ def taxhub_rest_get_all_lists() -> Optional[Dict]:
     url = f"{TAXHUB_API}biblistes"
     res = session.get(
         url,
-        timeout=5,
+        timeout=10000,  # TODO : suivant le nombre de taxons dans les listes cette requête peut êtr très longue (voir pour améliorer performance coté TaxHub avec notamment la définition dans le modèle BibListes : https://github.com/PnX-SI/TaxHub/blob/ea9434de5a1f227131e0e8640ad17f8a25e8a39d/apptax/taxonomie/models.py#L238 )
     )
     res.raise_for_status()
     if res.status_code == 200:
@@ -74,40 +78,46 @@ def taxhub_rest_get_all_lists() -> Optional[Dict]:
     return None
 
 
-def taxhub_rest_get_taxon(taxhub_id: int) -> Taxon:
-    if not taxhub_id:
-        raise ValueError("Null value for taxhub taxon id")
-    url = f"{TAXHUB_API}bibnoms/{taxhub_id}"
-    for _ in range(5):
-        try:
-            res = session.get(url, timeout=5)
-            break
-        except requests.exceptions.ReadTimeout:
-            continue
-
-    res.raise_for_status()
-    data = res.json()
-    data.pop("listes", None)
-    data.pop("attributs", None)
-    if len(data["medias"]) > 0:
-        media_types = ("Photo_gncitizen", "Photo_principale", "Photo")
-        i = 0
-        while i < len(media_types):
-            filtered_medias = [d for d in data["medias"] if d["nom_type_media"] == media_types[i]]
-            if len(filtered_medias) >= 1:
-                break
-            i += 1
-        medias = filtered_medias[:1]
-        data["medias"] = medias
-
-    return data
+#  TODO: check if useless and remove
+# def taxhub_rest_get_taxon(taxhub_id: int) -> Taxon:
+#     if not taxhub_id:
+#         raise ValueError("Null value for taxhub taxon id")
+#     url = f"{TAXHUB_API}taxref/{taxhub_id}"
+#     for _ in range(5):
+#         try:
+#             res = session.get(url, timeout=5)
+#             break
+#         except requests.exceptions.ReadTimeout:
+#             continue
+
+#     res.raise_for_status()
+#     data = res.json()
+#     data.pop("listes", None)
+#     data.pop("attributs", None)
+#     if len(data["medias"]) > 0:
+#         media_types = ("Photo_gncitizen", "Photo_principale", "Photo")
+#         i = 0
+#         while i < len(media_types):
+#              # TODO: résoudre problème de récupérations de types de médias  (nom_type_media non accessible) ---> fonctionne avec branche de TH https://github.com/naturalsolutions/TaxHub/blob/fix/load_types_medias_from_taxref_route/apptax/taxonomie/routestaxref.py#L231
+#             filtered_medias = [
+#                 d for d in data["medias"] if d["types"]["nom_type_media"] == media_types[i]
+#             ]
+#             if len(filtered_medias) >= 1:
+#                 break
+#             i += 1
+#         medias = filtered_medias[:1]
+#         data["medias"] = medias
+
+#     return data
 
 
 def make_taxon_repository(taxhub_list_id: int) -> List[Taxon]:
     taxa = taxhub_rest_get_taxon_list(taxhub_list_id)
-    taxon_ids = [item["id_nom"] for item in taxa.get("items")]
-    r = [taxhub_rest_get_taxon(taxon_id) for taxon_id in taxon_ids]
-    return r
+    if isinstance(taxa, dict) and "items" in taxa:
+        reformatted_taxa = reformat_taxa(taxa)
+    else:
+        reformatted_taxa = []
+    return reformatted_taxa
 
 
 def get_specie_from_cd_nom(cd_nom) -> Dict:
@@ -161,3 +171,65 @@ def refresh_taxonlist() -> Dict:
 
 daemon = Thread(target=refresh_taxonlist, daemon=True, name="Monitor")
 daemon.start()
+
+
+def reformat_taxa(taxa):
+    result = []
+
+    # On parcours chaque item de taxa et on peut ici choisir de garder ou non certains champs pertinent pour citizen
+    # NOTES: pour éviter de tout charger peut être alléger l'objet taxref si pas utilisé ?
+    for item in taxa["items"]:
+        taxon = {
+            "medias": [],
+            "cd_nom": item.get("cd_nom"),
+            "nom_francais": None,
+            "taxref": {
+                "cd_nom": item.get("cd_nom"),
+                "cd_ref": item.get("cd_ref"),
+                "cd_sup": item.get("cd_sup"),
+                "cd_taxsup": item.get("cd_taxsup"),
+                "classe": item.get("classe"),
+                "famille": item.get("famille"),
+                "group1_inpn": item.get("group1_inpn"),
+                "group2_inpn": item.get("group2_inpn"),
+                "id_habitat": item.get("id_habitat"),
+                "id_rang": item.get("id_rang"),
+                "id_statut": item.get("id_statut"),
+                "lb_auteur": item.get("lb_auteur"),
+                "lb_nom": item.get("lb_nom"),
+                "nom_complet": item.get("nom_complet"),
+                "nom_complet_html": item.get("nom_complet_html"),
+                "nom_valide": item.get("nom_valide"),
+                "nom_vern": item.get("nom_vern"),
+                "nom_vern_eng": item.get("nom_vern_eng"),
+                "ordre": item.get("ordre"),
+                "phylum": item.get("phylum"),
+                "regne": item.get("regne"),
+                "sous_famille": item.get("sous_famille"),
+                "tribu": item.get("tribu"),
+                "url": item.get("url"),
+            },
+        }
+        for media in item.get("medias", []):
+            if media.get("types", {}).get("nom_type_media") in [
+                "Photo_gncitizen",
+                "Photo_principale",
+                "Photo",
+            ]:
+                taxon["medias"].append(media)
+        # Parcourir les attributs et chercher 'nom_francais' dans bib_attribut
+        for attribut in item.get("attributs", []):
+            bib_attribut = attribut.get("bib_attribut", {})
+            if remove_accents(
+                bib_attribut.get("nom_attribut", "").lower()
+            ) == "nom_francais":
+                taxon["nom_francais"] = attribut.get("valeur_attribut")
+
+        result.append(taxon)
+
+    return result
+
+
+def remove_accents(input_str):
+    nfkd_form = unicodedata.normalize("NFKD", input_str)
+    return "".join([c for c in nfkd_form if not unicodedata.combining(c)])
diff --git a/backend/migrations/versions/e8c1cd57ad16_init_v1_0_0.py b/backend/migrations/versions/e8c1cd57ad16_init_v1_0_0.py
@@ -15,7 +15,7 @@
 
 revision = "e8c1cd57ad16"
 down_revision = None
-branch_labels = None
+branch_labels = ["gn_citizen"]
 depends_on = None
 
 SCHEMAS = ["gnc_core", "gnc_obstax", "gnc_sites"]