Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(wip): compatibility with TH v2 #426

Draft
wants to merge 2 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 105 additions & 33 deletions backend/gncitizen/utils/taxonomy.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""A module to manage taxonomy"""

from threading import Thread
import unicodedata
from typing import Dict, List, Optional, Union

import requests
Expand Down Expand Up @@ -39,11 +40,14 @@


def taxhub_rest_get_taxon_list(taxhub_list_id: int) -> Dict:
url = f"{TAXHUB_API}biblistes/taxons/{taxhub_list_id}"
url = f"{TAXHUB_API}taxref"
params = {
"id_liste": taxhub_list_id,
"fields": "medias,medias.types,attributs,attributs.bib_attribut",
"existing": "true",
"order": "asc",
"orderby": "taxref.nom_complet",
"orderby": "nom_complet",
"limit": 1000,
}
res = session.get(
url,
Expand All @@ -58,7 +62,7 @@ def taxhub_rest_get_all_lists() -> Optional[Dict]:
url = f"{TAXHUB_API}biblistes"
res = session.get(
url,
timeout=5,
timeout=10000, # TODO : suivant le nombre de taxons dans les listes cette requête peut êtr très longue (voir pour améliorer performance coté TaxHub avec notamment la définition dans le modèle BibListes : https://github.com/PnX-SI/TaxHub/blob/ea9434de5a1f227131e0e8640ad17f8a25e8a39d/apptax/taxonomie/models.py#L238 )
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mëme s'il est nécessaire d'augmenter le timeout, 10000 (s) me parait énorme ;).

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oui on peut le garder à 5 si les développements proposés coté taxhub sont accepté . Voir PR coté taxhub : PnX-SI/TaxHub#585

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

C'est d'ailleurs ce qu'il y a marqué dans le TODO

)
res.raise_for_status()
if res.status_code == 200:
Expand All @@ -74,40 +78,46 @@ def taxhub_rest_get_all_lists() -> Optional[Dict]:
return None


def taxhub_rest_get_taxon(taxhub_id: int) -> Taxon:
if not taxhub_id:
raise ValueError("Null value for taxhub taxon id")
url = f"{TAXHUB_API}bibnoms/{taxhub_id}"
for _ in range(5):
try:
res = session.get(url, timeout=5)
break
except requests.exceptions.ReadTimeout:
continue

res.raise_for_status()
data = res.json()
data.pop("listes", None)
data.pop("attributs", None)
if len(data["medias"]) > 0:
media_types = ("Photo_gncitizen", "Photo_principale", "Photo")
i = 0
while i < len(media_types):
filtered_medias = [d for d in data["medias"] if d["nom_type_media"] == media_types[i]]
if len(filtered_medias) >= 1:
break
i += 1
medias = filtered_medias[:1]
data["medias"] = medias

return data
# TODO: check if useless and remove
# def taxhub_rest_get_taxon(taxhub_id: int) -> Taxon:
# if not taxhub_id:
# raise ValueError("Null value for taxhub taxon id")
# url = f"{TAXHUB_API}taxref/{taxhub_id}"
# for _ in range(5):
# try:
# res = session.get(url, timeout=5)
# break
# except requests.exceptions.ReadTimeout:
# continue

# res.raise_for_status()
# data = res.json()
# data.pop("listes", None)
# data.pop("attributs", None)
# if len(data["medias"]) > 0:
# media_types = ("Photo_gncitizen", "Photo_principale", "Photo")
# i = 0
# while i < len(media_types):
# # TODO: résoudre problème de récupérations de types de médias (nom_type_media non accessible) ---> fonctionne avec branche de TH https://github.com/naturalsolutions/TaxHub/blob/fix/load_types_medias_from_taxref_route/apptax/taxonomie/routestaxref.py#L231
# filtered_medias = [
# d for d in data["medias"] if d["types"]["nom_type_media"] == media_types[i]
# ]
# if len(filtered_medias) >= 1:
# break
# i += 1
# medias = filtered_medias[:1]
# data["medias"] = medias

# return data


def make_taxon_repository(taxhub_list_id: int) -> List[Taxon]:
taxa = taxhub_rest_get_taxon_list(taxhub_list_id)
taxon_ids = [item["id_nom"] for item in taxa.get("items")]
r = [taxhub_rest_get_taxon(taxon_id) for taxon_id in taxon_ids]
return r
if isinstance(taxa, dict) and "items" in taxa:
reformatted_taxa = reformat_taxa(taxa)
else:
reformatted_taxa = []
return reformatted_taxa


def get_specie_from_cd_nom(cd_nom) -> Dict:
Expand Down Expand Up @@ -161,3 +171,65 @@ def refresh_taxonlist() -> Dict:

daemon = Thread(target=refresh_taxonlist, daemon=True, name="Monitor")
daemon.start()


def reformat_taxa(taxa):
result = []

# On parcours chaque item de taxa et on peut ici choisir de garder ou non certains champs pertinent pour citizen
# NOTES: pour éviter de tout charger peut être alléger l'objet taxref si pas utilisé ?
for item in taxa["items"]:
taxon = {
"medias": [],
"cd_nom": item.get("cd_nom"),
"nom_francais": None,
"taxref": {
"cd_nom": item.get("cd_nom"),
"cd_ref": item.get("cd_ref"),
"cd_sup": item.get("cd_sup"),
"cd_taxsup": item.get("cd_taxsup"),
"classe": item.get("classe"),
"famille": item.get("famille"),
"group1_inpn": item.get("group1_inpn"),
"group2_inpn": item.get("group2_inpn"),
"id_habitat": item.get("id_habitat"),
"id_rang": item.get("id_rang"),
"id_statut": item.get("id_statut"),
"lb_auteur": item.get("lb_auteur"),
"lb_nom": item.get("lb_nom"),
"nom_complet": item.get("nom_complet"),
"nom_complet_html": item.get("nom_complet_html"),
"nom_valide": item.get("nom_valide"),
"nom_vern": item.get("nom_vern"),
"nom_vern_eng": item.get("nom_vern_eng"),
"ordre": item.get("ordre"),
"phylum": item.get("phylum"),
"regne": item.get("regne"),
"sous_famille": item.get("sous_famille"),
"tribu": item.get("tribu"),
"url": item.get("url"),
},
}
for media in item.get("medias", []):
if media.get("types", {}).get("nom_type_media") in [
"Photo_gncitizen",
"Photo_principale",
"Photo",
]:
taxon["medias"].append(media)
# Parcourir les attributs et chercher 'nom_francais' dans bib_attribut
for attribut in item.get("attributs", []):
bib_attribut = attribut.get("bib_attribut", {})
if remove_accents(
bib_attribut.get("nom_attribut", "").lower()
) == "nom_francais":
taxon["nom_francais"] = attribut.get("valeur_attribut")

result.append(taxon)

return result


def remove_accents(input_str):
nfkd_form = unicodedata.normalize("NFKD", input_str)
return "".join([c for c in nfkd_form if not unicodedata.combining(c)])
2 changes: 1 addition & 1 deletion backend/migrations/versions/e8c1cd57ad16_init_v1_0_0.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

revision = "e8c1cd57ad16"
down_revision = None
branch_labels = None
branch_labels = ["gn_citizen"]
depends_on = None

SCHEMAS = ["gnc_core", "gnc_obstax", "gnc_sites"]
Expand Down