From 0bbc73a16d7f02448e6709f1a3f68120263ed797 Mon Sep 17 00:00:00 2001 From: "Sofian A. Thibaut" Date: Mon, 30 Sep 2024 11:43:51 +0200 Subject: [PATCH] Fix search to ignore accents and case in trigram similarity --- public_data/models/administration/Commune.py | 3 ++- public_data/models/administration/Departement.py | 3 ++- public_data/models/administration/Epci.py | 3 ++- public_data/models/administration/Region.py | 3 ++- public_data/models/administration/Scot.py | 3 ++- 5 files changed, 10 insertions(+), 5 deletions(-) diff --git a/public_data/models/administration/Commune.py b/public_data/models/administration/Commune.py index 8dbca5ce0..afd2fe5c5 100644 --- a/public_data/models/administration/Commune.py +++ b/public_data/models/administration/Commune.py @@ -1,6 +1,7 @@ from django.contrib.gis.db import models from django.contrib.postgres.search import TrigramSimilarity from django.core.validators import MaxValueValidator, MinValueValidator +from django.db.models.functions import Lower from public_data.models.cerema import Cerema from public_data.models.enums import SRID @@ -87,7 +88,7 @@ def get_official_id(self) -> str: @classmethod def search(cls, needle, region=None, departement=None, epci=None): - qs = cls.objects.annotate(similarity=TrigramSimilarity("name", needle)) + qs = cls.objects.annotate(similarity=TrigramSimilarity(Lower("name__unaccent"), needle.lower())) if needle.isdigit(): qs = cls.objects.filter(insee__icontains=needle) diff --git a/public_data/models/administration/Departement.py b/public_data/models/administration/Departement.py index 218e5e373..bff61fd03 100644 --- a/public_data/models/administration/Departement.py +++ b/public_data/models/administration/Departement.py @@ -1,6 +1,7 @@ from django.contrib.gis.db import models from django.contrib.postgres.fields import ArrayField from django.contrib.postgres.search import TrigramSimilarity +from django.db.models.functions import Lower from public_data.models.cerema import Cerema from public_data.models.enums import SRID @@ -48,7 +49,7 @@ def __str__(self): @classmethod def search(cls, needle, region=None, departement=None, epci=None): - qs = cls.objects.annotate(similarity=TrigramSimilarity("name", needle)) + qs = cls.objects.annotate(similarity=TrigramSimilarity(Lower("name__unaccent"), needle.lower())) qs = qs.filter(similarity__gt=0.15) # Filtrer par un score minimum de similarité qs = qs.order_by("-similarity") # Trier par score décroissant diff --git a/public_data/models/administration/Epci.py b/public_data/models/administration/Epci.py index 74f260de9..e741f5127 100644 --- a/public_data/models/administration/Epci.py +++ b/public_data/models/administration/Epci.py @@ -1,6 +1,7 @@ from django.apps import apps from django.contrib.gis.db import models from django.contrib.postgres.search import TrigramSimilarity +from django.db.models.functions import Lower from public_data.models.enums import SRID from utils.db import IntersectManager @@ -58,7 +59,7 @@ def __str__(self): @classmethod def search(cls, needle, region=None, departement=None, epci=None): - qs = cls.objects.annotate(similarity=TrigramSimilarity("name", needle)) + qs = cls.objects.annotate(similarity=TrigramSimilarity(Lower("name__unaccent"), needle.lower())) qs = qs.filter(similarity__gt=0.15) # Filtrer par un score minimum de similarité qs = qs.order_by("-similarity") # Trier par score décroissant diff --git a/public_data/models/administration/Region.py b/public_data/models/administration/Region.py index 436b0200c..9748c83b6 100644 --- a/public_data/models/administration/Region.py +++ b/public_data/models/administration/Region.py @@ -1,6 +1,7 @@ from django.apps import apps from django.contrib.gis.db import models from django.contrib.postgres.search import TrigramSimilarity +from django.db.models.functions import Lower from public_data.models.cerema import Cerema from public_data.models.enums import SRID @@ -42,7 +43,7 @@ def get_ocsge_millesimes(self) -> set: @classmethod def search(cls, needle, region=None, departement=None, epci=None): - qs = cls.objects.annotate(similarity=TrigramSimilarity("name", needle)) + qs = cls.objects.annotate(similarity=TrigramSimilarity(Lower("name__unaccent"), needle.lower())) qs = qs.filter(similarity__gt=0.15) # Filtrer par un score minimum de similarité qs = qs.order_by("-similarity") # Trier par score décroissant diff --git a/public_data/models/administration/Scot.py b/public_data/models/administration/Scot.py index 7ba74fca9..4b5557f90 100644 --- a/public_data/models/administration/Scot.py +++ b/public_data/models/administration/Scot.py @@ -1,5 +1,6 @@ from django.contrib.gis.db import models from django.contrib.postgres.search import TrigramSimilarity +from django.db.models.functions import Lower from public_data.models.cerema import Cerema from public_data.models.enums import SRID @@ -49,7 +50,7 @@ def get_official_id(self) -> str: @classmethod def search(cls, needle, region=None, departement=None, epci=None): - qs = cls.objects.annotate(similarity=TrigramSimilarity("name", needle)) + qs = cls.objects.annotate(similarity=TrigramSimilarity(Lower("name__unaccent"), needle.lower())) qs = qs.filter(similarity__gt=0.15) # Filtrer par un score minimum de similarité qs = qs.order_by("-similarity") # Trier par score décroissant