From 2bcefcdeff2e49a3c0381f746d7a81ccd6a32c59 Mon Sep 17 00:00:00 2001 From: bshankar Date: Thu, 26 Sep 2024 11:16:57 +0530 Subject: [PATCH 1/4] Fix: Speed up CSV generation by minimizing DB probes --- backend/services/project_search_service.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/backend/services/project_search_service.py b/backend/services/project_search_service.py index ac4a34cdf3..19bbeefc25 100644 --- a/backend/services/project_search_service.py +++ b/backend/services/project_search_service.py @@ -204,15 +204,30 @@ def get_total_contributions(paginated_results): def search_projects_as_csv(search_dto: ProjectSearchDTO, user) -> str: all_results, _ = ProjectSearchService._filter_projects(search_dto, user) is_user_admin = user is not None and user.role == UserRole.ADMIN.value + + project_ids = [p.id for p in all_results] + contributors_by_project_id = ( + TaskHistory.query.with_entities( + TaskHistory.project_id, func.count(TaskHistory.user_id.distinct()) + ) + .filter( + TaskHistory.project_id.in_(project_ids), TaskHistory.action != "COMMENT" + ) + .group_by(TaskHistory.project_id) + .all() + ) + results_as_dto = [ ProjectSearchService.create_result_dto( - p, + project, search_dto.preferred_locale, - Project.get_project_total_contributions(p[0]), + next(filter(lambda c: c[0] == project.id, contributors_by_project_id))[ + 1 + ], with_partner_names=is_user_admin, with_author_name=False, ).to_primitive() - for p in all_results + for project in all_results ] df = pd.json_normalize(results_as_dto) From 0768c6d8eed18ea4b77d35c6523ceae0e0f1959c Mon Sep 17 00:00:00 2001 From: bshankar Date: Thu, 26 Sep 2024 11:37:50 +0530 Subject: [PATCH 2/4] Fix: Sonarcloud suggestion --- backend/services/project_search_service.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/backend/services/project_search_service.py b/backend/services/project_search_service.py index 19bbeefc25..8eeb9cfa8f 100644 --- 
a/backend/services/project_search_service.py +++ b/backend/services/project_search_service.py @@ -221,9 +221,12 @@ def search_projects_as_csv(search_dto: ProjectSearchDTO, user) -> str: ProjectSearchService.create_result_dto( project, search_dto.preferred_locale, - next(filter(lambda c: c[0] == project.id, contributors_by_project_id))[ - 1 - ], + next( + filter( + lambda c, p=project: c[0] == p.id, + contributors_by_project_id, + ) + )[1], with_partner_names=is_user_admin, with_author_name=False, ).to_primitive() From f43feab591b6fc98f6739a5545748349cb57fd5e Mon Sep 17 00:00:00 2001 From: bshankar Date: Mon, 30 Sep 2024 15:51:37 +0530 Subject: [PATCH 3/4] Reimplement download CSV in a more optimal way --- backend/models/postgis/project.py | 4 +- backend/services/project_search_service.py | 84 +++++++++++++--------- 2 files changed, 51 insertions(+), 37 deletions(-) diff --git a/backend/models/postgis/project.py b/backend/models/postgis/project.py index fc85b69f42..440edff4cf 100644 --- a/backend/models/postgis/project.py +++ b/backend/models/postgis/project.py @@ -206,13 +206,13 @@ class Project(db.Model): def percent_mapped(self): return ( (self.tasks_mapped + self.tasks_validated) - / (self.total_tasks - self.tasks_bad_imagery) * 100 + // (self.total_tasks - self.tasks_bad_imagery) ) @hybrid_property def percent_validated(self): - return self.tasks_validated / (self.total_tasks - self.tasks_bad_imagery) * 100 + return self.tasks_validated * 100 // (self.total_tasks - self.tasks_bad_imagery) # Mapped Objects tasks = db.relationship( diff --git a/backend/services/project_search_service.py b/backend/services/project_search_service.py index 8eeb9cfa8f..3c09b25ff0 100644 --- a/backend/services/project_search_service.py +++ b/backend/services/project_search_service.py @@ -73,6 +73,7 @@ def create_search_query(user=None): query = ( db.session.query( Project.id.label("id"), + ProjectInfo.name.label("project_name"), Project.difficulty, Project.priority, 
Project.default_locale, @@ -81,6 +82,8 @@ def create_search_query(user=None): Project.tasks_bad_imagery, Project.tasks_mapped, Project.tasks_validated, + Project.percent_mapped, + Project.percent_validated, Project.status, Project.total_tasks, Project.last_updated, @@ -88,10 +91,14 @@ def create_search_query(user=None): Project.country, Organisation.name.label("organisation_name"), Organisation.logo.label("organisation_logo"), + Project.created.label("creation_date"), + func.coalesce( + func.sum(func.ST_Area(Project.geometry, True) / 1000000) + ).label("total_area"), ) .filter(Project.geometry is not None) .outerjoin(Organisation, Organisation.id == Project.organisation_id) - .group_by(Organisation.id, Project.id) + .group_by(Organisation.id, Project.id, ProjectInfo.name) ) # Get public projects only for anonymous user. @@ -203,52 +210,59 @@ def get_total_contributions(paginated_results): @cached(csv_download_cache) def search_projects_as_csv(search_dto: ProjectSearchDTO, user) -> str: all_results, _ = ProjectSearchService._filter_projects(search_dto, user) + rows = [row._asdict() for row in all_results] is_user_admin = user is not None and user.role == UserRole.ADMIN.value - project_ids = [p.id for p in all_results] - contributors_by_project_id = ( - TaskHistory.query.with_entities( - TaskHistory.project_id, func.count(TaskHistory.user_id.distinct()) - ) - .filter( - TaskHistory.project_id.in_(project_ids), TaskHistory.action != "COMMENT" - ) - .group_by(TaskHistory.project_id) - .all() - ) - - results_as_dto = [ - ProjectSearchService.create_result_dto( - project, - search_dto.preferred_locale, - next( - filter( - lambda c, p=project: c[0] == p.id, - contributors_by_project_id, + for row in rows: + row["priority"] = ProjectPriority(row["priority"]).name + row["difficulty"] = ProjectDifficulty(row["difficulty"]).name + row["status"] = ProjectStatus(row["status"]).name + row["total_area"] = round(row["total_area"], 3) + row["total_contributors"] = 
Project.get_project_total_contributions(row["id"]) + + if is_user_admin: + partners_names = ( + ProjectPartnership.query.with_entities( + ProjectPartnership.project_id, Partner.name ) - )[1], - with_partner_names=is_user_admin, - with_author_name=False, - ).to_primitive() - for project in all_results - ] + .join(Partner, ProjectPartnership.partner_id == Partner.id) + .filter(ProjectPartnership.project_id == row["id"]) + .group_by(ProjectPartnership.project_id, Partner.name) + .all() + ) + row["partner_names"] = [pn for (_, pn) in partners_names] - df = pd.json_normalize(results_as_dto) + df = pd.json_normalize(rows) columns_to_drop = [ - "locale", - "shortDescription", - "organisationLogo", - "campaigns", + "default_locale", + "organisation_id", + "organisation_logo", + "tasks_bad_imagery", + "tasks_mapped", + "tasks_validated", + "total_tasks", + "centroid", ] - if not is_user_admin: - columns_to_drop.append("partnerNames") + + colummns_to_rename = { + "id": "projectId", + "organisation_name": "organisationName", + "last_updated": "lastUpdated", + "due_date": "dueDate", + "percent_mapped": "percentMapped", + "percent_validated": "percentValidated", + "total_area": "totalArea", + "total_contributors": "totalContributors", + "partner_names": "partnerNames", + "project_name": "name", + } df.drop( columns=columns_to_drop, inplace=True, axis=1, ) - + df.rename(columns=colummns_to_rename, inplace=True) return df.to_csv(index=False) @staticmethod From e9b84d4272b954f80429a7ccf00592e94d69a467 Mon Sep 17 00:00:00 2001 From: bshankar Date: Mon, 30 Sep 2024 19:36:04 +0530 Subject: [PATCH 4/4] Fix: Use old query for other uses of create_search_query --- backend/services/project_search_service.py | 95 ++++++++++++++-------- 1 file changed, 61 insertions(+), 34 deletions(-) diff --git a/backend/services/project_search_service.py b/backend/services/project_search_service.py index 3c09b25ff0..bdddc463c9 100644 --- a/backend/services/project_search_service.py +++ 
b/backend/services/project_search_service.py @@ -69,37 +69,62 @@ def __init__(self, message): class ProjectSearchService: @staticmethod - def create_search_query(user=None): - query = ( - db.session.query( - Project.id.label("id"), - ProjectInfo.name.label("project_name"), - Project.difficulty, - Project.priority, - Project.default_locale, - Project.centroid.ST_AsGeoJSON().label("centroid"), - Project.organisation_id, - Project.tasks_bad_imagery, - Project.tasks_mapped, - Project.tasks_validated, - Project.percent_mapped, - Project.percent_validated, - Project.status, - Project.total_tasks, - Project.last_updated, - Project.due_date, - Project.country, - Organisation.name.label("organisation_name"), - Organisation.logo.label("organisation_logo"), - Project.created.label("creation_date"), - func.coalesce( - func.sum(func.ST_Area(Project.geometry, True) / 1000000) - ).label("total_area"), + def create_search_query(user=None, as_csv: bool = False): + if as_csv: + query = ( + db.session.query( + Project.id.label("id"), + ProjectInfo.name.label("project_name"), + Project.difficulty, + Project.priority, + Project.default_locale, + Project.centroid.ST_AsGeoJSON().label("centroid"), + Project.organisation_id, + Project.tasks_bad_imagery, + Project.tasks_mapped, + Project.tasks_validated, + Project.percent_mapped, + Project.percent_validated, + Project.status, + Project.total_tasks, + Project.last_updated, + Project.due_date, + Project.country, + Organisation.name.label("organisation_name"), + Organisation.logo.label("organisation_logo"), + Project.created.label("creation_date"), + func.coalesce( + func.sum(func.ST_Area(Project.geometry, True) / 1000000) + ).label("total_area"), + ) + .filter(Project.geometry is not None) + .outerjoin(Organisation, Organisation.id == Project.organisation_id) + .group_by(Organisation.id, Project.id, ProjectInfo.name) + ) + else: + query = ( + db.session.query( + Project.id.label("id"), + Project.difficulty, + Project.priority, + 
Project.default_locale, + Project.centroid.ST_AsGeoJSON().label("centroid"), + Project.organisation_id, + Project.tasks_bad_imagery, + Project.tasks_mapped, + Project.tasks_validated, + Project.status, + Project.total_tasks, + Project.last_updated, + Project.due_date, + Project.country, + Organisation.name.label("organisation_name"), + Organisation.logo.label("organisation_logo"), + ) + .filter(Project.geometry is not None) + .outerjoin(Organisation, Organisation.id == Project.organisation_id) + .group_by(Organisation.id, Project.id) ) - .filter(Project.geometry is not None) - .outerjoin(Organisation, Organisation.id == Project.organisation_id) - .group_by(Organisation.id, Project.id, ProjectInfo.name) - ) # Get public projects only for anonymous user. if user is None: @@ -209,7 +234,7 @@ def get_total_contributions(paginated_results): @staticmethod @cached(csv_download_cache) def search_projects_as_csv(search_dto: ProjectSearchDTO, user) -> str: - all_results, _ = ProjectSearchService._filter_projects(search_dto, user) + all_results, _ = ProjectSearchService._filter_projects(search_dto, user, True) rows = [row._asdict() for row in all_results] is_user_admin = user is not None and user.role == UserRole.ADMIN.value @@ -218,7 +243,9 @@ def search_projects_as_csv(search_dto: ProjectSearchDTO, user) -> str: row["difficulty"] = ProjectDifficulty(row["difficulty"]).name row["status"] = ProjectStatus(row["status"]).name row["total_area"] = round(row["total_area"], 3) - row["total_contributors"] = Project.get_project_total_contributions(row["id"]) + row["total_contributors"] = Project.get_project_total_contributions( + row["id"] + ) if is_user_admin: partners_names = ( @@ -310,10 +337,10 @@ def search_projects(search_dto: ProjectSearchDTO, user) -> ProjectSearchResultsD return dto @staticmethod - def _filter_projects(search_dto: ProjectSearchDTO, user): + def _filter_projects(search_dto: ProjectSearchDTO, user, as_csv=False): """Filters all projects based on criteria 
provided by user""" - query = ProjectSearchService.create_search_query(user) + query = ProjectSearchService.create_search_query(user, as_csv) query = query.join(ProjectInfo).filter( ProjectInfo.locale.in_([search_dto.preferred_locale, "en"])