From 41866cb37a9b1025f50b37f3735b099ac1a316f2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tibor=20Cserv=C3=A1k?= <tibor.cservak97@gmail.com>
Date: Fri, 12 Jul 2024 14:01:46 +0200
Subject: [PATCH] [Fix] Report sorting in unique mode

Fix report sorting in unique mode. After the DB schema was modified in PR #4089, the unique mode query of the getRunResults endpoint was changed, and as a result report sorting no longer works properly.

The unique mode query is now redesigned: it uses the row_number() window function to filter unique reports correctly. The WHERE clause is also modified so that it no longer contains the report annotation filter; annotation filtering remains in the HAVING clause.
---
 .../codechecker_server/api/report_server.py   | 166 +++++++++---------
 1 file changed, 81 insertions(+), 85 deletions(-)

diff --git a/web/server/codechecker_server/api/report_server.py b/web/server/codechecker_server/api/report_server.py
index 6d7b167f69..83612629a1 100644
--- a/web/server/codechecker_server/api/report_server.py
+++ b/web/server/codechecker_server/api/report_server.py
@@ -20,7 +20,7 @@
 import zlib
 
 from copy import deepcopy
-from collections import OrderedDict, defaultdict
+from collections import OrderedDict, defaultdict, namedtuple
 from datetime import datetime, timedelta
 from typing import Any, Dict, List, Optional, Set, Tuple
 
@@ -1962,117 +1962,113 @@ def getRunResults(self, run_ids, limit, offset, sort_types,
                     ReportAnnotations.value)])).label(f"annotation_{col}")
 
             if report_filter.isUnique:
+                # A report annotation filter cannot be applied in the WHERE
+                # clause when annotation values are used in aggregate
+                # functions to build a pivot table. Instead of filtering
+                # report annotations in the WHERE clause, filter on the
+                # aggregated columns in the HAVING clause.
+                # TODO: Fix the report annotation filter in every report
+                # server endpoint function.
+                annotations_backup = report_filter.annotations
+                report_filter.annotations = None
                 filter_expression, join_tables = process_report_filter(
-                    session, run_ids, report_filter, cmp_data,
-                    keep_all_annotations=False)
+                    session, run_ids, report_filter, cmp_data)
 
                 sort_types, sort_type_map, order_type_map = \
                     get_sort_map(sort_types, True)
 
-                selects = [func.max(Report.id).label('id')]
-                for sort in sort_types:
-                    sorttypes = sort_type_map.get(sort.type)
-                    for sorttype in sorttypes:
-                        if sorttype[0] != 'bug_path_length':
-                            selects.append(func.max(sorttype[0])
-                                           .label(sorttype[1]))
-
-                unique_reports = session.query(*selects)
-                unique_reports = apply_report_filter(unique_reports,
-                                                     filter_expression,
-                                                     join_tables)
-                if report_filter.annotations is not None:
-                    unique_reports = unique_reports.outerjoin(
-                        ReportAnnotations,
-                        Report.id == ReportAnnotations.report_id)
-                unique_reports = unique_reports \
-                    .group_by(Report.bug_id) \
-                    .subquery()
-
-                # Sort the results.
-                sorted_reports = session.query(unique_reports.c.id)
-                sorted_reports = sort_results_query(sorted_reports,
-                                                    sort_types,
-                                                    sort_type_map,
-                                                    order_type_map,
-                                                    True)
-                sorted_reports = sorted_reports \
-                    .limit(limit).offset(offset).subquery()
-
-                q = session.query(Report,
-                                  File.filename,
-                                  *annotation_cols.values()) \
-                    .join(Checker,
-                          Report.checker_id == Checker.id) \
-                    .options(contains_eager(Report.checker)) \
-                    .outerjoin(
-                        File,
-                        Report.file_id == File.id) \
-                    .outerjoin(
-                        ReportAnnotations,
-                        Report.id == ReportAnnotations.report_id) \
-                    .outerjoin(sorted_reports,
-                               sorted_reports.c.id == Report.id) \
-                    .filter(sorted_reports.c.id.isnot(None))
-
-                if report_filter.annotations is not None:
+                # TODO: Create a helper function for common section of unique
+                # and non unique modes.
+                sub_query = session.query(Report,
+                                          File.filename,
+                                          Checker.analyzer_name,
+                                          Checker.checker_name,
+                                          Checker.severity,
+                                          func.row_number().over(
+                                            partition_by=Report.bug_id,
+                                            order_by=desc(Report.id)
+                                          ).label("row_num"),
+                                          *annotation_cols.values()) \
+                                   .join(Checker,
+                                         Report.checker_id == Checker.id) \
+                                   .options(contains_eager(Report.checker)) \
+                                   .outerjoin(File,
+                                              Report.file_id == File.id) \
+                                   .outerjoin(ReportAnnotations,
+                                              Report.id ==
+                                              ReportAnnotations.report_id)
+
+                sub_query = apply_report_filter(sub_query,
+                                                filter_expression,
+                                                join_tables,
+                                                [File, Checker])
+
+                sub_query = sub_query.group_by(Report.id, File.id, Checker.id)
+
+                if annotations_backup:
                     annotations = defaultdict(list)
-                    for annotation in report_filter.annotations:
+                    for annotation in annotations_backup:
                         annotations[annotation.first].append(annotation.second)
 
                     OR = []
                     for key, values in annotations.items():
                         OR.append(annotation_cols[key].in_(values))
-                    q = q.having(or_(*OR))
+                    sub_query = sub_query.having(or_(*OR))
 
-                # We have to sort the results again because an ORDER BY in a
-                # subtable is broken by the JOIN.
-                q = sort_results_query(q,
-                                       sort_types,
-                                       sort_type_map,
-                                       order_type_map)
-                q = q.group_by(Report.id, File.id, Checker.id)
+                sub_query = sort_results_query(sub_query,
+                                               sort_types,
+                                               sort_type_map,
+                                               order_type_map)
 
-                query_result = q.all()
+                sub_query = sub_query.subquery().alias()
+
+                q = session.query(sub_query) \
+                           .filter(sub_query.c.row_num == 1) \
+                           .limit(limit).offset(offset)
+
+                QueryResult = namedtuple('QueryResult', sub_query.c.keys())
+                query_result = [QueryResult(*row) for row in q.all()]
 
                 # Get report details if it is required.
                 report_details = {}
                 if get_details:
-                    report_ids = [r[0].id for r in query_result]
+                    report_ids = [r.id for r in query_result]
                     report_details = get_report_details(session, report_ids)
 
                 for row in query_result:
-                    report, filename = row[0], row[1]
                     annotations = {
-                        k: v for k, v in zip(annotation_keys, row[2:])
-                        if v is not None}
+                        k: v for k, v in zip(
+                            annotation_keys,
+                            [row.annotation_testcase,
+                             row.annotation_timestamp]
+                            ) if v is not None}
 
                     review_data = create_review_data(
-                        report.review_status,
-                        report.review_status_message,
-                        report.review_status_author,
-                        report.review_status_date,
-                        report.review_status_is_in_source)
+                        row.review_status,
+                        row.review_status_message,
+                        row.review_status_author,
+                        row.review_status_date,
+                        row.review_status_is_in_source)
 
                     results.append(
-                        ReportData(runId=report.run_id,
-                                   bugHash=report.bug_id,
-                                   checkedFile=filename,
-                                   checkerMsg=report.checker_message,
-                                   reportId=report.id,
-                                   fileId=report.file_id,
-                                   line=report.line,
-                                   column=report.column,
-                                   analyzerName=report.checker.analyzer_name,
-                                   checkerId=report.checker.checker_name,
-                                   severity=report.checker.severity,
+                        ReportData(runId=row.run_id,
+                                   bugHash=row.bug_id,
+                                   checkedFile=row.filename,
+                                   checkerMsg=row.checker_message,
+                                   reportId=row.id,
+                                   fileId=row.file_id,
+                                   line=row.line,
+                                   column=row.column,
+                                   analyzerName=row.analyzer_name,
+                                   checkerId=row.checker_name,
+                                   severity=row.severity,
                                    reviewData=review_data,
                                    detectionStatus=detection_status_enum(
-                                       report.detection_status),
-                                   detectedAt=str(report.detected_at),
-                                   fixedAt=str(report.fixed_at),
-                                   bugPathLength=report.path_length,
-                                   details=report_details.get(report.id),
+                                    row.detection_status),
+                                   detectedAt=str(row.detected_at),
+                                   fixedAt=str(row.fixed_at),
+                                   bugPathLength=row.path_length,
+                                   details=report_details.get(row.id),
                                    annotations=annotations))
             else:  # not is_unique
                 filter_expression, join_tables = process_report_filter(