From e4455e33c279eaf40e03cce023bf1ffa37755035 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Csord=C3=A1s=20M=C3=A1rton?=
Date: Fri, 3 Nov 2017 14:06:28 +0100
Subject: [PATCH] Group reports only by bug hash when uniqueing

- Increase performance of the filter queries.
- Uniqueing the filters and reports only by bug hash.
---
 libcodechecker/libclient/thrift_helper.py   |   2 +-
 libcodechecker/server/api/report_server.py  | 319 ++++++++++--------
 .../report_viewer_api/test_report_filter.py |   4 +-
 3 files changed, 175 insertions(+), 150 deletions(-)

diff --git a/libcodechecker/libclient/thrift_helper.py b/libcodechecker/libclient/thrift_helper.py
index 4472371d0c..5c7e08743b 100644
--- a/libcodechecker/libclient/thrift_helper.py
+++ b/libcodechecker/libclient/thrift_helper.py
@@ -113,7 +113,7 @@ def getSourceFileData(self, fileId, fileContent, encoding):
         pass
 
     @ThriftClientCall
-    def getRunResultCount(self, runIds, reportFilters):
+    def getRunResultCount(self, runIds, reportFilters, cmp_data):
         pass
 
     @ThriftClientCall
diff --git a/libcodechecker/server/api/report_server.py b/libcodechecker/server/api/report_server.py
index a09271e538..8eba0c05d7 100644
--- a/libcodechecker/server/api/report_server.py
+++ b/libcodechecker/server/api/report_server.py
@@ -77,6 +77,7 @@ def process_report_filter_v2(report_filter, count_filter=None):
     if report_filter.filepath is not None:
         OR = [File.filepath.ilike(conv(fp))
               for fp in report_filter.filepath]
+
         AND.append(or_(*OR))
 
     if report_filter.checkerMsg is not None:
@@ -181,31 +182,8 @@ def get_diff_hashes_for_query(base_run_ids, base_line_hashes, new_run_ids,
             msg)
 
 
-def get_run_result_query_helper(session, run_ids, report_filter, cmp_data,
-                                diff_hashes, is_unique, is_count=False):
-    filter_expression = process_report_filter_v2(report_filter)
-
-    if is_unique:
-        q = session.query(Report.bug_id,
-                          Report.checker_id,
-                          Report.checker_message,
-                          Report.severity,
-                          Report.file_id,
-                          func.max(ReviewStatus.status).label('status')) \
-            .group_by(Report.bug_id,
-                      Report.checker_id,
-                      Report.checker_message,
-                      Report.severity,
-                      Report.file_id)
-    elif is_count:
-        q = session.query(Report)
-    else:
-        q = session.query(Report,
-                          File,
-                          ReviewStatus)
-
-    return filter_report_filter(q, filter_expression, run_ids, cmp_data,
-                                diff_hashes)
+def group_by_unique_fields(q):
+    return q.group_by(Report.bug_id)
 
 
 def bugpathevent_db_to_api(bpe):
@@ -333,6 +311,43 @@ def filter_report_filter(q, filter_expression, run_ids=None, cmp_data=None,
     return q
 
 
+def get_sort_map(sort_types, is_unique=False):
+    # Get a list of sort_types which will be a nested ORDER BY.
+    sort_type_map = {
+        SortType.FILENAME: [(File.filepath, 'filepath')],
+        SortType.CHECKER_NAME: [(Report.checker_id, 'checker_id')],
+        SortType.SEVERITY: [(Report.severity, 'severity')],
+        SortType.REVIEW_STATUS: [(ReviewStatus.status, 'rw_status')],
+        SortType.DETECTION_STATUS: [(Report.detection_status, 'dt_status')]}
+
+    if is_unique:
+        sort_type_map[SortType.FILENAME] = [(File.filename, 'filename')]
+        sort_type_map[SortType.DETECTION_STATUS] = []
+
+    # Mapping the SQLAlchemy functions.
+    order_type_map = {Order.ASC: asc, Order.DESC: desc}
+
+    if sort_types is None:
+        sort_types = [SortMode(SortType.SEVERITY, Order.DESC)]
+
+    return sort_types, sort_type_map, order_type_map
+
+
+def sort_results_query(query, sort_types, sort_type_map, order_type_map,
+                       order_by_label=False):
+    """
+    Helper method to apply sorting to the report queries.
+    """
+    for sort in sort_types:
+        sorttypes = sort_type_map.get(sort.type)
+        for sorttype in sorttypes:
+            order_type = order_type_map.get(sort.ord)
+            sort_col = sorttype[1] if order_by_label else sorttype[0]
+            query = query.order_by(order_type(sort_col))
+
+    return query
+
+
 class StorageSession:
     """
     This class is a singleton which helps to handle a transaction which
@@ -481,40 +496,6 @@ def __require_access(self):
     def __require_store(self):
         self.__require_permission([permissions.PRODUCT_STORE])
 
-    def __sortResultsQuery(self, query, sort_types=None, is_unique=False,
-                           subquery=None):
-        """
-        Helper method for __queryDiffResults and queryResults to apply sorting.
-        """
-
-        # Get a list of sort_types which will be a nested ORDER BY.
-        sort_type_map = {SortType.FILENAME: [File.filepath],
-                         SortType.CHECKER_NAME: [Report.checker_id],
-                         SortType.SEVERITY: [Report.severity],
-                         SortType.REVIEW_STATUS: [ReviewStatus.status],
-                         SortType.DETECTION_STATUS: [Report.detection_status]}
-
-        if is_unique:
-            sort_type_map = {SortType.FILENAME: [File.filename],
-                             SortType.CHECKER_NAME: [subquery.c.checker_id],
-                             SortType.SEVERITY: [subquery.c.severity],
-                             SortType.REVIEW_STATUS: [subquery.c.status],
-                             SortType.DETECTION_STATUS: []}
-
-        # Mapping the SQLAlchemy functions.
-        order_type_map = {Order.ASC: asc, Order.DESC: desc}
-
-        if sort_types is None:
-            sort_types = [SortMode(SortType.SEVERITY, Order.DESC)]
-
-        for sort in sort_types:
-            sorttypes = sort_type_map.get(sort.type)
-            for sorttype in sorttypes:
-                order_type = order_type_map.get(sort.ord)
-                query = query.order_by(order_type(sorttype))
-
-        return query
-
     def __get_run_ids_to_query(self, session, cmp_data=None):
         """
         Return run id list for the queries.
@@ -713,72 +694,107 @@ def getRunResults(self, run_ids, limit, offset, sort_types,
                     # There is no difference.
                    return results
 
-            is_unique = report_filter is not None and report_filter.isUnique
-            q = get_run_result_query_helper(session, run_ids, report_filter,
-                                            cmp_data, diff_hashes, is_unique)
+            filter_expression = process_report_filter_v2(report_filter)
 
+            is_unique = report_filter is not None and report_filter.isUnique
             if is_unique:
-                q = q.limit(limit).offset(offset).subquery()
-
-                report_query = session.query(q,
-                                             ReviewStatus.message,
-                                             ReviewStatus.author,
-                                             ReviewStatus.date) \
+                sort_types, sort_type_map, order_type_map = \
+                    get_sort_map(sort_types, True)
+
+                selects = [func.max(Report.id).label('id')]
+                for sort in sort_types:
+                    sorttypes = sort_type_map.get(sort.type)
+                    for sorttype in sorttypes:
+                        selects.append(func.max(sorttype[0])
+                                       .label(sorttype[1]))
+
+                unique_reports = session.query(*selects)
+                unique_reports = filter_report_filter(unique_reports,
+                                                      filter_expression,
+                                                      run_ids,
+                                                      cmp_data,
+                                                      diff_hashes)
+                unique_reports = unique_reports \
+                    .group_by(Report.bug_id).subquery()
+
+                # Sort the results.
+                sorted_reports = session.query(unique_reports.c.id)
+
+                sorted_reports = sort_results_query(sorted_reports,
+                                                    sort_types,
+                                                    sort_type_map,
+                                                    order_type_map,
+                                                    True)
+
+                sorted_reports = sorted_reports \
+                    .limit(limit).offset(offset).subquery()
+
+                q = session.query(Report.id, Report.bug_id,
+                                  Report.checker_message, Report.checker_id,
+                                  Report.severity, ReviewStatus, File.filename,
+                                  File.filepath) \
+                    .outerjoin(File, Report.file_id == File.id) \
                     .outerjoin(ReviewStatus,
-                               ReviewStatus.bug_hash == q.c.bug_id) \
-                    .subquery()
-
-                file_query = session.query(report_query,
-                                           File.filename.label('filename')) \
-                    .outerjoin(File, report_query.c.file_id == File.id)
-
-                file_query = self.__sortResultsQuery(
-                    file_query, sort_types, is_unique, report_query)
-
-                for report_hash, checker, checker_msg, severity, file_id, \
-                    rs_status, rs_msg, rs_author, rs_date, path in \
-                        file_query:
-
-                    review_data = None
-                    if rs_status:
-                        review_data = ReviewData(
-                            status=review_status_enum(rs_status),
-                            comment=rs_msg,
-                            author=rs_author,
-                            date=str(rs_date))
-                    else:
-                        review_data = ReviewData(
-                            status=ttypes.ReviewStatus.UNREVIEWED)
+                               ReviewStatus.bug_hash == Report.bug_id) \
+                    .outerjoin(sorted_reports,
+                               sorted_reports.c.id == Report.id) \
+                    .filter(sorted_reports.c.id.isnot(None))
+
+                for report_id, bug_id, checker_msg, checker, severity, \
+                    status, filename, path in \
+                        q:
+                    review_data = create_review_data(status)
 
                     results.append(
-                        ReportData(bugHash=report_hash,
-                                   checkedFile=path,
+                        ReportData(bugHash=bug_id,
+                                   checkedFile=filename,
                                    checkerMsg=checker_msg,
                                    checkerId=checker,
                                    severity=severity,
                                    reviewData=review_data))
             else:
-                q = self.__sortResultsQuery(q, sort_types)
+                q = session.query(Report.id, Report.file_id, Report.line,
+                                  Report.column, Report.detection_status,
+                                  Report.bug_id, Report.checker_message,
+                                  Report.checker_id, Report.severity,
+                                  ReviewStatus, File.filepath) \
+                    .outerjoin(File, Report.file_id == File.id) \
+                    .outerjoin(ReviewStatus,
+                               ReviewStatus.bug_hash == Report.bug_id) \
+                    .filter(filter_expression)
 
-                for report, source_file, review_status in \
-                        q.limit(limit).offset(offset):
+                if run_ids:
+                    q = q.filter(Report.run_id.in_(run_ids))
 
-                    review_data = create_review_data(review_status)
+                if cmp_data:
+                    q = q.filter(Report.bug_id.in_(diff_hashes))
+
+                sort_types, sort_type_map, order_type_map = \
+                    get_sort_map(sort_types)
+
+                q = sort_results_query(q, sort_types, sort_type_map,
+                                       order_type_map)
+
+                q = q.limit(limit).offset(offset)
+
+                for report_id, file_id, line, column, d_status, bug_id, \
+                        checker_msg, checker, severity, r_status, path \
+                        in q:
+
+                    review_data = create_review_data(r_status)
 
                     results.append(
-                        ReportData(runId=report.run_id,
-                                   bugHash=report.bug_id,
-                                   checkedFile=source_file.filepath,
-                                   checkerMsg=report.checker_message,
-                                   reportId=report.id,
-                                   fileId=source_file.id,
-                                   line=report.line,
-                                   column=report.column,
-                                   checkerId=report.checker_id,
-                                   severity=report.severity,
+                        ReportData(bugHash=bug_id,
+                                   checkedFile=path,
+                                   checkerMsg=checker_msg,
+                                   reportId=report_id,
+                                   fileId=file_id,
+                                   line=line,
+                                   column=column,
+                                   checkerId=checker,
+                                   severity=severity,
                                    reviewData=review_data,
                                    detectionStatus=detection_status_enum(
-                                       report.detection_status))
-                )
+                                       d_status)))
 
             return results
 
@@ -813,12 +829,13 @@ def getRunReportCounts(self, run_ids, report_filter, limit, offset):
                 q = q.filter(Report.run_id.in_(run_ids))
 
             q = q.outerjoin(File, Report.file_id == File.id) \
-                .outerjoin(ReviewStatus,
-                           ReviewStatus.bug_hash == Report.bug_id) \
-                .outerjoin(Run,
-                           Report.run_id == Run.id) \
-                .filter(filter_expression) \
-                .group_by(Run.id)
+                 .outerjoin(ReviewStatus,
+                            ReviewStatus.bug_hash == Report.bug_id) \
+                 .outerjoin(Run,
+                            Report.run_id == Run.id) \
+                 .filter(filter_expression) \
+                 .order_by(Run.name) \
+                 .group_by(Run.id)
 
             if limit:
                 q = q.limit(limit).offset(offset)
@@ -853,10 +870,13 @@ def getRunResultCount(self, run_ids, report_filter, cmp_data):
                     # There is no difference.
                     return 0
 
-            is_unique = report_filter is not None and report_filter.isUnique
-            q = get_run_result_query_helper(session, run_ids, report_filter,
-                                            cmp_data, diff_hashes, is_unique,
-                                            True)
+            filter_expression = process_report_filter_v2(report_filter)
+            q = session.query(Report.bug_id)
+            q = filter_report_filter(q, filter_expression, run_ids, cmp_data,
+                                     diff_hashes)
+
+            if report_filter is not None and report_filter.isUnique:
+                q = q.group_by(Report.bug_id)
 
             report_count = q.count()
             if report_count is None:
@@ -1324,21 +1344,23 @@ def getCheckerCounts(self, run_ids, report_filter, cmp_data, limit,
             else:
                 q = session.query(Report.checker_id,
                                   Report.severity,
-                                  func.count(Report.bug_id))
+                                  func.count(Report.id))
 
             q = filter_report_filter(q, filter_expression, run_ids, cmp_data,
                                      diff_hashes)
 
             unique_checker_q = None
             if is_unique:
-                q = q.group_by(Report.bug_id).subquery()
+                q = group_by_unique_fields(q).subquery()
                 unique_checker_q = session.query(q.c.checker_id,
                                                  func.max(q.c.severity),
                                                  func.count(q.c.bug_id)) \
-                    .group_by(q.c.checker_id)
+                    .group_by(q.c.checker_id) \
+                    .order_by(q.c.checker_id)
             else:
                 unique_checker_q = q.group_by(Report.checker_id,
-                                              Report.severity)
+                                              Report.severity) \
+                    .order_by(Report.checker_id)
 
             if limit:
                 unique_checker_q = unique_checker_q.limit(limit).offset(offset)
@@ -1384,19 +1406,19 @@ def getSeverityCounts(self, run_ids, report_filter, cmp_data):
                                   Report.bug_id)
             else:
                 q = session.query(Report.severity,
-                                  func.count(Report.bug_id))
+                                  func.count(Report.id))
 
             q = filter_report_filter(q, filter_expression, run_ids, cmp_data,
                                      diff_hashes)
 
             severities = None
             if is_unique:
-                q = q.group_by(Report.bug_id).subquery()
+                q = group_by_unique_fields(q).subquery()
                 severities = session.query(q.c.severity,
                                            func.count(q.c.bug_id)) \
-                    .group_by(q.c.severity).all()
+                    .group_by(q.c.severity)
            else:
-                severities = q.group_by(Report.severity).all()
+                severities = q.group_by(Report.severity)
 
             results = dict(severities)
 
@@ -1437,20 +1459,21 @@ def getCheckerMsgCounts(self, run_ids, report_filter, cmp_data, limit,
                                   Report.bug_id)
             else:
                 q = session.query(Report.checker_message,
-                                  func.count(Report.bug_id))
+                                  func.count(Report.id))
 
             q = filter_report_filter(q, filter_expression, run_ids, cmp_data,
                                      diff_hashes)
 
             checker_messages = None
             if is_unique:
-                q = q.group_by(Report.bug_id).subquery()
+                q = group_by_unique_fields(q).subquery()
                 checker_messages = session.query(q.c.checker_message,
                                                  func.count(q.c.bug_id)) \
-                    .group_by(q.c.checker_message)
+                    .group_by(q.c.checker_message) \
+                    .order_by(q.c.checker_message)
             else:
-                checker_messages = q.group_by(Report.checker_message,
-                                              Report.severity)
+                checker_messages = q.group_by(Report.checker_message) \
+                    .order_by(Report.checker_message)
 
             if limit:
                 checker_messages = checker_messages.limit(limit).offset(offset)
@@ -1494,20 +1517,20 @@ def getReviewStatusCounts(self, run_ids, report_filter, cmp_data):
             else:
                 q = session.query(func.max(Report.bug_id),
                                   ReviewStatus.status,
-                                  func.count(Report.bug_id))
+                                  func.count(Report.id))
 
             q = filter_report_filter(q, filter_expression, run_ids, cmp_data,
                                      diff_hashes)
 
             review_statuses = None
             if is_unique:
-                q = q.group_by(Report.bug_id).subquery()
+                q = group_by_unique_fields(q).subquery()
                 review_statuses = session.query(func.max(q.c.bug_id),
                                                 q.c.status,
                                                 func.count(q.c.bug_id)) \
-                    .group_by(q.c.status).all()
+                    .group_by(q.c.status)
             else:
-                review_statuses = q.group_by(ReviewStatus.status).all()
+                review_statuses = q.group_by(ReviewStatus.status)
 
             for _, rev_status, count in review_statuses:
                 if rev_status is None:
@@ -1548,11 +1571,11 @@ def getFileCounts(self, run_ids, report_filter, cmp_data, limit, offset):
 
             stmt = session.query(Report.bug_id,
                                  Report.file_id) \
-                .outerjoin(ReviewStatus,
-                           ReviewStatus.bug_hash == Report.bug_id) \
-                .outerjoin(File,
-                           File.id == Report.file_id) \
-                .filter(filter_expression)
+                 .outerjoin(ReviewStatus,
+                            ReviewStatus.bug_hash == Report.bug_id) \
+                 .outerjoin(File,
+                            File.id == Report.file_id) \
+                 .filter(filter_expression)
 
             if run_ids:
                 stmt = stmt.filter(Report.run_id.in_(run_ids))
@@ -1628,15 +1651,17 @@ def getRunHistoryTagCounts(self, run_ids, report_filter, cmp_data):
             tag_q = tag_q.group_by(RunHistory.run_id).subquery()
 
             q = session.query(tag_q.c.run_id,
-                              Run.name,
-                              RunHistory.time,
-                              RunHistory.version_tag,
-                              count_q.c.report_count) \
+                              func.max(Run.name).label('run_name'),
+                              func.max(RunHistory.time),
+                              func.max(RunHistory.version_tag),
+                              func.sum(count_q.c.report_count)) \
                 .outerjoin(RunHistory,
                            RunHistory.id == tag_q.c.run_history_id) \
                 .outerjoin(Run, Run.id == tag_q.c.run_id) \
                 .outerjoin(count_q, count_q.c.run_id == RunHistory.run_id) \
-                .filter(RunHistory.version_tag.isnot(None))
+                .filter(RunHistory.version_tag.isnot(None)) \
+                .group_by(tag_q.c.run_id) \
+                .order_by('run_name')
 
             for _, run_name, version_time, tag, count in q:
                 if tag:
diff --git a/tests/functional/report_viewer_api/test_report_filter.py b/tests/functional/report_viewer_api/test_report_filter.py
index af431b430f..da17c24039 100644
--- a/tests/functional/report_viewer_api/test_report_filter.py
+++ b/tests/functional/report_viewer_api/test_report_filter.py
@@ -301,8 +301,8 @@ def test_uniqueing_compared_to_test_config(self):
                 None)
 
             unique_bugs = set()
-            # Uniqueing is done based on file name, line number, and hash.
+            # Uniqueing is done based on bug hash.
             for b in bugs:
-                unique_bugs.add((b['file'], b['checker'], b['hash']))
+                unique_bugs.add(b['hash'])
 
             self.assertEqual(len(unique_bugs), run_result_count)
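
Note on the uniqueing scheme (illustration, not part of the patch): after this
change every "unique" query collapses duplicates by grouping on Report.bug_id
alone, and any other column that still has to be selected is aggregated with
MAX so that each bug hash yields exactly one row. Below is a minimal,
self-contained sketch of that pattern against a simplified stand-in for the
CodeChecker Report model; the table and column definitions here are assumed
for illustration only and are not the real schema.

from sqlalchemy import Column, Integer, String, create_engine, func
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

Base = declarative_base()


class Report(Base):
    # Simplified stand-in for the real CodeChecker Report ORM class.
    __tablename__ = 'reports'
    id = Column(Integer, primary_key=True)
    bug_id = Column(String)       # the bug hash
    checker_id = Column(String)
    severity = Column(Integer)


engine = create_engine('sqlite://')
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()

# The same bug hash stored from two runs, plus one distinct bug.
session.add_all([
    Report(bug_id='b1', checker_id='core.NullDereference', severity=50),
    Report(bug_id='b1', checker_id='core.NullDereference', severity=50),
    Report(bug_id='b2', checker_id='deadcode.DeadStores', severity=10)])
session.commit()

# Non-unique count: one row per stored report.
print(session.query(Report.id).count())                               # 3

# Unique count, the getRunResultCount pattern: group by bug hash only.
print(session.query(Report.bug_id).group_by(Report.bug_id).count())  # 2

# Unique listing, the getRunResults pattern: aggregate the remaining
# columns with MAX and keep one labeled row per bug hash.
unique_q = session.query(func.max(Report.id).label('id'),
                         func.max(Report.severity).label('severity')) \
    .group_by(Report.bug_id).subquery()
print(session.query(unique_q.c.id, unique_q.c.severity).all())

Grouping on the single bug_id column, instead of the previous five-column
GROUP BY (bug hash, checker name, checker message, severity, file id), is
where the filter queries gain their speed.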