diff --git a/web/api/Makefile b/web/api/Makefile index fca420d1f7..17092ac16e 100644 --- a/web/api/Makefile +++ b/web/api/Makefile @@ -70,6 +70,11 @@ build: clean target_dirs rm -rf $(API_DIR)/gen-nodejs/ rm -rf $(API_DIR)/gen-py/ + # When someone is developing CodeChecker and changes the API multiple times + # we need to remove the changes from package-lock.json file to install + # the api correctly by npm. + git checkout -- $(API_DIR)/../server/vue-cli/package-lock.json + publish: build publish_py publish_js publish_py: diff --git a/web/api/js/codechecker-api-node/dist/codechecker-api-6.39.0.tgz b/web/api/js/codechecker-api-node/dist/codechecker-api-6.39.0.tgz deleted file mode 100644 index 7570990eda..0000000000 Binary files a/web/api/js/codechecker-api-node/dist/codechecker-api-6.39.0.tgz and /dev/null differ diff --git a/web/api/js/codechecker-api-node/dist/codechecker-api-6.40.0.tgz b/web/api/js/codechecker-api-node/dist/codechecker-api-6.40.0.tgz new file mode 100644 index 0000000000..74f37ce9e4 Binary files /dev/null and b/web/api/js/codechecker-api-node/dist/codechecker-api-6.40.0.tgz differ diff --git a/web/api/js/codechecker-api-node/package.json b/web/api/js/codechecker-api-node/package.json index 0440d4cad2..d06abd60a2 100644 --- a/web/api/js/codechecker-api-node/package.json +++ b/web/api/js/codechecker-api-node/package.json @@ -1,6 +1,6 @@ { "name": "codechecker-api", - "version": "6.39.0", + "version": "6.40.0", "description": "Generated node.js compatible API stubs for CodeChecker server.", "main": "lib", "homepage": "https://github.com/Ericsson/codechecker", diff --git a/web/api/py/codechecker_api/dist/codechecker_api.tar.gz b/web/api/py/codechecker_api/dist/codechecker_api.tar.gz index b2933a5fc4..d56801e81d 100644 Binary files a/web/api/py/codechecker_api/dist/codechecker_api.tar.gz and b/web/api/py/codechecker_api/dist/codechecker_api.tar.gz differ diff --git a/web/api/py/codechecker_api/setup.py b/web/api/py/codechecker_api/setup.py index 7751101996..34d8c2f8ff 100644 --- a/web/api/py/codechecker_api/setup.py +++ b/web/api/py/codechecker_api/setup.py @@ -8,7 +8,7 @@ with open('README.md', encoding='utf-8', errors="ignore") as f: long_description = f.read() -api_version = '6.39.0' +api_version = '6.40.0' setup( name='codechecker_api', diff --git a/web/api/py/codechecker_api_shared/dist/codechecker_api_shared.tar.gz b/web/api/py/codechecker_api_shared/dist/codechecker_api_shared.tar.gz index 307fa1ca80..223f04b10f 100644 Binary files a/web/api/py/codechecker_api_shared/dist/codechecker_api_shared.tar.gz and b/web/api/py/codechecker_api_shared/dist/codechecker_api_shared.tar.gz differ diff --git a/web/api/py/codechecker_api_shared/setup.py b/web/api/py/codechecker_api_shared/setup.py index ecf7cbcd99..ab804bc3be 100644 --- a/web/api/py/codechecker_api_shared/setup.py +++ b/web/api/py/codechecker_api_shared/setup.py @@ -8,7 +8,7 @@ with open('README.md', encoding='utf-8', errors="ignore") as f: long_description = f.read() -api_version = '6.39.0' +api_version = '6.40.0' setup( name='codechecker_api_shared', diff --git a/web/api/report_server.thrift b/web/api/report_server.thrift index 24274c8d5e..704f606881 100644 --- a/web/api/report_server.thrift +++ b/web/api/report_server.thrift @@ -367,6 +367,16 @@ struct ExportData { 2: map reviewData, // Map review data to report hashes. 
} +union AnalysisInfoFilter { + 1: i64 runId, + 2: i64 runHistoryId, + 3: i64 reportId, +} + +struct AnalysisInfo { + 1: string analyzerCommand, +} + service codeCheckerDBAccess { // Gives back all analyzed runs. @@ -384,10 +394,18 @@ service codeCheckerDBAccess { // Get check command for a run. // PERMISSION: PRODUCT_ACCESS + // !DEPRECATED Use getAnalysisInfo API to get the check commands. string getCheckCommand(1: i64 runHistoryId, 2: i64 runId) throws (1: codechecker_api_shared.RequestFailed requestError), + // Get analyzer commands based on the given filters. + // PERMISSION: PRODUCT_ACCESS + list getAnalysisInfo(1: AnalysisInfoFilter analysisInfoFilter, + 2: i64 limit, + 3: i64 offset) + throws (1: codechecker_api_shared.RequestFailed requestError), + // Get run history for runs. // If an empty run id list is provided the history // will be returned for all the available runs ordered by run history date. diff --git a/web/client/codechecker_client/cmd/store.py b/web/client/codechecker_client/cmd/store.py index 38b63bbcf8..376b77c3cb 100644 --- a/web/client/codechecker_client/cmd/store.py +++ b/web/client/codechecker_client/cmd/store.py @@ -30,7 +30,6 @@ from codechecker_api_shared.ttypes import RequestFailed, ErrorCode from codechecker_client import client as libclient -from codechecker_client.metadata import merge_metadata_json from codechecker_common import arg, logger, plist_parser, util, cmd_config from codechecker_common.report import Report @@ -640,12 +639,10 @@ def assemble_zip(inputs, zip_file, client): if file_hash: file_hash_with_review_status.add(file_hash) - metadata_files_to_merge = [] for input_dir_path in inputs: for root_dir_path, _, _ in os.walk(input_dir_path): metadata_file_path = os.path.join(root_dir_path, 'metadata.json') if os.path.exists(metadata_file_path): - metadata_files_to_merge.append(metadata_file_path) files_to_compress.add(metadata_file_path) skip_file_path = os.path.join(root_dir_path, 'skip_file') @@ -679,12 +676,6 @@ def assemble_zip(inputs, zip_file, client): zipf.write(ftc, zip_target) - merged_metadata = merge_metadata_json( - metadata_files_to_merge, len(inputs)) - - zipf.writestr(os.path.join('reports', 'metadata.json'), - json.dumps(merged_metadata)) - for f, h in file_to_hash.items(): if h in necessary_hashes or h in file_hash_with_review_status: LOG.debug("File contents for '%s' needed by the server", f) diff --git a/web/codechecker_web/shared/version.py b/web/codechecker_web/shared/version.py index 6e0d3a4908..3e52265b03 100644 --- a/web/codechecker_web/shared/version.py +++ b/web/codechecker_web/shared/version.py @@ -18,7 +18,7 @@ # The newest supported minor version (value) for each supported major version # (key) in this particular build. SUPPORTED_VERSIONS = { - 6: 39 + 6: 40 } # Used by the client to automatically identify the latest major and minor diff --git a/web/server/codechecker_server/api/mass_store_run.py b/web/server/codechecker_server/api/mass_store_run.py new file mode 100644 index 0000000000..be88ad1701 --- /dev/null +++ b/web/server/codechecker_server/api/mass_store_run.py @@ -0,0 +1,1242 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. 
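As a usage reference for the new Thrift API above: a minimal client-side sketch of querying analyzer commands through getAnalysisInfo instead of the deprecated getCheckCommand. The authenticated Thrift client handle (`client`) is assumed to already exist (e.g. obtained through the CodeChecker client helpers) and is not part of this patch.

    from codechecker_api.codeCheckerDBAccess_v6.ttypes import AnalysisInfoFilter

    def print_analyzer_commands(client, run_id):
        # Filter by run id; runHistoryId or reportId could be used instead.
        ai_filter = AnalysisInfoFilter(runId=run_id)
        # limit=0 requests all matching entries; offset is then ignored.
        for info in client.getAnalysisInfo(ai_filter, 0, 0):
            print(info.analyzerCommand)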
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- + +import base64 +import os +import sqlalchemy +import tempfile +import time +import zipfile +import zlib + +from collections import defaultdict +from datetime import datetime +from hashlib import sha256 +from typing import Any, Dict, List, NamedTuple, Optional, Set + +import codechecker_api_shared +from codechecker_api.codeCheckerDBAccess_v6 import ttypes + +from codechecker_common import plist_parser, skiplist_handler, util +from codechecker_common.logger import get_logger +from codechecker_common.source_code_comment_handler import \ + SourceCodeCommentHandler, SpellException, contains_codechecker_comment + +from codechecker_report_hash.hash import get_report_path_hash + +from ..database import db_cleanup +from ..database.config_db_model import Product +from ..database.database import DBSession +from ..database.run_db_model import AnalysisInfo, AnalyzerStatistic, \ + BugPathEvent, BugReportPoint, ExtendedReportData, File, FileContent, \ + Report, Run, RunHistory, RunLock +from ..metadata import checker_is_unavailable, get_analyzer_name, \ + MetadataInfoParser +from ..tmp import TemporaryDirectory + +from .report_server import ThriftRequestHandler +from .thrift_enum_helper import report_extended_data_type_str + + +LOG = get_logger('server') + + +# FIXME: when these types are introduced we need to use those. +SourceLineComments = List[Any] +ReportType = Any +MainSection = Dict + + +class PathEvents(NamedTuple): + paths: List[ttypes.BugPathPos] + events: List[ttypes.BugPathEvent] + extended_data: List[ttypes.ExtendedReportData] + + +def unzip(b64zip: str, output_dir: str) -> int: + """ + This function unzips the base64 encoded zip file. This zip is extracted + to a temporary directory and the ZIP is then deleted. The function returns + the size of the extracted decompressed zip file. + """ + if len(b64zip) == 0: + return 0 + + with tempfile.NamedTemporaryFile(suffix='.zip') as zip_file: + LOG.debug("Unzipping mass storage ZIP '%s' to '%s'...", + zip_file.name, output_dir) + + zip_file.write(zlib.decompress(base64.b64decode(b64zip))) + with zipfile.ZipFile(zip_file, 'r', allowZip64=True) as zipf: + try: + zipf.extractall(output_dir) + return os.stat(zip_file.name).st_size + except Exception: + LOG.error("Failed to extract received ZIP.") + import traceback + traceback.print_exc() + raise + return 0 + + +def get_file_content(file_path: str) -> bytes: + """Return the file content for the given filepath. """ + with open(file_path, 'rb') as f: + return f.read() + + +def parse_codechecker_review_comment( + source_file_name: str, + report_line: int, + checker_name: str +) -> SourceLineComments: + """Parse the CodeChecker review comments from a source file at a given + position. Returns an empty list if there are no comments. + """ + src_comment_data = [] + with open(source_file_name, encoding='utf-8', errors='ignore') as f: + if contains_codechecker_comment(f): + sc_handler = SourceCodeCommentHandler() + try: + src_comment_data = sc_handler.filter_source_line_comments( + f, report_line, checker_name) + except SpellException as ex: + LOG.warning("File %s contains %s", source_file_name, ex) + + return src_comment_data + + +def collect_paths_events( + report: ReportType, + file_ids: Dict[str, int], + files: Dict[str, str] +) -> PathEvents: + """ + This function creates the BugPathPos and BugPathEvent objects which belong + to a report. 
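For context on the b64zip parameter consumed by unzip() above: a minimal sketch of how a compatible payload could be produced from a ZIP file on disk (the file name is illustrative; in practice the ZIP is the one assembled by the store command).

    import base64
    import zlib

    def make_b64zip(zip_path: str) -> str:
        # Inverse of unzip(): compress the raw ZIP bytes with zlib,
        # then base64-encode the result into a text-safe payload.
        with open(zip_path, 'rb') as f:
            return base64.b64encode(zlib.compress(f.read())).decode('ascii')

    payload = make_b64zip('reports.zip')  # illustrative path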
+ + report -- A report object from the parsed plist file. + file_ids -- A dictionary which maps the file paths to file IDs in the + database. + files -- A list containing the file paths from the parsed plist file. The + order of this list must be the same as in the plist file. + + #TODO Multiple ranges could belong to an event or control node. + Only the first range from the list of ranges is stored into the + database. Further improvement can be to store and view all ranges + if there are more than one. + """ + path_events = PathEvents([], [], []) + + events = [i for i in report.bug_path if i.get('kind') == 'event'] + + # Create remaining data for bugs and send them to the server. In plist + # file the source and target of the arrows are provided as starting and + # ending ranges of the arrow. The path A->B->C is given as A->B and + # B->C, thus range B is provided twice. So in the loop only target + # points of the arrows are stored, and an extra insertion is done for + # the source of the first arrow before the loop. + report_path = [i for i in report.bug_path if i.get('kind') == 'control'] + + if report_path: + start_range = report_path[0]['edges'][0]['start'] + start1_line = start_range[0]['line'] + start1_col = start_range[0]['col'] + start2_line = start_range[1]['line'] + start2_col = start_range[1]['col'] + source_file_path = files[start_range[1]['file']] + path_events.paths.append(ttypes.BugPathPos( + start1_line, + start1_col, + start2_line, + start2_col, + file_ids[source_file_path])) + + for path in report_path: + try: + end_range = path['edges'][0]['end'] + end1_line = end_range[0]['line'] + end1_col = end_range[0]['col'] + end2_line = end_range[1]['line'] + end2_col = end_range[1]['col'] + source_file_path = files[end_range[1]['file']] + path_events.paths.append(ttypes.BugPathPos( + end1_line, + end1_col, + end2_line, + end2_col, + file_ids[source_file_path])) + except IndexError: + # Edges might be empty nothing can be stored. + continue + + for event in events: + file_path = files[event['location']['file']] + + start_loc = event['location'] + end_loc = event['location'] + # Range can provide more precise location information. + # Use that if available. + ranges = event.get("ranges") + if ranges: + start_loc = ranges[0][0] + end_loc = ranges[0][1] + + path_events.events.append(ttypes.BugPathEvent( + start_loc['line'], + start_loc['col'], + end_loc['line'], + end_loc['col'], + event['message'], + file_ids[file_path])) + + for macro in report.macro_expansions: + if not macro['expansion']: + continue + + file_path = files[macro['location']['file']] + + start_loc = macro['location'] + end_loc = macro['location'] + # Range can provide more precise location information. + # Use that if available. + ranges = macro.get("ranges") + if ranges: + start_loc = ranges[0][0] + end_loc = ranges[0][1] + + path_events.extended_data.append(ttypes.ExtendedReportData( + ttypes.ExtendedReportDataType.MACRO, + start_loc['line'], + start_loc['col'], + end_loc['line'], + end_loc['col'], + macro['expansion'], + file_ids[file_path])) + + for note in report.notes: + if not note['message']: + continue + + file_path = files[note['location']['file']] + + start_loc = note['location'] + end_loc = note['location'] + # Range can provide more precise location information. + # Use that if available. 
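To make the comments above about arrows and ranges concrete, here is a sketch of the plist-derived node shapes this function reads (field names follow the accesses in the code; the concrete values are illustrative only).

    # A 'control' node carries the arrows of the bug path; each edge has a
    # 'start' and an 'end' given as a pair of locations (a range).
    control_node = {
        'kind': 'control',
        'edges': [{
            'start': [{'line': 10, 'col': 5, 'file': 0},
                      {'line': 10, 'col': 12, 'file': 0}],
            'end':   [{'line': 12, 'col': 3, 'file': 0},
                      {'line': 12, 'col': 9, 'file': 0}],
        }],
    }

    # An 'event' node has a single location, a message and optional 'ranges'
    # which, when present, give a more precise start/end position.
    event_node = {
        'kind': 'event',
        'location': {'line': 12, 'col': 3, 'file': 0},
        'message': 'Division by zero',
        'ranges': [[{'line': 12, 'col': 3, 'file': 0},
                    {'line': 12, 'col': 9, 'file': 0}]],
    }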
+ ranges = note.get("ranges") + if ranges: + start_loc = ranges[0][0] + end_loc = ranges[0][1] + + path_events.extended_data.append(ttypes.ExtendedReportData( + ttypes.ExtendedReportDataType.NOTE, + start_loc['line'], + start_loc['col'], + end_loc['line'], + end_loc['col'], + note['message'], + file_ids[file_path])) + + return path_events + + +def add_file_record( + session: DBSession, + file_path: str, + content_hash: str +) -> Optional[int]: + """ + Add the necessary file record pointing to an already existing content. + Returns the added file record id or None, if the content_hash is not + found. + + This function must not be called between add_checker_run() and + finish_checker_run() functions when SQLite database is used! + add_checker_run() function opens a transaction which is closed by + finish_checker_run() and since SQLite doesn't support parallel + transactions, this API call will wait until the other transactions + finish. In the meantime the run adding transaction times out. + """ + file_record = session.query(File) \ + .filter(File.content_hash == content_hash, + File.filepath == file_path) \ + .one_or_none() + + if file_record: + return file_record.id + + try: + file_record = File(file_path, content_hash) + session.add(file_record) + session.commit() + except sqlalchemy.exc.IntegrityError as ex: + LOG.error(ex) + # Other transaction might have added the same file in the + # meantime. + session.rollback() + file_record = session.query(File) \ + .filter(File.content_hash == content_hash, + File.filepath == file_path).one_or_none() + + return file_record.id if file_record else None + + +class MassStoreRun: + def __init__( + self, + report_server: ThriftRequestHandler, + name: str, + tag: Optional[str], + version: Optional[str], + b64zip: str, + force: bool, + trim_path_prefixes: Optional[List[str]], + description: Optional[str] + ): + """ Initialize object. """ + self.__report_server = report_server + + self.__name = name + self.__tag = tag + self.__version = version + self.__b64zip = b64zip + self.__force = force + self.__trim_path_prefixes = trim_path_prefixes + self.__description = description + + self.__mips: Dict[str, MetadataInfoParser] = {} + self.__analysis_info: Dict[str, AnalysisInfo] = {} + self.__duration: int = 0 + self.__wrong_src_code_comments: List[str] = [] + self.__already_added_report_hashes: Set[str] = set() + self.__new_report_hashes: Set[str] = set() + self.__enabled_checkers: Set[str] = set() + self.__disabled_checkers: Set[str] = set() + self.__all_report_checkers: Set[str] = set() + + @property + def __manager(self): + return self.__report_server._manager + + @property + def __Session(self): + return self.__report_server._Session + + @property + def __config_database(self): + return self.__report_server._config_database + + @property + def __product(self): + return self.__report_server._product + + @property + def __context(self): + return self.__report_server._context + + @property + def user_name(self): + return self.__report_server._get_username() + + def __check_run_limit(self): + """ + Checks the maximum allowed of uploadable runs for the current product. + """ + max_run_count = self.__manager.get_max_run_count() + + with DBSession(self.__config_database) as session: + product = session.query(Product).get(self.__product.id) + if product.run_limit: + max_run_count = product.run_limit + + # Session that handles constraints on the run. 
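A short worked example of the limit check performed in the session below (numbers are illustrative):

    max_run_count, run_count = 50, 52          # product limit, runs already stored
    remove_run_count = run_count - max_run_count + 1
    # => 3: storing into a *new* run name is rejected until at least three
    #    existing runs are removed; updating an existing run is still allowed.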
+ with DBSession(self.__Session) as session: + if not max_run_count: + return + + LOG.debug("Check the maximum number of allowed runs which is %d", + max_run_count) + + run = session.query(Run) \ + .filter(Run.name == self.__name) \ + .one_or_none() + + # If max_run_count is not set in the config file, it will allow + # the user to upload unlimited runs. + + run_count = session.query(Run.id).count() + + # If we are not updating a run or the run count is reached the + # limit it will throw an exception. + if not run and run_count >= max_run_count: + remove_run_count = run_count - max_run_count + 1 + raise codechecker_api_shared.ttypes.RequestFailed( + codechecker_api_shared.ttypes.ErrorCode.GENERAL, + f"You reached the maximum number of allowed runs " + f"({run_count}/{max_run_count})! Please remove at least " + f"{remove_run_count} run(s) before you try it again.") + + def __store_run_lock(self, session: DBSession): + """ + Store a RunLock record for the given run name into the database. + """ + try: + # If the run can be stored, we need to lock it first. If there is + # already a lock in the database for the given run name which is + # expired and multiple processes are trying to get this entry from + # the database for update we may get the following exception: + # could not obtain lock on row in relation "run_locks" + # This is the reason why we have to wrap this query to a try/except + # block. + run_lock = session.query(RunLock) \ + .filter(RunLock.name == self.__name) \ + .with_for_update(nowait=True).one_or_none() + except (sqlalchemy.exc.OperationalError, + sqlalchemy.exc.ProgrammingError) as ex: + LOG.error("Failed to get run lock for '%s': %s", self.__name, ex) + raise codechecker_api_shared.ttypes.RequestFailed( + codechecker_api_shared.ttypes.ErrorCode.DATABASE, + "Someone is already storing to the same run. Please wait " + "while the other storage is finished and try it again.") + + if not run_lock: + # If there is no lock record for the given run name, the run + # is not locked -- create a new lock. + run_lock = RunLock(self.__name, self.user_name) + session.add(run_lock) + elif run_lock.has_expired( + db_cleanup.RUN_LOCK_TIMEOUT_IN_DATABASE): + # There can be a lock in the database, which has already + # expired. In this case, we assume that the previous operation + # has failed, and thus, we can re-use the already present lock. + run_lock.touch() + run_lock.username = self.user_name + else: + # In case the lock exists and it has not expired, we must + # consider the run a locked one. + when = run_lock.when_expires( + db_cleanup.RUN_LOCK_TIMEOUT_IN_DATABASE) + + username = run_lock.username if run_lock.username is not None \ + else "another user" + + LOG.info("Refusing to store into run '%s' as it is locked by " + "%s. Lock will expire at '%s'.", self.__name, username, + when) + raise codechecker_api_shared.ttypes.RequestFailed( + codechecker_api_shared.ttypes.ErrorCode.DATABASE, + "The run named '{0}' is being stored into by {1}. If the " + "other store operation has failed, this lock will expire " + "at '{2}'.".format(self.__name, username, when)) + + # At any rate, if the lock has been created or updated, commit it + # into the database. + try: + session.commit() + except (sqlalchemy.exc.IntegrityError, + sqlalchemy.orm.exc.StaleDataError): + # The commit of this lock can fail. 
+ # + # In case two store ops attempt to lock the same run name at the + # same time, committing the lock in the transaction that commits + # later will result in an IntegrityError due to the primary key + # constraint. + # + # In case two store ops attempt to lock the same run name with + # reuse and one of the operation hangs long enough before COMMIT + # so that the other operation commits and thus removes the lock + # record, StaleDataError is raised. In this case, also consider + # the run locked, as the data changed while the transaction was + # waiting, as another run wholly completed. + + LOG.info("Run '%s' got locked while current transaction " + "tried to acquire a lock. Considering run as locked.", + self.__name) + raise codechecker_api_shared.ttypes.RequestFailed( + codechecker_api_shared.ttypes.ErrorCode.DATABASE, + "The run named '{0}' is being stored into by another " + "user.".format(self.__name)) + + def __free_run_lock(self, session: DBSession): + """ Remove the lock from the database for the given run name. """ + # Using with_for_update() here so the database (in case it supports + # this operation) locks the lock record's row from any other access. + run_lock = session.query(RunLock) \ + .filter(RunLock.name == self.__name) \ + .with_for_update(nowait=True).one() + session.delete(run_lock) + session.commit() + + def __store_source_files( + self, + source_root: str, + filename_to_hash: Dict[str, str] + ) -> Dict[str, int]: + """ Storing file contents from plist. """ + + file_path_to_id = {} + + for file_name, file_hash in filename_to_hash.items(): + source_file_name = os.path.join(source_root, file_name.strip("/")) + source_file_name = os.path.realpath(source_file_name) + LOG.debug("Storing source file: %s", source_file_name) + trimmed_file_path = util.trim_path_prefixes( + file_name, self.__trim_path_prefixes) + + if not os.path.isfile(source_file_name): + # The file was not in the ZIP file, because we already + # have the content. Let's check if we already have a file + # record in the database or we need to add one. + + LOG.debug('%s not found or already stored.', trimmed_file_path) + with DBSession(self.__Session) as session: + fid = add_file_record( + session, trimmed_file_path, file_hash) + + if not fid: + LOG.error("File ID for %s is not found in the DB with " + "content hash %s. Missing from ZIP?", + source_file_name, file_hash) + file_path_to_id[trimmed_file_path] = fid + LOG.debug("%d fileid found", fid) + continue + + with DBSession(self.__Session) as session: + file_path_to_id[trimmed_file_path] = self.__add_file_content( + session, trimmed_file_path, source_file_name, file_hash) + + return file_path_to_id + + def __add_file_content( + self, + session: DBSession, + file_path: str, + source_file_name: str, + content_hash: str + ) -> int: + """ + Add the necessary file contents. If the file is already stored in the + database then its ID returns. If content_hash in None then this + function calculates the content hash. Or if is available at the caller + and is provided then it will not be calculated again. + + This function must not be called between add_checker_run() and + finish_checker_run() functions when SQLite database is used! + add_checker_run() function opens a transaction which is closed by + finish_checker_run() and since SQLite doesn't support parallel + transactions, this API call will wait until the other transactions + finish. In the meantime the run adding transaction times out. 
+ """ + + source_file_content = None + if not content_hash: + source_file_content = get_file_content(source_file_name) + + hasher = sha256() + hasher.update(source_file_content) + content_hash = hasher.hexdigest() + + file_content = session.query(FileContent).get(content_hash) + if not file_content: + if not source_file_content: + source_file_content = get_file_content(source_file_name) + try: + compressed_content = zlib.compress(source_file_content, + zlib.Z_BEST_COMPRESSION) + fc = FileContent(content_hash, compressed_content) + session.add(fc) + session.commit() + except sqlalchemy.exc.IntegrityError: + # Other transaction moght have added the same content in + # the meantime. + session.rollback() + + file_record = session.query(File) \ + .filter(File.content_hash == content_hash, + File.filepath == file_path) \ + .one_or_none() + + if not file_record: + try: + file_record = File(file_path, content_hash) + session.add(file_record) + session.commit() + except sqlalchemy.exc.IntegrityError as ex: + LOG.error(ex) + # Other transaction might have added the same file in the + # meantime. + session.rollback() + file_record = session.query(File) \ + .filter(File.content_hash == content_hash, + File.filepath == file_path) \ + .one_or_none() + + return file_record.id + + def __store_analysis_statistics( + self, + session: DBSession, + run_history_id: int + ): + """ + Store analysis statistics for the given run history. + + It will unique the statistics for each analyzer type based on the + metadata information. + """ + stats = defaultdict(lambda: { + "versions": set(), + "failed_sources": set(), + "successful_sources": set(), + "successful": 0 + }) + + for mip in self.__mips.values(): + self.__duration += int(sum(mip.check_durations)) + + for analyzer_type, res in mip.analyzer_statistics.items(): + if "version" in res: + stats[analyzer_type]["versions"].add(res["version"]) + + if "failed_sources" in res: + if self.__version == '6.9.0': + stats[analyzer_type]["failed_sources"].add( + 'Unavailable in CodeChecker 6.9.0!') + else: + stats[analyzer_type]["failed_sources"].update( + res["failed_sources"]) + + if "successful_sources" in res: + stats[analyzer_type]["successful_sources"].update( + res["successful_sources"]) + + if "successful" in res: + stats[analyzer_type]["successful"] += res["successful"] + + for analyzer_type, stat in stats.items(): + analyzer_version = None + if stat["versions"]: + analyzer_version = zlib.compress( + "; ".join(stat["versions"]).encode('utf-8'), + zlib.Z_BEST_COMPRESSION) + + failed = 0 + compressed_files = None + if stat["failed_sources"]: + compressed_files = zlib.compress( + '\n'.join(stat["failed_sources"]).encode('utf-8'), + zlib.Z_BEST_COMPRESSION) + + failed = len(stat["failed_sources"]) + + successful = len(stat["successful_sources"]) \ + if stat["successful_sources"] else stat["successful"] + + analyzer_statistics = AnalyzerStatistic( + run_history_id, analyzer_type, analyzer_version, + successful, failed, compressed_files) + + session.add(analyzer_statistics) + + def __store_analysis_info( + self, + session: DBSession, + run_history: RunHistory + ): + """ Store analysis info for the given run history. 
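A minimal standalone sketch of the content-addressing scheme used in __add_file_content() above (and reused below for analyzer commands): the SHA-256 hex digest keys the content, and zlib at best compression is what gets stored.

    import zlib
    from hashlib import sha256

    def content_key_and_blob(source_bytes: bytes):
        # The hex digest is the FileContent key, the compressed bytes are the
        # stored blob; zlib.decompress() restores the original source.
        return sha256(source_bytes).hexdigest(), \
            zlib.compress(source_bytes, zlib.Z_BEST_COMPRESSION)

    key, blob = content_key_and_blob(b'int main() { return 0; }\n')
    assert zlib.decompress(blob) == b'int main() { return 0; }\n'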
""" + for src_dir_path, mip in self.__mips.items(): + for analyzer_command in mip.check_commands: + cmd = zlib.compress( + analyzer_command.encode("utf-8"), + zlib.Z_BEST_COMPRESSION) + + analysis_info = session \ + .query(AnalysisInfo) \ + .filter(AnalysisInfo.analyzer_command == cmd) \ + .one_or_none() + + if not analysis_info: + analysis_info = AnalysisInfo(analyzer_command=cmd) + session.add(analysis_info) + + run_history.analysis_info.append(analysis_info) + self.__analysis_info[src_dir_path] = analysis_info + + def __add_checker_run( + self, + session: DBSession, + run_history_time: datetime + ) -> int: + """ + Store run related data to the database. + By default updates the results if name already exists. + Using the force flag removes existing analysis results for a run. + """ + try: + LOG.debug("Adding run '%s'...", self.__name) + + run = session.query(Run) \ + .filter(Run.name == self.__name) \ + .one_or_none() + + if run and self.__force: + # Clean already collected results. + if not run.can_delete: + # Deletion is already in progress. + msg = f"Can't delete {run.id}" + LOG.debug(msg) + raise codechecker_api_shared.ttypes.RequestFailed( + codechecker_api_shared.ttypes.ErrorCode.DATABASE, + msg) + + LOG.info('Removing previous analysis results...') + session.delete(run) + # Not flushing after delete leads to a constraint violation + # error later, when adding run entity with the same name as + # the old one. + session.flush() + + checker_run = Run(self.__name, self.__version) + session.add(checker_run) + session.flush() + run_id = checker_run.id + + elif run: + # There is already a run, update the results. + run.date = datetime.now() + run.duration = -1 + session.flush() + run_id = run.id + else: + # There is no run create new. + checker_run = Run(self.__name, self.__version) + session.add(checker_run) + session.flush() + run_id = checker_run.id + + # Add run to the history. + LOG.debug("Adding run history.") + + if self.__tag is not None: + run_history = session.query(RunHistory) \ + .filter(RunHistory.run_id == run_id, + RunHistory.version_tag == self.__tag) \ + .one_or_none() + + if run_history: + run_history.version_tag = None + session.add(run_history) + + cc_versions = set() + for mip in self.__mips.values(): + if mip.cc_version: + cc_versions.add(mip.cc_version) + + cc_version = '; '.join(cc_versions) if cc_versions else None + run_history = RunHistory( + run_id, self.__tag, self.user_name, run_history_time, + cc_version, self.__description) + + session.add(run_history) + session.flush() + + LOG.debug("Adding run done.") + + self.__store_analysis_statistics(session, run_history.id) + self.__store_analysis_info(session, run_history) + + session.flush() + LOG.debug("Storing analysis statistics done.") + + return run_id + except Exception as ex: + raise codechecker_api_shared.ttypes.RequestFailed( + codechecker_api_shared.ttypes.ErrorCode.GENERAL, + str(ex)) + + def __add_report( + self, + session: DBSession, + run_id: int, + file_id: int, + main_section: MainSection, + path_events: PathEvents, + detection_status: str, + detection_time: datetime, + analysis_info: AnalysisInfo, + analyzer_name: Optional[str] = None + ) -> int: + """ Add report to the database. """ + def store_bug_events(report_id: int): + """ Add bug path events. 
""" + for i, event in enumerate(path_events.events): + bpe = BugPathEvent( + event.startLine, event.startCol, event.endLine, + event.endCol, i, event.msg, event.fileId, report_id) + session.add(bpe) + + def store_bug_path(report_id: int): + """ Add bug path points. """ + for i, piece in enumerate(path_events.paths): + brp = BugReportPoint( + piece.startLine, piece.startCol, piece.endLine, + piece.endCol, i, piece.fileId, report_id) + session.add(brp) + + def store_extended_bug_data(report_id: int): + """ Add extended bug data objects to the database session. """ + for data in path_events.extended_data: + data_type = report_extended_data_type_str(data.type) + red = ExtendedReportData( + data.startLine, data.startCol, data.endLine, data.endCol, + data.message, data.fileId, report_id, data_type) + session.add(red) + + try: + checker_name = main_section['check_name'] + severity_name = self.__context.severity_map.get(checker_name) + severity = ttypes.Severity._NAMES_TO_VALUES[severity_name] + report = Report( + run_id, main_section['issue_hash_content_of_line_in_context'], + file_id, main_section['description'], + checker_name or 'NOT FOUND', + main_section['category'], main_section['type'], + main_section['location']['line'], + main_section['location']['col'], + severity, detection_status, detection_time, + len(path_events.events), analyzer_name) + + session.add(report) + session.flush() + + LOG.debug("storing bug path") + store_bug_path(report.id) + + LOG.debug("storing events") + store_bug_events(report.id) + + LOG.debug("storing extended report data") + store_extended_bug_data(report.id) + + if analysis_info: + report.analysis_info.append(analysis_info) + + return report.id + + except Exception as ex: + raise codechecker_api_shared.ttypes.RequestFailed( + codechecker_api_shared.ttypes.ErrorCode.GENERAL, + str(ex)) + + def __process_report_file( + self, + report_file_path: str, + session: DBSession, + source_root: str, + run_id: int, + file_path_to_id: Dict[str, int], + run_history_time: datetime, + skip_handler: Optional[skiplist_handler.SkipListHandler], + hash_map_reports: Dict[str, List[Any]] + ) -> bool: + """ + Process and save reports from the given report file to the database. + """ + try: + files, reports = plist_parser.parse_plist_file(report_file_path) + except Exception as ex: + LOG.warning('Parsing the plist failed: %s', str(ex)) + return False + + if not reports: + return True + + trimmed_files = {} + file_ids = {} + missing_ids_for_files = [] + + for k, v in files.items(): + trimmed_files[k] = \ + util.trim_path_prefixes(v, self.__trim_path_prefixes) + + for file_name in trimmed_files.values(): + file_id = file_path_to_id.get(file_name, -1) + if file_id == -1: + missing_ids_for_files.append(file_name) + continue + + file_ids[file_name] = file_id + + if missing_ids_for_files: + LOG.warning("Failed to get file path id for '%s'!", + ' '.join(missing_ids_for_files)) + return False + + def set_review_status(report: ReportType): + """ + Set review status for the given report if there is any source code + comment. + """ + checker_name = report.main['check_name'] + last_report_event = report.bug_path[-1] + + # The original file path is needed here not the trimmed + # because the source files are extracted as the original + # file path. + file_name = files[last_report_event['location']['file']] + + source_file_name = os.path.realpath( + os.path.join(source_root, file_name.strip("/"))) + + # Check and store source code comments. 
+ if not os.path.isfile(source_file_name): + return + + report_line = last_report_event['location']['line'] + source_file = os.path.basename(file_name) + + src_comment_data = parse_codechecker_review_comment( + source_file_name, report_line, checker_name) + + if len(src_comment_data) == 1: + status = src_comment_data[0]['status'] + rw_status = ttypes.ReviewStatus.FALSE_POSITIVE + if status == 'confirmed': + rw_status = ttypes.ReviewStatus.CONFIRMED + elif status == 'intentional': + rw_status = ttypes.ReviewStatus.INTENTIONAL + + self.__report_server._setReviewStatus( + session, report.report_hash, rw_status, + src_comment_data[0]['message'], run_history_time) + elif len(src_comment_data) > 1: + LOG.warning( + "Multiple source code comment can be found " + "for '%s' checker in '%s' at line %s. " + "This bug will not be suppressed!", + checker_name, source_file, report_line) + + self.__wrong_src_code_comments.append( + f"{source_file}|{report_line}|{checker_name}") + + root_dir_path = os.path.dirname(report_file_path) + mip = self.__mips[root_dir_path] + analysis_info = self.__analysis_info.get(root_dir_path) + + for report in reports: + self.__all_report_checkers.add(report.check_name) + + if skip_handler and skip_handler.should_skip(report.file_path): + continue + + report.trim_path_prefixes(self.__trim_path_prefixes) + + report_path_hash = get_report_path_hash(report) + if report_path_hash in self.__already_added_report_hashes: + LOG.debug('Not storing report. Already added: %s', report) + continue + + LOG.debug("Storing report to the database...") + + bug_id = report.report_hash + + detection_status = 'new' + detected_at = run_history_time + + if bug_id in hash_map_reports: + old_report = hash_map_reports[bug_id][0] + old_status = old_report.detection_status + detection_status = 'reopened' \ + if old_status == 'resolved' else 'unresolved' + detected_at = old_report.detected_at + + analyzer_name = get_analyzer_name( + report.check_name, mip.checker_to_analyzer, report.metadata) + + path_events = collect_paths_events(report, file_ids, trimmed_files) + + report_id = self.__add_report( + session, run_id, file_ids[report.file_path], report.main, + path_events, detection_status, detected_at, analysis_info, + analyzer_name) + + self.__new_report_hashes.add(bug_id) + self.__already_added_report_hashes.add(report_path_hash) + + set_review_status(report) + + LOG.debug("Storing report done. ID=%d", report_id) + + return True + + def __store_reports( + self, + session: DBSession, + report_dir: str, + source_root: str, + run_id: int, + file_path_to_id: Dict[str, int], + run_history_time: datetime + ): + """ Parse up and store the plist report files. """ + def get_skip_handler( + report_dir: str + ) -> Optional[skiplist_handler.SkipListHandler]: + """ Get a skip list handler based on the given report directory.""" + skip_file_path = os.path.join(report_dir, 'skip_file') + if not os.path.exists(skip_file_path): + return + + LOG.debug("Pocessing skip file %s", skip_file_path) + try: + with open(skip_file_path, + encoding="utf-8", errors="ignore") as f: + skip_content = f.read() + LOG.debug(skip_content) + + return skiplist_handler.SkipListHandler(skip_content) + except (IOError, OSError) as err: + LOG.warning("Failed to open skip file: %s", err) + + # Reset internal data. 
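Restating the review-status mapping applied in set_review_status() above as a small helper (a sketch, not part of the patch): a single source code comment drives the stored review status, and anything other than 'confirmed' or 'intentional' falls back to false positive.

    from codechecker_api.codeCheckerDBAccess_v6 import ttypes

    def comment_status_to_review_status(status: str) -> int:
        # Mirrors the branches above: only two statuses get a dedicated
        # review status, every other value is treated as a false positive.
        if status == 'confirmed':
            return ttypes.ReviewStatus.CONFIRMED
        if status == 'intentional':
            return ttypes.ReviewStatus.INTENTIONAL
        return ttypes.ReviewStatus.FALSE_POSITIVE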
+ self.__already_added_report_hashes = set() + self.__new_report_hashes = set() + self.__all_report_checkers = set() + + all_reports = session.query(Report) \ + .filter(Report.run_id == run_id) \ + .all() + + hash_map_reports = defaultdict(list) + for report in all_reports: + hash_map_reports[report.bug_id].append(report) + + enabled_checkers: Set[str] = set() + disabled_checkers: Set[str] = set() + + # Processing PList files. + for root_dir_path, _, report_file_paths in os.walk(report_dir): + LOG.debug("Get reports from '%s' directory", root_dir_path) + + skip_handler = get_skip_handler(root_dir_path) + + mip = self.__mips[root_dir_path] + enabled_checkers.update(mip.enabled_checkers) + disabled_checkers.update(mip.disabled_checkers) + + for f in report_file_paths: + if not f.endswith('.plist'): + continue + + LOG.debug("Parsing input file '%s'", f) + + report_file_path = os.path.join(root_dir_path, f) + self.__process_report_file( + report_file_path, session, source_root, run_id, + file_path_to_id, run_history_time, + skip_handler, hash_map_reports) + + # If a checker was found in a plist file it can not be disabled so we + # will add this to the enabled checkers list and remove this checker + # from the disabled checkers list. + # Also if multiple report directories are stored and a checker was + # enabled in one report directory but it was disabled in another + # directory we will mark this checker as enabled. + enabled_checkers |= self.__all_report_checkers + disabled_checkers -= self.__all_report_checkers + + reports_to_delete = set() + for bug_hash, reports in hash_map_reports.items(): + if bug_hash in self.__new_report_hashes: + reports_to_delete.update([x.id for x in reports]) + else: + for report in reports: + # We set the fix date of a report only if the report + # has not been fixed before. + if report.fixed_at: + continue + + checker = report.checker_id + if checker in disabled_checkers: + report.detection_status = 'off' + elif checker_is_unavailable(checker, enabled_checkers): + report.detection_status = 'unavailable' + else: + report.detection_status = 'resolved' + + report.fixed_at = run_history_time + + if reports_to_delete: + self.__report_server._removeReports( + session, list(reports_to_delete)) + + def finish_checker_run( + self, + session: DBSession, + run_id: int + ) -> bool: + """ Finish the storage of the given run. """ + try: + LOG.debug("Finishing checker run") + run = session.query(Run).get(run_id) + if not run: + return False + + run.mark_finished() + run.duration = self.__duration + + return True + except Exception as ex: + LOG.error(ex) + + return False + + def store(self) -> int: + """ Store run results to the server. """ + start_time = time.time() + + # Check constraints of the run. + self.__check_run_limit() + + with DBSession(self.__Session) as session: + self.__store_run_lock(session) + + try: + with TemporaryDirectory() as zip_dir: + LOG.info("[%s] Unzip storage file...", self.__name) + zip_size = unzip(self.__b64zip, zip_dir) + LOG.info("[%s] Unzip storage file done.", self.__name) + + if zip_size == 0: + raise codechecker_api_shared.ttypes.RequestFailed( + codechecker_api_shared.ttypes. 
+ ErrorCode.GENERAL, + "The received zip file content is empty!") + + LOG.debug("Using unzipped folder '%s'", zip_dir) + + source_root = os.path.join(zip_dir, 'root') + report_dir = os.path.join(zip_dir, 'reports') + content_hash_file = os.path.join( + zip_dir, 'content_hashes.json') + + filename_to_hash = \ + util.load_json_or_empty(content_hash_file, {}) + + LOG.info("[%s] Store source files...", self.__name) + file_path_to_id = self.__store_source_files( + source_root, filename_to_hash) + LOG.info("[%s] Store source files done.", self.__name) + + run_history_time = datetime.now() + + # Parse all metadata information from the report directory. + for root_dir_path, _, _ in os.walk(report_dir): + metadata_file_path = os.path.join( + root_dir_path, 'metadata.json') + + self.__mips[root_dir_path] = \ + MetadataInfoParser(metadata_file_path) + + # When we use multiple server instances and we try to run + # multiple storage to each server which contain at least two + # reports which have the same report hash and have source code + # comments it is possible that the following exception will be + # thrown: (psycopg2.extensions.TransactionRollbackError) + # deadlock detected. + # The problem is that the report hash is the key for the + # review data table and both of the store actions try to + # update the same review data row. + # Neither of the two processes can continue, and they will wait + # for each other indefinitely. PostgreSQL in this case will + # terminate one transaction with the above exception. + # For this reason in case of failure we will wait some seconds + # and try to run the storage again. + # For more information see #2655 and #2653 issues on github. + max_num_of_tries = 3 + num_of_tries = 0 + sec_to_wait_after_failure = 60 + while True: + try: + # This session's transaction buffer stores the actual + # run data into the database. + with DBSession(self.__Session) as session: + # Load the lock record for "FOR UPDATE" so that the + # transaction that handles the run's store + # operations has a lock on the database row itself. + run_lock = session.query(RunLock) \ + .filter(RunLock.name == self.__name) \ + .with_for_update(nowait=True).one() + + # Do not remove this seemingly dummy print, we need + # to make sure that the execution of the SQL + # statement is not optimised away and the fetched + # row is not garbage collected. + LOG.debug("Storing into run '%s' locked at '%s'.", + self.__name, run_lock.locked_at) + + # Actual store operation begins here. + run_id = self.__add_checker_run( + session, run_history_time) + + LOG.info("[%s] Store reports...", self.__name) + self.__store_reports( + session, report_dir, source_root, run_id, + file_path_to_id, run_history_time) + LOG.info("[%s] Store reports done.", self.__name) + + self.finish_checker_run(session, run_id) + + session.commit() + + LOG.info("'%s' stored results (%s KB " + "/decompressed/) to run '%s' in %s " + "seconds.", self.user_name, + round(zip_size / 1024), self.__name, + round(time.time() - start_time, 2)) + + return run_id + except (sqlalchemy.exc.OperationalError, + sqlalchemy.exc.ProgrammingError) as ex: + num_of_tries += 1 + + if num_of_tries == max_num_of_tries: + raise codechecker_api_shared.ttypes.RequestFailed( + codechecker_api_shared.ttypes. 
+ ErrorCode.DATABASE, + "Storing reports to the database failed: " + "{0}".format(ex)) + + LOG.error("Storing reports of '%s' run failed: " + "%s.\nWaiting %d sec before trying to store " + "it again!", self.__name, ex, + sec_to_wait_after_failure) + time.sleep(sec_to_wait_after_failure) + sec_to_wait_after_failure *= 2 + except Exception as ex: + LOG.error("Failed to store results: %s", ex) + import traceback + traceback.print_exc() + raise + finally: + # In any case if the "try" block's execution began, a run lock must + # exist, which can now be removed, as storage either completed + # successfully, or failed in a detectable manner. + # (If the failure is undetectable, the coded grace period expiry + # of the lock will allow further store operations to the given + # run name.) + with DBSession(self.__Session) as session: + self.__free_run_lock(session) + + if self.__wrong_src_code_comments: + raise codechecker_api_shared.ttypes.RequestFailed( + codechecker_api_shared.ttypes.ErrorCode.SOURCE_FILE, + "Multiple source code comment can be found with the same " + "checker name for same bug!", + self.__wrong_src_code_comments) diff --git a/web/server/codechecker_server/api/report_server.py b/web/server/codechecker_server/api/report_server.py index 6669e87ba6..a80d2fe271 100644 --- a/web/server/codechecker_server/api/report_server.py +++ b/web/server/codechecker_server/api/report_server.py @@ -9,19 +9,15 @@ Handle Thrift requests. """ - import base64 import os import re import shlex -import tempfile -import time -import zipfile import zlib from collections import defaultdict from datetime import datetime, timedelta -from typing import Any, Dict, List, Optional, Set +from typing import List import sqlalchemy from sqlalchemy.sql.expression import or_, and_, not_, func, \ @@ -29,18 +25,14 @@ import codechecker_api_shared from codechecker_api.codeCheckerDBAccess_v6 import constants, ttypes -from codechecker_api.codeCheckerDBAccess_v6.ttypes import BugPathPos, \ - CheckerCount, CommentData, DiffType, Encoding, RunHistoryData, Order, \ - ReportData, ReportDetails, ReviewData, RunData, RunFilter, \ - RunReportCount, RunSortType, RunTagCount, SourceComponentData, \ +from codechecker_api.codeCheckerDBAccess_v6.ttypes import AnalysisInfoFilter, \ + BugPathPos, CheckerCount, CommentData, DiffType, Encoding, \ + RunHistoryData, Order, ReportData, ReportDetails, ReviewData, RunData, \ + RunFilter, RunReportCount, RunSortType, RunTagCount, SourceComponentData, \ SourceFileData, SortMode, SortType, ExportData -from codechecker_common import plist_parser, skiplist_handler -from codechecker_common.source_code_comment_handler import \ - SourceCodeCommentHandler, SpellException, contains_codechecker_comment from codechecker_common import util from codechecker_common.logger import get_logger -from codechecker_report_hash.hash import get_report_path_hash from codechecker_web.shared import webserver_context from codechecker_web.shared import convert @@ -52,18 +44,15 @@ from ..database.config_db_model import Product from ..database.database import conv, DBSession, escape_like from ..database.run_db_model import \ - AnalyzerStatistic, Report, ReviewStatus, File, Run, RunHistory, \ - RunLock, Comment, BugPathEvent, BugReportPoint, \ - FileContent, SourceComponent, ExtendedReportData -from ..metadata import checker_is_unavailable, get_analyzer_name, \ - MetadataInfoParser -from ..tmp import TemporaryDirectory + AnalysisInfo, AnalyzerStatistic, BugPathEvent, BugReportPoint, Comment, \ + ExtendedReportData, File, 
FileContent, Report, ReportAnalysisInfo, \ + ReviewStatus, Run, RunHistory, RunHistoryAnalysisInfo, RunLock, \ + SourceComponent from .thrift_enum_helper import detection_status_enum, \ detection_status_str, review_status_enum, review_status_str, \ report_extended_data_type_enum -from . import store_handler LOG = get_logger('server') @@ -147,6 +136,8 @@ def wrapper(*args, **kwargs): LOG.warning("%s:\n%s", func_name, rf.message) raise except Exception as ex: + import traceback + traceback.print_exc() msg = str(ex) LOG.warning("%s:\n%s", func_name, msg) raise codechecker_api_shared.ttypes.RequestFailed( @@ -155,28 +146,6 @@ def wrapper(*args, **kwargs): return wrapper -def parse_codechecker_review_comment(source_file_name, - report_line, - checker_name): - """Parse the CodeChecker review comments from a source file at a given - position. Returns an empty list if there are no comments. - """ - src_comment_data = [] - with open(source_file_name, - encoding='utf-8', - errors='ignore') as sf: - if contains_codechecker_comment(sf): - sc_handler = SourceCodeCommentHandler() - try: - src_comment_data = sc_handler.filter_source_line_comments( - sf, - report_line, - checker_name) - except SpellException as ex: - LOG.warning("File %s contains %s", source_file_name, ex) - return src_comment_data - - def get_component_values(session, component_name): """ Get component values by component names and returns a tuple where the @@ -780,32 +749,6 @@ def get_comment_msg(comment): return message -def unzip(b64zip, output_dir): - """ - This function unzips the base64 encoded zip file. This zip is extracted - to a temporary directory and the ZIP is then deleted. The function returns - the size of the extracted decompressed zip file. - """ - if len(b64zip) == 0: - return 0 - - with tempfile.NamedTemporaryFile(suffix='.zip') as zip_file: - LOG.debug("Unzipping mass storage ZIP '%s' to '%s'...", - zip_file.name, output_dir) - - zip_file.write(zlib.decompress(base64.b64decode(b64zip))) - with zipfile.ZipFile(zip_file, 'r', allowZip64=True) as zipf: - try: - zipf.extractall(output_dir) - return os.stat(zip_file.name).st_size - except Exception: - LOG.error("Failed to extract received ZIP.") - import traceback - traceback.print_exc() - raise - return 0 - - def create_review_data(review_status): if review_status: return ReviewData(status=review_status_enum(review_status.status), @@ -1023,24 +966,24 @@ def __init__(self, raise ValueError("Cannot initialize request handler without " "a product to serve.") - self.__manager = manager - self.__product = product - self.__auth_session = auth_session - self.__config_database = config_database + self._manager = manager + self._product = product + self._auth_session = auth_session + self._config_database = config_database self.__checker_md_docs = checker_md_docs self.__checker_doc_map = checker_md_docs_map self.__package_version = package_version - self.__Session = Session - self.__context = context + self._Session = Session + self._context = context self.__permission_args = { 'productID': product.id } - def __get_username(self): + def _get_username(self): """ Returns the actually logged in user name. """ - return self.__auth_session.user if self.__auth_session else "Anonymous" + return self._auth_session.user if self._auth_session else "Anonymous" def __require_permission(self, required): """ @@ -1048,12 +991,12 @@ def __require_permission(self, required): have any of the given permissions. 
""" - with DBSession(self.__config_database) as session: + with DBSession(self._config_database) as session: args = dict(self.__permission_args) args['config_db_session'] = session if not any([permissions.require_permission( - perm, args, self.__auth_session) + perm, args, self._auth_session) for perm in required]): raise codechecker_api_shared.ttypes.RequestFailed( codechecker_api_shared.ttypes.ErrorCode.UNAUTHORIZED, @@ -1073,7 +1016,7 @@ def __require_store(self): def __add_comment(self, bug_id, message, kind=CommentKindValue.USER, date=None): """ Creates a new comment object. """ - user = self.__get_username() + user = self._get_username() return Comment(bug_id, user, message.encode('utf-8'), @@ -1086,7 +1029,7 @@ def getRunData(self, run_filter, limit, offset, sort_mode): limit = verify_limit_range(limit) - with DBSession(self.__Session) as session: + with DBSession(self._Session) as session: # Count the reports subquery. stmt = session.query(Report.run_id, @@ -1186,34 +1129,80 @@ def getRunData(self, run_filter, limit, offset, sort_mode): def getRunCount(self, run_filter): self.__require_access() - with DBSession(self.__Session) as session: + with DBSession(self._Session) as session: query = session.query(Run.id) query = process_run_filter(session, query, run_filter) return query.count() + # DEPRECATED: use getAnalysisInfo API function instead of this function. def getCheckCommand(self, run_history_id, run_id): - self.__require_access() + """ Get analyzer command based on the given filter. """ + limit = None + offset = 0 + analysis_info_filter = AnalysisInfoFilter( + runId=run_id, + runHistoryId=run_history_id) - if not run_history_id and not run_id: - return "" + analysis_info = self.getAnalysisInfo( + analysis_info_filter, limit, offset) - with DBSession(self.__Session) as session: - query = session.query(RunHistory.check_command) + return "; ".join([i.analyzerCommand for i in analysis_info]) - if run_history_id: - query = query.filter(RunHistory.id == run_history_id) - elif run_id: - query = query.filter(RunHistory.run_id == run_id) \ + @exc_to_thrift_reqfail + @timeit + def getAnalysisInfo(self, analysis_info_filter, limit, offset): + """ Get analysis information based on the given filter. 
""" + self.__require_access() + + res: List[ttypes.AnalysisInfo] = [] + if not analysis_info_filter: + return res + + analysis_info_query = None + with DBSession(self._Session) as session: + run_id = analysis_info_filter.runId + run_history_ids = None + if run_id is not None: + run_history_ids = session \ + .query(RunHistory.id) \ + .filter(RunHistory.run_id == run_id) \ .order_by(RunHistory.time.desc()) \ .limit(1) - history = query.first() - - if not history or not history[0]: - return "" + if run_history_ids is None: + run_history_ids = [analysis_info_filter.runHistoryId] + + if run_history_ids is not None: + rh_a_tbl = RunHistoryAnalysisInfo + analysis_info_query = session.query(AnalysisInfo) \ + .outerjoin( + rh_a_tbl, + rh_a_tbl.c.analysis_info_id == AnalysisInfo.id) \ + .filter(rh_a_tbl.c.run_history_id.in_(run_history_ids)) + + report_id = analysis_info_filter.reportId + if report_id is not None: + r_a_tbl = ReportAnalysisInfo + analysis_info_query = session.query(AnalysisInfo) \ + .outerjoin( + r_a_tbl, + r_a_tbl.c.analysis_info_id == AnalysisInfo.id) \ + .filter(r_a_tbl.c.report_id == report_id) + + if analysis_info_query: + if limit: + analysis_info_query = analysis_info_query \ + .limit(limit).offset(offset) + + for cmd in analysis_info_query: + command = \ + zlib.decompress(cmd.analyzer_command).decode('utf-8') + + res.append(ttypes.AnalysisInfo( + analyzerCommand=command)) - return zlib.decompress(history[0]).decode('utf-8') + return res @exc_to_thrift_reqfail @timeit @@ -1222,7 +1211,7 @@ def getRunHistory(self, run_ids, limit, offset, run_history_filter): limit = verify_limit_range(limit) - with DBSession(self.__Session) as session: + with DBSession(self._Session) as session: res = session.query(RunHistory) @@ -1260,7 +1249,7 @@ def getRunHistory(self, run_ids, limit, offset, run_history_filter): def getRunHistoryCount(self, run_ids, run_history_filter): self.__require_access() - with DBSession(self.__Session) as session: + with DBSession(self._Session) as session: query = session.query(RunHistory.id) query = process_run_history_filter(query, run_ids, @@ -1273,7 +1262,7 @@ def getRunHistoryCount(self, run_ids, run_history_filter): def getReport(self, reportId): self.__require_access() - with DBSession(self.__Session) as session: + with DBSession(self._Session) as session: result = session.query(Report, File, @@ -1321,7 +1310,7 @@ def getDiffResultsHash(self, run_ids, report_hashes, diff_type, skip_statuses_str = [detection_status_str(status) for status in skip_detection_statuses] - with DBSession(self.__Session) as session: + with DBSession(self._Session) as session: if diff_type == DiffType.NEW: # In postgresql we can select multiple rows filled with # constants by using `unnest` function. 
In sqlite we have to @@ -1337,7 +1326,7 @@ def getDiffResultsHash(self, run_ids, report_hashes, diff_type, base_hashes = \ filter_open_reports_in_tags(base_hashes, run_ids, tag_ids) - if self.__product.driver_name == 'postgresql': + if self._product.driver_name == 'postgresql': new_hashes = select([func.unnest(report_hashes) .label('bug_id')]) \ .except_(base_hashes).alias('new_bugs') @@ -1389,7 +1378,7 @@ def getRunResults(self, run_ids, limit, offset, sort_types, limit = verify_limit_range(limit) - with DBSession(self.__Session) as session: + with DBSession(self._Session) as session: results = [] filter_expression, join_tables = process_report_filter( @@ -1559,7 +1548,8 @@ def getRunReportCounts(self, run_ids, report_filter, limit, offset): limit = verify_limit_range(limit) results = [] - with DBSession(self.__Session) as session: + + with DBSession(self._Session) as session: filter_expression, join_tables = process_report_filter( session, run_ids, report_filter) @@ -1589,7 +1579,7 @@ def getRunReportCounts(self, run_ids, report_filter, limit, offset): def getRunResultCount(self, run_ids, report_filter, cmp_data): self.__require_access() - with DBSession(self.__Session) as session: + with DBSession(self._Session) as session: filter_expression, join_tables = process_report_filter( session, run_ids, report_filter, cmp_data) @@ -1629,7 +1619,7 @@ def getReportDetails(self, reportId): - reportId """ self.__require_access() - with DBSession(self.__Session) as session: + with DBSession(self._Session) as session: return get_report_details(session, [reportId])[reportId] def _setReviewStatus(self, session, report_hash, status, @@ -1650,7 +1640,7 @@ def _setReviewStatus(self, session, report_hash, status, old_msg = review_status.message or None new_status = review_status_str(status) - new_user = self.__get_username() + new_user = self._get_username() new_message = message.encode('utf8') if message else b'' # Review status is a shared table among runs. When multiple runs @@ -1699,8 +1689,8 @@ def isReviewStatusChangeDisabled(self): """ Return True if review status change is disabled. """ - with DBSession(self.__config_database) as session: - product = session.query(Product).get(self.__product.id) + with DBSession(self._config_database) as session: + product = session.query(Product).get(self._product.id) return product.is_review_status_change_disabled @exc_to_thrift_reqfail @@ -1717,7 +1707,7 @@ def changeReviewStatus(self, report_id, status, message): raise codechecker_api_shared.ttypes.RequestFailed( codechecker_api_shared.ttypes.ErrorCode.GENERAL, msg) - with DBSession(self.__Session) as session: + with DBSession(self._Session) as session: report = session.query(Report).get(report_id) if report: res = self._setReviewStatus( @@ -1730,7 +1720,7 @@ def changeReviewStatus(self, report_id, status, message): LOG.info("Review status of report '%s' was changed to '%s' by %s.", report_id, review_status_str(status), - self.__get_username()) + self._get_username()) return res @@ -1742,7 +1732,7 @@ def getComments(self, report_id): """ self.__require_access() - with DBSession(self.__Session) as session: + with DBSession(self._Session) as session: report = session.query(Report).get(report_id) if report: result = [] @@ -1775,7 +1765,7 @@ def getCommentCount(self, report_id): Return the number of comments for the given bug. 
""" self.__require_access() - with DBSession(self.__Session) as session: + with DBSession(self._Session) as session: report = session.query(Report).get(report_id) if report: commentCount = session.query(Comment) \ @@ -1798,7 +1788,7 @@ def addComment(self, report_id, comment_data): codechecker_api_shared.ttypes.ErrorCode.GENERAL, 'The comment message can not be empty!') - with DBSession(self.__Session) as session: + with DBSession(self._Session) as session: report = session.query(Report).get(report_id) if report: comment = self.__add_comment(report.bug_id, @@ -1829,9 +1819,9 @@ def updateComment(self, comment_id, content): codechecker_api_shared.ttypes.ErrorCode.GENERAL, 'The comment message can not be empty!') - with DBSession(self.__Session) as session: + with DBSession(self._Session) as session: - user = self.__get_username() + user = self._get_username() comment = session.query(Comment).get(comment_id) if comment: @@ -1875,9 +1865,9 @@ def removeComment(self, comment_id): """ self.__require_access() - user = self.__get_username() + user = self._get_username() - with DBSession(self.__Session) as session: + with DBSession(self._Session) as session: comment = session.query(Comment).get(comment_id) if comment: @@ -1890,7 +1880,7 @@ def removeComment(self, comment_id): LOG.info("Comment '%s...' was removed from bug hash '%s' by " "'%s'.", comment.message[:10], comment.bug_hash, - self.__get_username()) + self._get_username()) return True else: @@ -1949,7 +1939,7 @@ def getSourceFileData(self, fileId, fileContent, encoding): - enum Encoding """ self.__require_access() - with DBSession(self.__Session) as session: + with DBSession(self._Session) as session: sourcefile = session.query(File).get(fileId) if sourcefile is None: @@ -1974,7 +1964,7 @@ def getSourceFileData(self, fileId, fileContent, encoding): @timeit def getLinesInSourceFileContents(self, lines_in_files_requested, encoding): self.__require_access() - with DBSession(self.__Session) as session: + with DBSession(self._Session) as session: res = defaultdict(lambda: defaultdict(str)) for lines_in_file in lines_in_files_requested: if lines_in_file.fileId is None: @@ -2006,7 +1996,7 @@ def getCheckerCounts(self, run_ids, report_filter, cmp_data, limit, limit = verify_limit_range(limit) results = [] - with DBSession(self.__Session) as session: + with DBSession(self._Session) as session: filter_expression, join_tables = process_report_filter( session, run_ids, report_filter, cmp_data) @@ -2060,7 +2050,7 @@ def getAnalyzerNameCounts(self, run_ids, report_filter, cmp_data, limit, limit = verify_limit_range(limit) results = {} - with DBSession(self.__Session) as session: + with DBSession(self._Session) as session: filter_expression, join_tables = process_report_filter( session, run_ids, report_filter, cmp_data) @@ -2103,7 +2093,7 @@ def getSeverityCounts(self, run_ids, report_filter, cmp_data): """ self.__require_access() results = {} - with DBSession(self.__Session) as session: + with DBSession(self._Session) as session: filter_expression, join_tables = process_report_filter( session, run_ids, report_filter, cmp_data) @@ -2142,7 +2132,7 @@ def getCheckerMsgCounts(self, run_ids, report_filter, cmp_data, limit, limit = verify_limit_range(limit) results = {} - with DBSession(self.__Session) as session: + with DBSession(self._Session) as session: filter_expression, join_tables = process_report_filter( session, run_ids, report_filter, cmp_data) @@ -2183,7 +2173,7 @@ def getReviewStatusCounts(self, run_ids, report_filter, cmp_data): """ 
self.__require_access() results = defaultdict(int) - with DBSession(self.__Session) as session: + with DBSession(self._Session) as session: filter_expression, join_tables = process_report_filter( session, run_ids, report_filter, cmp_data) @@ -2232,7 +2222,7 @@ def getFileCounts(self, run_ids, report_filter, cmp_data, limit, offset): limit = verify_limit_range(limit) results = {} - with DBSession(self.__Session) as session: + with DBSession(self._Session) as session: filter_expression, join_tables = process_report_filter( session, run_ids, report_filter, cmp_data) @@ -2282,7 +2272,7 @@ def getRunHistoryTagCounts(self, run_ids, report_filter, cmp_data, limit, limit = verify_limit_range(limit) results = [] - with DBSession(self.__Session) as session: + with DBSession(self._Session) as session: filter_expression, join_tables = process_report_filter( session, run_ids, report_filter, cmp_data) @@ -2363,7 +2353,7 @@ def getDetectionStatusCounts(self, run_ids, report_filter, cmp_data): """ self.__require_access() results = {} - with DBSession(self.__Session) as session: + with DBSession(self._Session) as session: filter_expression, join_tables = process_report_filter( session, run_ids, report_filter, cmp_data) @@ -2408,7 +2398,7 @@ def getFailedFiles(self, run_ids): self.__require_access() res = defaultdict(list) - with DBSession(self.__Session) as session: + with DBSession(self._Session) as session: query, sub_q = get_failed_files_query( session, run_ids, [AnalyzerStatistic.failed_files, Run.name], [RunHistory.run_id]) @@ -2451,8 +2441,8 @@ def removeRunResults(self, run_ids): failed = True return not failed - def __removeReports(self, session, report_ids, - chunk_size=SQLITE_MAX_VARIABLE_NUMBER): + def _removeReports(self, session, report_ids, + chunk_size=SQLITE_MAX_VARIABLE_NUMBER): """ Removing reports in chunks. """ @@ -2472,7 +2462,7 @@ def removeRunReports(self, run_ids, report_filter, cmp_data): if cmp_data and cmp_data.runIds: run_ids.extend(cmp_data.runIds) - with DBSession(self.__Session) as session: + with DBSession(self._Session) as session: check_remove_runs_lock(session, run_ids) try: @@ -2484,13 +2474,13 @@ def removeRunReports(self, run_ids, report_filter, cmp_data): reports_to_delete = [r[0] for r in q] if reports_to_delete: - self.__removeReports(session, reports_to_delete) + self._removeReports(session, reports_to_delete) session.commit() session.close() LOG.info("The following reports were removed by '%s': %s", - self.__get_username(), reports_to_delete) + self._get_username(), reports_to_delete) except Exception as ex: session.rollback() LOG.error("Database cleanup failed.") @@ -2498,7 +2488,7 @@ def removeRunReports(self, run_ids, report_filter, cmp_data): return False # Remove unused data (files, comments, etc.) from the database. - db_cleanup.remove_unused_data(self.__Session) + db_cleanup.remove_unused_data(self._Session) return True @@ -2508,7 +2498,7 @@ def removeRun(self, run_id, run_filter): self.__require_store() # Remove the whole run. - with DBSession(self.__Session) as session: + with DBSession(self._Session) as session: check_remove_runs_lock(session, [run_id]) if not run_filter: @@ -2523,10 +2513,10 @@ def removeRun(self, run_id, run_filter): runs = run_filter.names if run_filter.names else run_filter.ids LOG.info("Runs '%s' were removed by '%s'.", runs, - self.__get_username()) + self._get_username()) # Remove unused data (files, comments, etc.) from the database. 
- db_cleanup.remove_unused_data(self.__Session) + db_cleanup.remove_unused_data(self._Session) return True @@ -2541,7 +2531,7 @@ def updateRunData(self, run_id, new_run_name): raise codechecker_api_shared.ttypes.RequestFailed( codechecker_api_shared.ttypes.ErrorCode.GENERAL, msg) - with DBSession(self.__Session) as session: + with DBSession(self._Session) as session: check_new_run_name = session.query(Run) \ .filter(Run.name == new_run_name) \ .all() @@ -2561,7 +2551,7 @@ def updateRunData(self, run_id, new_run_name): LOG.info("Run name '%s' (%d) was changed to %s by '%s'.", old_run_name, run_id, new_run_name, - self.__get_username()) + self._get_username()) return True else: @@ -2589,9 +2579,9 @@ def addSourceComponent(self, name, value, description): Adds a new source if it does not exist or updates an old one. """ self.__require_admin() - with DBSession(self.__Session) as session: + with DBSession(self._Session) as session: component = session.query(SourceComponent).get(name) - user = self.__auth_session.user if self.__auth_session else None + user = self._auth_session.user if self._auth_session else None if component: component.value = value.encode('utf-8') @@ -2615,7 +2605,7 @@ def getSourceComponents(self, component_filter): Returns the available source components. """ self.__require_access() - with DBSession(self.__Session) as session: + with DBSession(self._Session) as session: q = session.query(SourceComponent) if component_filter: @@ -2652,13 +2642,13 @@ def removeSourceComponent(self, name): """ self.__require_admin() - with DBSession(self.__Session) as session: + with DBSession(self._Session) as session: component = session.query(SourceComponent).get(name) if component: session.delete(component) session.commit() LOG.info("Source component '%s' has been removed by '%s'", - name, self.__get_username()) + name, self._get_username()) return True else: msg = 'Source component ' + str(name) + \ @@ -2674,7 +2664,7 @@ def getMissingContentHashes(self, file_hashes): if not file_hashes: return [] - with DBSession(self.__Session) as session: + with DBSession(self._Session) as session: q = session.query(FileContent) \ .options(sqlalchemy.orm.load_only('content_hash')) \ @@ -2683,602 +2673,23 @@ def getMissingContentHashes(self, file_hashes): return list(set(file_hashes) - set([fc.content_hash for fc in q])) - def __store_source_files(self, source_root, filename_to_hash, - trim_path_prefixes): - """ - Storing file contents from plist. - """ - - file_path_to_id = {} - - for file_name, file_hash in filename_to_hash.items(): - source_file_name = os.path.join(source_root, - file_name.strip("/")) - source_file_name = os.path.realpath(source_file_name) - LOG.debug("Storing source file: %s", source_file_name) - trimmed_file_path = util.trim_path_prefixes(file_name, - trim_path_prefixes) - - if not os.path.isfile(source_file_name): - # The file was not in the ZIP file, because we already - # have the content. Let's check if we already have a file - # record in the database or we need to add one. - - LOG.debug('%s not found or already stored.', trimmed_file_path) - with DBSession(self.__Session) as session: - fid = store_handler.addFileRecord(session, - trimmed_file_path, - file_hash) - if not fid: - LOG.error("File ID for %s is not found in the DB with " - "content hash %s. 
Missing from ZIP?", - source_file_name, file_hash) - file_path_to_id[trimmed_file_path] = fid - LOG.debug("%d fileid found", fid) - continue - - with DBSession(self.__Session) as session: - file_path_to_id[trimmed_file_path] = \ - store_handler.addFileContent(session, - trimmed_file_path, - source_file_name, - file_hash, - None) - - return file_path_to_id - - def __process_report_file( - self, - report_file_path: str, - session: DBSession, - source_root: str, - run_id: int, - file_path_to_id: Dict[str, int], - run_history_time: datetime, - severity_map: webserver_context.SeverityMap, - wrong_src_code_comments: List[str], - skip_handler: Optional[skiplist_handler.SkipListHandler], - trim_path_prefixes: List[str], - mip: MetadataInfoParser, - already_added_report_hashes: Set[str], - new_report_hashes: Set[str], - hash_map_reports: Dict[str, List[Any]], - all_report_checkers: Set[str] - ) -> bool: - """ - Process and save reports from the given report file to the database. - """ - try: - files, reports = plist_parser.parse_plist_file(report_file_path) - except Exception as ex: - LOG.warning('Parsing the plist failed: %s', str(ex)) - return False - - if not reports: - return True - - trimmed_files = {} - file_ids = {} - missing_ids_for_files = [] - - for k, v in files.items(): - trimmed_files[k] = \ - util.trim_path_prefixes(v, trim_path_prefixes) - - for file_name in trimmed_files.values(): - file_id = file_path_to_id.get(file_name, -1) - if file_id == -1: - missing_ids_for_files.append(file_name) - continue - - file_ids[file_name] = file_id - - if missing_ids_for_files: - LOG.warning("Failed to get file path id for '%s'!", - ' '.join(missing_ids_for_files)) - return False - - def set_review_status(report: Any): - """ - Set review status for the given report if there is any source code - comment. - """ - checker_name = report.main['check_name'] - last_report_event = report.bug_path[-1] - - # The original file path is needed here not the trimmed - # because the source files are extracted as the original - # file path. - file_name = files[last_report_event['location']['file']] - - source_file_name = os.path.realpath( - os.path.join(source_root, file_name.strip("/"))) - - # Check and store source code comments. - if not os.path.isfile(source_file_name): - return - - report_line = last_report_event['location']['line'] - source_file = os.path.basename(file_name) - - src_comment_data = parse_codechecker_review_comment( - source_file_name, report_line, checker_name) - - if len(src_comment_data) == 1: - status = src_comment_data[0]['status'] - rw_status = ttypes.ReviewStatus.FALSE_POSITIVE - if status == 'confirmed': - rw_status = ttypes.ReviewStatus.CONFIRMED - elif status == 'intentional': - rw_status = ttypes.ReviewStatus.INTENTIONAL - - self._setReviewStatus( - session, report.report_hash, rw_status, - src_comment_data[0]['message'], run_history_time) - elif len(src_comment_data) > 1: - LOG.warning( - "Multiple source code comment can be found " - "for '%s' checker in '%s' at line %s. 
" - "This bug will not be suppressed!", - checker_name, source_file, report_line) - - wrong_src_code_comments.append( - f"{source_file}|{report_line}|{checker_name}") - - for report in reports: - all_report_checkers.add(report.check_name) - - if skip_handler and skip_handler.should_skip(report.file_path): - continue - - report.trim_path_prefixes(trim_path_prefixes) - - bug_paths, bug_events, bug_extended_data = \ - store_handler.collect_paths_events(report, file_ids, - trimmed_files) - report_path_hash = get_report_path_hash(report) - if report_path_hash in already_added_report_hashes: - LOG.debug('Not storing report. Already added: %s', report) - continue - - LOG.debug("Storing report to the database...") - - bug_id = report.report_hash - - detection_status = 'new' - detected_at = run_history_time - - if bug_id in hash_map_reports: - old_report = hash_map_reports[bug_id][0] - old_status = old_report.detection_status - detection_status = 'reopened' \ - if old_status == 'resolved' else 'unresolved' - detected_at = old_report.detected_at - - analyzer_name = get_analyzer_name( - report.check_name, mip.checker_to_analyzer, report.metadata) - - report_id = store_handler.addReport( - session, run_id, file_ids[report.file_path], report.main, - bug_paths, bug_events, bug_extended_data, detection_status, - detected_at, severity_map, analyzer_name) - - new_report_hashes.add(bug_id) - already_added_report_hashes.add(report_path_hash) - - set_review_status(report) - - LOG.debug("Storing report done. ID=%d", report_id) - - return True - - def __store_reports(self, session, report_dir, source_root, run_id, - file_path_to_id, run_history_time, severity_map, - wrong_src_code_comments, trim_path_prefixes): - """ - Parse up and store the plist report files. - """ - def get_skip_handler( - report_dir: str - ) -> Optional[skiplist_handler.SkipListHandler]: - """ Get a skip list handler based on the given report directory.""" - skip_file_path = os.path.join(report_dir, 'skip_file') - if not os.path.exists(skip_file_path): - return - - LOG.debug("Pocessing skip file %s", skip_file_path) - try: - with open(skip_file_path, - encoding="utf-8", errors="ignore") as f: - skip_content = f.read() - LOG.debug(skip_content) - - return skiplist_handler.SkipListHandler(skip_content) - except (IOError, OSError) as err: - LOG.warning("Failed to open skip file: %s", err) - - all_reports = session.query(Report) \ - .filter(Report.run_id == run_id) \ - .all() - - hash_map_reports = defaultdict(list) - for report in all_reports: - hash_map_reports[report.bug_id].append(report) - - already_added_report_hashes = set() - new_report_hashes = set() - enabled_checkers = set() - disabled_checkers = set() - all_report_checkers = set() - - # Processing PList files. 
- for root_dir_path, _, report_file_paths in os.walk(report_dir): - LOG.debug("Get reports from '%s' directory", root_dir_path) - - skip_handler = get_skip_handler(root_dir_path) - - metadata_file_path = os.path.join(root_dir_path, 'metadata.json') - mip = MetadataInfoParser(metadata_file_path) - - enabled_checkers.update(mip.enabled_checkers) - disabled_checkers.update(mip.disabled_checkers) - - for f in report_file_paths: - if not f.endswith('.plist'): - continue - - LOG.debug("Parsing input file '%s'", f) - - report_file_path = os.path.join(root_dir_path, f) - self.__process_report_file( - report_file_path, session, source_root, run_id, - file_path_to_id, run_history_time, severity_map, - wrong_src_code_comments, skip_handler, trim_path_prefixes, - mip, already_added_report_hashes, new_report_hashes, - hash_map_reports, all_report_checkers) - - # If a checker was found in a plist file it can not be disabled so we - # will add this to the enabled checkers list and remove this checker - # from the disabled checkers list. - # Also if multiple report directories are stored and a checker was - # enabled in one report directory but it was disabled in another - # directory we will mark this checker as enabled. - enabled_checkers |= all_report_checkers - disabled_checkers -= all_report_checkers - - reports_to_delete = set() - for bug_hash, reports in hash_map_reports.items(): - if bug_hash in new_report_hashes: - reports_to_delete.update([x.id for x in reports]) - else: - for report in reports: - # We set the fix date of a report only if the report - # has not been fixed before. - if report.fixed_at: - continue - - checker = report.checker_id - if checker in disabled_checkers: - report.detection_status = 'off' - elif checker_is_unavailable(checker, enabled_checkers): - report.detection_status = 'unavailable' - else: - report.detection_status = 'resolved' - - report.fixed_at = run_history_time - - if reports_to_delete: - self.__removeReports(session, list(reports_to_delete)) - - @staticmethod - @exc_to_thrift_reqfail - def __store_run_lock(session, name, username): - """ - Store a RunLock record for the given run name into the database. - """ - try: - # If the run can be stored, we need to lock it first. If there is - # already a lock in the database for the given run name which is - # expired and multiple processes are trying to get this entry from - # the database for update we may get the following exception: - # could not obtain lock on row in relation "run_locks" - # This is the reason why we have to wrap this query to a try/except - # block. - run_lock = session.query(RunLock) \ - .filter(RunLock.name == name) \ - .with_for_update(nowait=True).one_or_none() - except (sqlalchemy.exc.OperationalError, - sqlalchemy.exc.ProgrammingError) as ex: - LOG.error("Failed to get run lock for '%s': %s", name, ex) - raise codechecker_api_shared.ttypes.RequestFailed( - codechecker_api_shared.ttypes.ErrorCode.DATABASE, - "Someone is already storing to the same run. Please wait " - "while the other storage is finished and try it again.") - - if not run_lock: - # If there is no lock record for the given run name, the run - # is not locked -- create a new lock. - run_lock = RunLock(name, username) - session.add(run_lock) - elif run_lock.has_expired( - db_cleanup.RUN_LOCK_TIMEOUT_IN_DATABASE): - # There can be a lock in the database, which has already - # expired. In this case, we assume that the previous operation - # has failed, and thus, we can re-use the already present lock. 
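The locking flow described in the comments above condenses to the pattern below; RunLock, its has_expired()/touch() helpers and the expiry timeout are the ones used by the surrounding code, and the commit-failure handling that follows is omitted here:

    def try_lock_run(session, name, username, timeout_sec):
        lock = session.query(RunLock) \
            .filter(RunLock.name == name) \
            .with_for_update(nowait=True).one_or_none()

        if lock is None:                      # no record yet: create the lock
            session.add(RunLock(name, username))
        elif lock.has_expired(timeout_sec):   # stale lock: take it over
            lock.touch()
            lock.username = username
        else:                                 # live lock held by someone else
            raise RuntimeError("Run '%s' is locked by %s." %
                               (name, lock.username or "another user"))

        session.commit()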
- run_lock.touch() - run_lock.username = username - else: - # In case the lock exists and it has not expired, we must - # consider the run a locked one. - when = run_lock.when_expires( - db_cleanup.RUN_LOCK_TIMEOUT_IN_DATABASE) - - username = run_lock.username if run_lock.username is not None \ - else "another user" - - LOG.info("Refusing to store into run '%s' as it is locked by " - "%s. Lock will expire at '%s'.", name, username, when) - raise codechecker_api_shared.ttypes.RequestFailed( - codechecker_api_shared.ttypes.ErrorCode.DATABASE, - "The run named '{0}' is being stored into by {1}. If the " - "other store operation has failed, this lock will expire " - "at '{2}'.".format(name, username, when)) - - # At any rate, if the lock has been created or updated, commit it - # into the database. - try: - session.commit() - except (sqlalchemy.exc.IntegrityError, - sqlalchemy.orm.exc.StaleDataError): - # The commit of this lock can fail. - # - # In case two store ops attempt to lock the same run name at the - # same time, committing the lock in the transaction that commits - # later will result in an IntegrityError due to the primary key - # constraint. - # - # In case two store ops attempt to lock the same run name with - # reuse and one of the operation hangs long enough before COMMIT - # so that the other operation commits and thus removes the lock - # record, StaleDataError is raised. In this case, also consider - # the run locked, as the data changed while the transaction was - # waiting, as another run wholly completed. - - LOG.info("Run '%s' got locked while current transaction " - "tried to acquire a lock. Considering run as locked.", - name) - raise codechecker_api_shared.ttypes.RequestFailed( - codechecker_api_shared.ttypes.ErrorCode.DATABASE, - "The run named '{0}' is being stored into by another " - "user.".format(name)) - - @staticmethod - @exc_to_thrift_reqfail - def __free_run_lock(session, name): - """ - Remove the lock from the database for the given run name. - """ - # Using with_for_update() here so the database (in case it supports - # this operation) locks the lock record's row from any other access. - run_lock = session.query(RunLock) \ - .filter(RunLock.name == name) \ - .with_for_update(nowait=True).one() - session.delete(run_lock) - session.commit() - - def __check_run_limit(self, run_name): - """ - Checks the maximum allowed of uploadable runs for the current product. - """ - max_run_count = self.__manager.get_max_run_count() - - with DBSession(self.__config_database) as session: - product = session.query(Product).get(self.__product.id) - if product.run_limit: - max_run_count = product.run_limit - - # Session that handles constraints on the run. - with DBSession(self.__Session) as session: - if max_run_count: - LOG.debug("Check the maximum number of allowed " - "runs which is %d", max_run_count) - - run = session.query(Run) \ - .filter(Run.name == run_name) \ - .one_or_none() - - # If max_run_count is not set in the config file, it will allow - # the user to upload unlimited runs. - - run_count = session.query(Run.id).count() - - # If we are not updating a run or the run count is reached the - # limit it will throw an exception. - if not run and run_count >= max_run_count: - remove_run_count = run_count - max_run_count + 1 - raise codechecker_api_shared.ttypes.RequestFailed( - codechecker_api_shared.ttypes.ErrorCode.GENERAL, - 'You reached the maximum number of allowed runs ' - '({0}/{1})! 
Please remove at least {2} run(s) before ' - 'you try it again.'.format(run_count, - max_run_count, - remove_run_count)) - @exc_to_thrift_reqfail @timeit def massStoreRun(self, name, tag, version, b64zip, force, trim_path_prefixes, description): self.__require_store() - start_time = time.time() - - user = self.__auth_session.user if self.__auth_session else None - - # Check constraints of the run. - self.__check_run_limit(name) - - with DBSession(self.__Session) as session: - ThriftRequestHandler.__store_run_lock(session, name, user) - - wrong_src_code_comments = [] - try: - with TemporaryDirectory() as zip_dir: - LOG.info("[%s] Unzip storage file...", name) - zip_size = unzip(b64zip, zip_dir) - LOG.info("[%s] Unzip storage file done.", name) - if zip_size == 0: - raise codechecker_api_shared.ttypes.RequestFailed( - codechecker_api_shared.ttypes. - ErrorCode.GENERAL, - "The received zip file content is empty" - " nothing was stored.") - - LOG.debug("Using unzipped folder '%s'", zip_dir) - - source_root = os.path.join(zip_dir, 'root') - report_dir = os.path.join(zip_dir, 'reports') - metadata_file = os.path.join(report_dir, 'metadata.json') - content_hash_file = os.path.join(zip_dir, - 'content_hashes.json') - - filename_to_hash = \ - util.load_json_or_empty(content_hash_file, {}) - - LOG.info("[%s] Store source files...", name) - file_path_to_id = self.__store_source_files(source_root, - filename_to_hash, - trim_path_prefixes) - LOG.info("[%s] Store source files done.", name) - - run_history_time = datetime.now() - - mip = MetadataInfoParser(metadata_file) - - command = '' - if len(mip.check_commands) == 1: - command = list(mip.check_commands)[0] - elif len(mip.check_commands) > 1: - command = "multiple analyze calls: " + \ - '; '.join(mip.check_commands) - - durations = 0 - if mip.check_durations: - # Round the duration to seconds. - durations = int(sum(mip.check_durations)) - - # When we use multiple server instances and we try to run - # multiple storage to each server which contain at least two - # reports which have the same report hash and have source code - # comments it is possible that the following exception will be - # thrown: (psycopg2.extensions.TransactionRollbackError) - # deadlock detected. - # The problem is that the report hash is the key for the - # review data table and both of the store actions try to - # update the same review data row. - # Neither of the two processes can continue, and they will wait - # for each other indefinitely. PostgreSQL in this case will - # terminate one transaction with the above exception. - # For this reason in case of failure we will wait some seconds - # and try to run the storage again. - # For more information see #2655 and #2653 issues on github. - max_num_of_tries = 3 - num_of_tries = 0 - sec_to_wait_after_failure = 60 - while True: - try: - # This session's transaction buffer stores the actual - # run data into the database. - with DBSession(self.__Session) as session: - # Load the lock record for "FOR UPDATE" so that the - # transaction that handles the run's store - # operations has a lock on the database row itself. - run_lock = session.query(RunLock) \ - .filter(RunLock.name == name) \ - .with_for_update(nowait=True).one() - - # Do not remove this seemingly dummy print, we need - # to make sure that the execution of the SQL - # statement is not optimised away and the fetched - # row is not garbage collected. - LOG.debug("Storing into run '%s' locked at '%s'.", - name, run_lock.locked_at) - - # Actual store operation begins here. 
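The retry loop around the store transaction, motivated by the PostgreSQL deadlock scenario explained above, reduces to a retry-with-exponential-backoff pattern; a stripped-down sketch (the real code re-raises a Thrift RequestFailed once the attempts are exhausted):

    import time
    import sqlalchemy

    def run_with_retries(do_store, max_tries=3, wait_sec=60):
        for attempt in range(1, max_tries + 1):
            try:
                return do_store()
            except (sqlalchemy.exc.OperationalError,
                    sqlalchemy.exc.ProgrammingError):
                if attempt == max_tries:
                    raise
                time.sleep(wait_sec)
                wait_sec *= 2    # back off before the next attempt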
- user_name = self.__get_username() - run_id = store_handler.addCheckerRun( - session, command, name, tag, user_name, - run_history_time, version, force, - mip.cc_version, mip.analyzer_statistics, - description) - - LOG.info("[%s] Store reports...", name) - self.__store_reports( - session, report_dir, source_root, run_id, - file_path_to_id, run_history_time, - self.__context.severity_map, - wrong_src_code_comments, trim_path_prefixes) - LOG.info("[%s] Store reports done.", name) - - store_handler.setRunDuration(session, - run_id, - durations) - - store_handler.finishCheckerRun(session, run_id) - - session.commit() - - LOG.info("'%s' stored results (%s KB " - "/decompressed/) to run '%s' in %s " - "seconds.", user_name, - round(zip_size / 1024), name, - round(time.time() - start_time, 2)) - - return run_id - except (sqlalchemy.exc.OperationalError, - sqlalchemy.exc.ProgrammingError) as ex: - num_of_tries += 1 - - if num_of_tries == max_num_of_tries: - raise codechecker_api_shared.ttypes.RequestFailed( - codechecker_api_shared.ttypes. - ErrorCode.DATABASE, - "Storing reports to the database failed: " - "{0}".format(ex)) - - LOG.error("Storing reports of '%s' run failed: " - "%s.\nWaiting %d sec before trying to store " - "it again!", name, ex, - sec_to_wait_after_failure) - time.sleep(sec_to_wait_after_failure) - sec_to_wait_after_failure *= 2 - except Exception as ex: - LOG.error("Failed to store results: %s", ex) - import traceback - traceback.print_exc() - raise - finally: - # In any case if the "try" block's execution began, a run lock must - # exist, which can now be removed, as storage either completed - # successfully, or failed in a detectable manner. - # (If the failure is undetectable, the coded grace period expiry - # of the lock will allow further store operations to the given - # run name.) - with DBSession(self.__Session) as session: - ThriftRequestHandler.__free_run_lock(session, name) - - if wrong_src_code_comments: - raise codechecker_api_shared.ttypes.RequestFailed( - codechecker_api_shared.ttypes.ErrorCode.SOURCE_FILE, - "Multiple source code comment can be found with the same " - "checker name for same bug!", - wrong_src_code_comments) + from codechecker_server.api.mass_store_run import MassStoreRun + m = MassStoreRun(self, name, tag, version, b64zip, force, + trim_path_prefixes, description) + return m.store() @exc_to_thrift_reqfail @timeit def allowsStoringAnalysisStatistics(self): self.__require_store() - return True if self.__manager.get_analysis_statistics_dir() else False + return True if self._manager.get_analysis_statistics_dir() else False @exc_to_thrift_reqfail @timeit @@ -3288,13 +2699,13 @@ def getAnalysisStatisticsLimits(self): cfg = dict() # Get the limit of failure zip size. - failure_zip_size = self.__manager.get_failure_zip_size() + failure_zip_size = self._manager.get_failure_zip_size() if failure_zip_size: cfg[ttypes.StoreLimitKind.FAILURE_ZIP_SIZE] = failure_zip_size # Get the limit of compilation database size. 
compilation_database_size = \ - self.__manager.get_compilation_database_size() + self._manager.get_compilation_database_size() if compilation_database_size: cfg[ttypes.StoreLimitKind.COMPILATION_DATABASE_SIZE] = \ compilation_database_size @@ -3306,11 +2717,11 @@ def getAnalysisStatisticsLimits(self): def storeAnalysisStatistics(self, run_name, b64zip): self.__require_store() - report_dir_store = self.__manager.get_analysis_statistics_dir() + report_dir_store = self._manager.get_analysis_statistics_dir() if report_dir_store: try: product_dir = os.path.join(report_dir_store, - self.__product.endpoint) + self._product.endpoint) # Create report store directory. if not os.path.exists(product_dir): os.makedirs(product_dir) @@ -3335,7 +2746,7 @@ def getAnalysisStatistics(self, run_id, run_history_id): analyzer_statistics = {} - with DBSession(self.__Session) as session: + with DBSession(self._Session) as session: run_ids = None if run_id is None else [run_id] run_history_ids = None if run_history_id is None \ else [run_history_id] @@ -3361,7 +2772,7 @@ def getAnalysisStatistics(self, run_id, run_history_id): def exportData(self, run_filter): self.__require_access() - with DBSession(self.__Session) as session: + with DBSession(self._Session) as session: # Logic for getting comments comment_data_list = defaultdict(list) @@ -3405,7 +2816,7 @@ def exportData(self, run_filter): @timeit def importData(self, exportData): self.__require_admin() - with DBSession(self.__Session) as session: + with DBSession(self._Session) as session: # Logic for importing comments comment_bug_ids = list(exportData.comments.keys()) diff --git a/web/server/codechecker_server/api/store_handler.py b/web/server/codechecker_server/api/store_handler.py deleted file mode 100644 index db35538777..0000000000 --- a/web/server/codechecker_server/api/store_handler.py +++ /dev/null @@ -1,599 +0,0 @@ -# ------------------------------------------------------------------------- -# -# Part of the CodeChecker project, under the Apache License v2.0 with -# LLVM Exceptions. See LICENSE for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -# ------------------------------------------------------------------------- -""" -Helpers to store analysis reports. -""" - -import base64 -from datetime import datetime -from hashlib import sha256 -import os -import zlib - -import sqlalchemy - -import codechecker_api_shared -from codechecker_api.codeCheckerDBAccess_v6 import ttypes - -from codechecker_common.logger import get_logger -from codechecker_common.util import load_json_or_empty - -from ..database.run_db_model import AnalyzerStatistic, \ - BugPathEvent, BugReportPoint, File, Run, RunHistory, Report, FileContent, \ - ExtendedReportData - -from .thrift_enum_helper import report_extended_data_type_str - -LOG = get_logger('system') - - -def metadata_info(metadata_file): - check_commands = [] - check_durations = [] - cc_version = None - analyzer_statistics = {} - checkers = {} - - if not os.path.isfile(metadata_file): - return check_commands, check_durations, cc_version, \ - analyzer_statistics, checkers - - metadata_dict = load_json_or_empty(metadata_file, {}) - - if 'command' in metadata_dict: - check_commands.append(metadata_dict['command']) - if 'timestamps' in metadata_dict: - check_durations.append( - float(metadata_dict['timestamps']['end'] - - metadata_dict['timestamps']['begin'])) - - # Get CodeChecker version. - cc_version = metadata_dict.get('versions', {}).get('codechecker') - - # Get analyzer statistics. 
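How a client is expected to combine the statistics-related calls shown above is roughly the following; the helper name is made up, and it assumes the zip payload is sent base64-encoded like the mass-store payload:

    import base64

    def upload_failure_zip(client, ttypes, run_name, zip_bytes):
        if not client.allowsStoringAnalysisStatistics():
            return False

        limits = client.getAnalysisStatisticsLimits()
        max_size = limits.get(ttypes.StoreLimitKind.FAILURE_ZIP_SIZE)
        if max_size and len(zip_bytes) > max_size:
            return False    # over the advertised limit, do not upload

        client.storeAnalysisStatistics(
            run_name, base64.b64encode(zip_bytes).decode("ascii"))
        return True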
- analyzer_statistics = metadata_dict.get('analyzer_statistics', {}) - - checkers = metadata_dict.get('checkers', {}) - - return check_commands, check_durations, cc_version, analyzer_statistics, \ - checkers - - -def collect_paths_events(report, file_ids, files): - """ - This function creates the BugPathPos and BugPathEvent objects which belong - to a report. - - report -- A report object from the parsed plist file. - file_ids -- A dictionary which maps the file paths to file IDs in the - database. - files -- A list containing the file paths from the parsed plist file. The - order of this list must be the same as in the plist file. - - #TODO Multiple ranges could belong to an event or control node. - Only the first range from the list of ranges is stored into the - database. Further improvement can be to store and view all ranges - if there are more than one. - """ - bug_paths = [] - bug_events = [] - bug_extended_data = [] - - events = [i for i in report.bug_path if i.get('kind') == 'event'] - - # Create remaining data for bugs and send them to the server. In plist - # file the source and target of the arrows are provided as starting and - # ending ranges of the arrow. The path A->B->C is given as A->B and - # B->C, thus range B is provided twice. So in the loop only target - # points of the arrows are stored, and an extra insertion is done for - # the source of the first arrow before the loop. - report_path = [i for i in report.bug_path if i.get('kind') == 'control'] - - if report_path: - start_range = report_path[0]['edges'][0]['start'] - start1_line = start_range[0]['line'] - start1_col = start_range[0]['col'] - start2_line = start_range[1]['line'] - start2_col = start_range[1]['col'] - source_file_path = files[start_range[1]['file']] - bug_paths.append(ttypes.BugPathPos( - start1_line, - start1_col, - start2_line, - start2_col, - file_ids[source_file_path])) - - for path in report_path: - try: - end_range = path['edges'][0]['end'] - end1_line = end_range[0]['line'] - end1_col = end_range[0]['col'] - end2_line = end_range[1]['line'] - end2_col = end_range[1]['col'] - source_file_path = files[end_range[1]['file']] - bug_paths.append(ttypes.BugPathPos( - end1_line, - end1_col, - end2_line, - end2_col, - file_ids[source_file_path])) - except IndexError: - # Edges might be empty nothing can be stored. - continue - - for event in events: - file_path = files[event['location']['file']] - - start_loc = event['location'] - end_loc = event['location'] - # Range can provide more precise location information. - # Use that if available. - ranges = event.get("ranges") - if ranges: - start_loc = ranges[0][0] - end_loc = ranges[0][1] - - bug_events.append(ttypes.BugPathEvent( - start_loc['line'], - start_loc['col'], - end_loc['line'], - end_loc['col'], - event['message'], - file_ids[file_path])) - - for macro in report.macro_expansions: - if not macro['expansion']: - continue - - file_path = files[macro['location']['file']] - - start_loc = macro['location'] - end_loc = macro['location'] - # Range can provide more precise location information. - # Use that if available. 
- ranges = macro.get("ranges") - if ranges: - start_loc = ranges[0][0] - end_loc = ranges[0][1] - - bug_extended_data.append(ttypes.ExtendedReportData( - ttypes.ExtendedReportDataType.MACRO, - start_loc['line'], - start_loc['col'], - end_loc['line'], - end_loc['col'], - macro['expansion'], - file_ids[file_path])) - - for note in report.notes: - if not note['message']: - continue - - file_path = files[note['location']['file']] - - start_loc = note['location'] - end_loc = note['location'] - # Range can provide more precise location information. - # Use that if available. - ranges = note.get("ranges") - if ranges: - start_loc = ranges[0][0] - end_loc = ranges[0][1] - - bug_extended_data.append(ttypes.ExtendedReportData( - ttypes.ExtendedReportDataType.NOTE, - start_loc['line'], - start_loc['col'], - end_loc['line'], - end_loc['col'], - note['message'], - file_ids[file_path])) - - return bug_paths, bug_events, bug_extended_data, - - -def store_bug_events(session, bugevents, report_id): - """ - """ - for i, event in enumerate(bugevents): - bpe = BugPathEvent(event.startLine, - event.startCol, - event.endLine, - event.endCol, - i, - event.msg, - event.fileId, - report_id) - session.add(bpe) - - -def store_bug_path(session, bugpath, report_id): - for i, piece in enumerate(bugpath): - brp = BugReportPoint(piece.startLine, - piece.startCol, - piece.endLine, - piece.endCol, - i, - piece.fileId, - report_id) - session.add(brp) - - -def store_extended_bug_data(session, extended_data, report_id): - """ - Add extended bug data objects to the database session. - """ - for data in extended_data: - data_type = report_extended_data_type_str(data.type) - red = ExtendedReportData(data.startLine, - data.startCol, - data.endLine, - data.endCol, - data.message, - data.fileId, - report_id, - data_type) - session.add(red) - - -def is_same_event_path(report_id, events, session): - """ - Checks if the given event path is the same as the one in the - events argument. - """ - try: - q = session.query(BugPathEvent) \ - .filter(BugPathEvent.report_id == report_id) \ - .order_by(BugPathEvent.order) - - for i, point2 in enumerate(q): - if i == len(events): - return False - - point1 = events[i] - - file1name = os.path.basename(session.query(File). - get(point1.fileId).filepath) - file2name = os.path.basename(session.query(File). - get(point2.file_id).filepath) - - if point1.startCol != point2.col_begin or \ - point1.endCol != point2.col_end or \ - file1name != file2name or \ - point1.msg != point2.msg: - return False - - return True - - except Exception as ex: - raise codechecker_api_shared.ttypes.RequestFailed( - codechecker_api_shared.ttypes.ErrorCode.GENERAL, - str(ex)) - - -def addCheckerRun(session, command, name, tag, username, - run_history_time, version, force, codechecker_version, - statistics, description): - """ - Store checker run related data to the database. - By default updates the results if name already exists. - Using the force flag removes existing analysis results for a run. - """ - try: - LOG.debug("adding checker run") - - run = session.query(Run).filter(Run.name == name).one_or_none() - - if run and force: - # Clean already collected results. - if not run.can_delete: - # Deletion is already in progress. 
- msg = "Can't delete " + str(run.id) - LOG.debug(msg) - raise codechecker_api_shared.ttypes.RequestFailed( - codechecker_api_shared.ttypes.ErrorCode.DATABASE, - msg) - - LOG.info('Removing previous analysis results ...') - session.delete(run) - # Not flushing after delete leads to a constraint violation error - # later, when adding run entity with the same name as the old one. - session.flush() - - checker_run = Run(name, version, command) - session.add(checker_run) - session.flush() - run_id = checker_run.id - - elif run: - # There is already a run, update the results. - run.date = datetime.now() - run.command = command - run.duration = -1 - session.flush() - run_id = run.id - else: - # There is no run create new. - checker_run = Run(name, version, command) - session.add(checker_run) - session.flush() - run_id = checker_run.id - - # Add run to the history. - LOG.debug("adding run to the history") - - if tag is not None: - run_history = session.query(RunHistory) \ - .filter(RunHistory.run_id == run_id, - RunHistory.version_tag == tag) \ - .one_or_none() - - if run_history: - run_history.version_tag = None - session.add(run_history) - - compressed_command = zlib.compress(command.encode("utf-8"), - zlib.Z_BEST_COMPRESSION) - run_history = RunHistory(run_id, tag, username, run_history_time, - compressed_command, codechecker_version, - description) - session.add(run_history) - session.flush() - LOG.debug("command store done") - # Create entry for analyzer statistics. - for analyzer_type, res in statistics.items(): - analyzer_version = res.get('version', None) - successful = res.get('successful') - failed = res.get('failed') - failed_sources = res.get('failed_sources') - - if analyzer_version: - LOG.debug(analyzer_version) - analyzer_version \ - = zlib.compress(analyzer_version.encode('utf-8'), - zlib.Z_BEST_COMPRESSION) - - LOG.debug("analyzer version compressed") - compressed_files = None - if failed_sources: - if version == '6.9.0': - failed_sources = ['Unavailable in CodeChecker 6.9.0!'] - - compressed_files = zlib.compress( - '\n'.join(failed_sources).encode('utf-8'), - zlib.Z_BEST_COMPRESSION) - - LOG.debug("failed source compressed") - analyzer_statistics = AnalyzerStatistic(run_history.id, - analyzer_type, - analyzer_version, - successful, - failed, - compressed_files) - LOG.debug("stats added to session") - session.add(analyzer_statistics) - - session.flush() - LOG.debug("stats store done") - return run_id - except Exception as ex: - raise codechecker_api_shared.ttypes.RequestFailed( - codechecker_api_shared.ttypes.ErrorCode.GENERAL, - str(ex)) - - -def finishCheckerRun(session, run_id): - """ - """ - try: - LOG.debug("Finishing checker run") - run = session.query(Run).get(run_id) - if not run: - return False - - run.mark_finished() - - return True - - except Exception as ex: - LOG.error(ex) - return False - - -def setRunDuration(session, run_id, duration): - """ - """ - try: - run = session.query(Run).get(run_id) - - if not run: - return False - - run.duration = duration - return True - except Exception as ex: - LOG.error(ex) - return False - - -def addReport(session, - run_id, - file_id, - main_section, - bugpath, - events, - bug_extended_data, - detection_status, - detection_time, - severity_map, - analyzer_name=None): - """ - """ - try: - - checker_name = main_section['check_name'] - severity_name = severity_map.get(checker_name) - severity = ttypes.Severity._NAMES_TO_VALUES[severity_name] - - report = Report(run_id, - main_section['issue_hash_content_of_line_in_context'], - 
file_id, - main_section['description'], - checker_name or 'NOT FOUND', - main_section['category'], - main_section['type'], - main_section['location']['line'], - main_section['location']['col'], - severity, - detection_status, - detection_time, - len(events), - analyzer_name) - - session.add(report) - session.flush() - - LOG.debug("storing bug path") - store_bug_path(session, bugpath, report.id) - LOG.debug("storing events") - store_bug_events(session, events, report.id) - LOG.debug("storing extended report data") - store_extended_bug_data(session, bug_extended_data, report.id) - - return report.id - - except Exception as ex: - raise codechecker_api_shared.ttypes.RequestFailed( - codechecker_api_shared.ttypes.ErrorCode.GENERAL, - str(ex)) - - -def changePathAndEvents(session, run_id, report_path_map): - report_ids = list(report_path_map.keys()) - - session.query(BugPathEvent) \ - .filter(BugPathEvent.report_id.in_(report_ids)) \ - .delete(synchronize_session=False) - - session.query(BugReportPoint) \ - .filter(BugReportPoint.report_id.in_(report_ids)) \ - .delete(synchronize_session=False) - - for report_id, (bug_path, events) in report_path_map.items(): - store_bug_path(session, bug_path, report_id) - store_bug_events(session, events, report_id) - - -def get_file_content(filepath, encoding): - """Return the file content for the given filepath. - - If the client sent the file contents encoded decode - the file content based on the encoding method. - This encoding is optionally used during network transfer - between the client an the server. - """ - with open(filepath, 'rb') as source_file: - content = source_file.read() - - if encoding == ttypes.Encoding.BASE64: - content = base64.b64decode(content) - return content - - -def addFileContent(session, filepath, source_file_name, content_hash, - encoding): - """ - Add the necessary file contents. If the file is already stored in the - database then its ID returns. If content_hash in None then this function - calculates the content hash. Or if is available at the caller and is - provided then it will not be calculated again. - - This function must not be called between addCheckerRun() and - finishCheckerRun() functions when SQLite database is used! addCheckerRun() - function opens a transaction which is closed by finishCheckerRun() and - since SQLite doesn't support parallel transactions, this API call will - wait until the other transactions finish. In the meantime the run adding - transaction times out. - """ - - source_file_content = None - if not content_hash: - source_file_content = get_file_content(source_file_name, encoding) - hasher = sha256() - hasher.update(source_file_content) - content_hash = hasher.hexdigest() - - file_content = session.query(FileContent).get(content_hash) - if not file_content: - if not source_file_content: - source_file_content = get_file_content(source_file_name, encoding) - try: - compressed_content = zlib.compress(source_file_content, - zlib.Z_BEST_COMPRESSION) - fc = FileContent(content_hash, compressed_content) - session.add(fc) - session.commit() - except sqlalchemy.exc.IntegrityError: - # Other transaction moght have added the same content in - # the meantime. 
- session.rollback() - - file_record = session.query(File) \ - .filter(File.content_hash == content_hash, - File.filepath == filepath) \ - .one_or_none() - if not file_record: - try: - file_record = File(filepath, content_hash) - session.add(file_record) - session.commit() - except sqlalchemy.exc.IntegrityError as ex: - LOG.error(ex) - # Other transaction might have added the same file in the - # meantime. - session.rollback() - file_record = session.query(File) \ - .filter(File.content_hash == content_hash, - File.filepath == filepath) \ - .one_or_none() - - return file_record.id - - -def addFileRecord(session, filepath, content_hash): - """ - Add the necessary file record pointing to an already existing content. - Returns the added file record id or None, if the content_hash is not found. - - This function must not be called between addCheckerRun() and - finishCheckerRun() functions when SQLite database is used! addCheckerRun() - function opens a transaction which is closed by finishCheckerRun() and - since SQLite doesn't support parallel transactions, this API call will - wait until the other transactions finish. In the meantime the run adding - transaction times out. - """ - file_record = session.query(File) \ - .filter(File.content_hash == content_hash, - File.filepath == filepath) \ - .one_or_none() - if file_record: - return file_record.id - try: - file_record = File(filepath, content_hash) - session.add(file_record) - session.commit() - except sqlalchemy.exc.IntegrityError as ex: - LOG.error(ex) - # Other transaction might have added the same file in the - # meantime. - session.rollback() - file_record = session.query(File) \ - .filter(File.content_hash == content_hash, - File.filepath == filepath).one_or_none() - - return file_record.id if file_record else None diff --git a/web/server/codechecker_server/database/db_cleanup.py b/web/server/codechecker_server/database/db_cleanup.py index 9cdd444c51..c4ddeb5a96 100644 --- a/web/server/codechecker_server/database/db_cleanup.py +++ b/web/server/codechecker_server/database/db_cleanup.py @@ -20,8 +20,9 @@ from codechecker_common.logger import get_logger from .database import DBSession -from .run_db_model import BugPathEvent, BugReportPoint, Comment, File, \ - FileContent, Report, ReviewStatus, RunLock +from .run_db_model import AnalysisInfo, BugPathEvent, BugReportPoint, \ + Comment, File, FileContent, Report, ReportAnalysisInfo, ReviewStatus, \ + RunHistoryAnalysisInfo, RunLock LOG = get_logger('server') RUN_LOCK_TIMEOUT_IN_DATABASE = 30 * 60 # 30 minutes. @@ -86,6 +87,7 @@ def remove_unused_data(session_maker): remove_unused_files(session_maker) remove_unused_comments(session_maker) remove_unused_review_statuses(session_maker) + remove_unused_analysis_info(session_maker) def remove_unused_comments(session_maker): @@ -200,3 +202,30 @@ def upgrade_severity_levels(session_maker, severity_map): except (sqlalchemy.exc.OperationalError, sqlalchemy.exc.ProgrammingError) as ex: LOG.error("Failed to upgrade severity levels: %s", str(ex)) + + +def remove_unused_analysis_info(session_maker): + """ Remove unused analysis information from the database. 
""" + LOG.debug("Garbage collection of dangling analysis info started...") + + with DBSession(session_maker) as session: + try: + run_history_analysis_info = session \ + .query(RunHistoryAnalysisInfo.c.analysis_info_id.distinct()) \ + .subquery() + + report_analysis_info = session \ + .query(ReportAnalysisInfo.c.analysis_info_id.distinct()) \ + .subquery() + + session.query(AnalysisInfo) \ + .filter(AnalysisInfo.id.notin_(run_history_analysis_info), + AnalysisInfo.id.notin_(report_analysis_info)) \ + .delete(synchronize_session=False) + + session.commit() + + LOG.debug("Garbage collection of dangling analysis info finished.") + except (sqlalchemy.exc.OperationalError, + sqlalchemy.exc.ProgrammingError) as ex: + LOG.error("Failed to remove dangling analysis info: %s", str(ex)) diff --git a/web/server/codechecker_server/database/run_db_model.py b/web/server/codechecker_server/database/run_db_model.py index 239bfcb9d1..702dbb246c 100644 --- a/web/server/codechecker_server/database/run_db_model.py +++ b/web/server/codechecker_server/database/run_db_model.py @@ -13,7 +13,7 @@ import os from sqlalchemy import MetaData, Column, Integer, UniqueConstraint, String, \ - DateTime, Boolean, ForeignKey, Binary, Enum, Text + DateTime, Boolean, ForeignKey, Binary, Enum, Table, Text from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import relationship from sqlalchemy.sql.expression import true @@ -30,6 +30,16 @@ Base = declarative_base(metadata=CC_META) +class AnalysisInfo(Base): + __tablename__ = 'analysis_info' + + id = Column(Integer, autoincrement=True, primary_key=True) + analyzer_command = Column(Binary) + + def __init__(self, analyzer_command): + self.analyzer_command = analyzer_command + + class Run(Base): __tablename__ = 'runs' @@ -42,13 +52,11 @@ class Run(Base): duration = Column(Integer) # Seconds, -1 if unfinished. 
name = Column(String) version = Column(String) - command = Column(String) can_delete = Column(Boolean, nullable=False, server_default=true(), default=True) - def __init__(self, name, version, command): - self.date, self.name, self.version, self.command = \ - datetime.now(), name, version, command + def __init__(self, name, version): + self.date, self.name, self.version = datetime.now(), name, version self.duration = -1 def mark_finished(self): @@ -117,6 +125,20 @@ def __init__(self, run_history_id, analyzer_type, version, successful, self.failed_files = failed_files +RunHistoryAnalysisInfo = Table( + 'run_history_analysis_info', + Base.metadata, + Column( + 'run_history_id', + Integer, + ForeignKey('run_histories.id', + deferrable=True, + initially="DEFERRED", + ondelete="CASCADE")), + Column('analysis_info_id', Integer, ForeignKey('analysis_info.id')) +) + + class RunHistory(Base): __tablename__ = 'run_histories' @@ -128,7 +150,6 @@ class RunHistory(Base): version_tag = Column(String) user = Column(String, nullable=False) time = Column(DateTime, nullable=False) - check_command = Column(Binary) cc_version = Column(String, nullable=True) description = Column(String, nullable=True) @@ -136,15 +157,18 @@ class RunHistory(Base): analyzer_statistics = relationship(AnalyzerStatistic, lazy="joined") + analysis_info = relationship( + "AnalysisInfo", + secondary=RunHistoryAnalysisInfo) + __table_args__ = (UniqueConstraint('run_id', 'version_tag'),) - def __init__(self, run_id, version_tag, user, time, check_command, - cc_version, description): + def __init__(self, run_id, version_tag, user, time, cc_version, + description): self.run_id = run_id self.version_tag = version_tag self.user = user self.time = time - self.check_command = check_command self.cc_version = cc_version self.description = description @@ -281,6 +305,20 @@ def __init__(self, line_begin, col_begin, line_end, col_end, self.type = data_type +ReportAnalysisInfo = Table( + 'report_analysis_info', + Base.metadata, + Column( + 'report_id', + Integer, + ForeignKey('reports.id', + deferrable=True, + initially="DEFERRED", + ondelete="CASCADE")), + Column('analysis_info_id', Integer, ForeignKey('analysis_info.id')) +) + + class Report(Base): __tablename__ = 'reports' @@ -319,6 +357,10 @@ class Report(Base): detected_at = Column(DateTime, nullable=False) fixed_at = Column(DateTime) + analysis_info = relationship( + "AnalysisInfo", + secondary=ReportAnalysisInfo) + # Cascade delete might remove rows, SQLAlchemy warns about this. # To remove warnings about already deleted items set this to False. __mapper_args__ = { diff --git a/web/server/codechecker_server/migrations/config/env.py b/web/server/codechecker_server/migrations/config/env.py index 3dfdc8b5da..92ae0885c5 100644 --- a/web/server/codechecker_server/migrations/config/env.py +++ b/web/server/codechecker_server/migrations/config/env.py @@ -6,6 +6,9 @@ # # ------------------------------------------------------------------------- +import os +import sys + from alembic import context from sqlalchemy import engine_from_config, pool @@ -13,22 +16,13 @@ # access to the values within the .ini file in use. config = context.config -# Add your model's MetaData object here -# for 'autogenerate' support. 
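A small sketch of how the new analysis_info table and its association tables defined above are expected to be populated; the session, the RunHistory row and the plain-text command are assumed, and the compression mirrors the zlib.decompress() call in the getAnalysisInfo handler:

    import zlib

    def attach_analysis_info(session, run_history, command):
        compressed = zlib.compress(command.encode("utf-8"),
                                   zlib.Z_BEST_COMPRESSION)
        info = AnalysisInfo(analyzer_command=compressed)

        # The RunHistoryAnalysisInfo secondary table keeps the link;
        # reports get the same treatment through ReportAnalysisInfo.
        run_history.analysis_info.append(info)
        session.add(info)
        session.commit()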
-try: - from codechecker_server.database.config_db_model import Base -except ImportError: - # Assume we are in the source directory - import sys - import os - - server_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), - "..", "..", "..")) - root_dir = os.path.join(server_dir, '..', '..') - - sys.path.extend([root_dir, server_dir]) +# Add model's MetaData object here for 'autogenerate' support. +sys.path.insert(0, os.path.abspath( + os.path.join(os.path.dirname(__file__), "..", "..", ".."))) +sys.path.insert(0, os.path.abspath( + os.path.join(os.path.dirname(__file__), "..", ".."))) - from codechecker_server.database.config_db_model import Base +from codechecker_server.database.config_db_model import Base target_metadata = Base.metadata diff --git a/web/server/codechecker_server/migrations/report/env.py b/web/server/codechecker_server/migrations/report/env.py index b2e01be054..40bfb4466e 100644 --- a/web/server/codechecker_server/migrations/report/env.py +++ b/web/server/codechecker_server/migrations/report/env.py @@ -6,6 +6,9 @@ # # ------------------------------------------------------------------------- +import os +import sys + from alembic import context from sqlalchemy import engine_from_config, pool @@ -13,18 +16,11 @@ # access to the values within the .ini file in use. config = context.config -# Add your model's MetaData object here -# for 'autogenerate' support. -try: - from codechecker_server.database.run_db_model import Base -except ImportError: - # Assume we are in the source directory - import sys - import os - - sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), - "..", "..", ".."))) - from codechecker_server.database.run_db_model import Base +# Add model's MetaData object here for 'autogenerate' support. +sys.path.insert(0, os.path.abspath( + os.path.join(os.path.dirname(__file__), "..", "..", ".."))) + +from codechecker_server.database.run_db_model import Base target_metadata = Base.metadata diff --git a/web/server/codechecker_server/migrations/report/versions/dabc6998b8f0_analysis_info_table.py b/web/server/codechecker_server/migrations/report/versions/dabc6998b8f0_analysis_info_table.py new file mode 100644 index 0000000000..0dec233a9d --- /dev/null +++ b/web/server/codechecker_server/migrations/report/versions/dabc6998b8f0_analysis_info_table.py @@ -0,0 +1,153 @@ +"""Analysis info table + +Revision ID: dabc6998b8f0 +Revises: af5d8a21c1e4 +Create Date: 2021-05-13 12:05:55.983746 + +""" + +# revision identifiers, used by Alembic. 
diff --git a/web/server/codechecker_server/migrations/report/versions/dabc6998b8f0_analysis_info_table.py b/web/server/codechecker_server/migrations/report/versions/dabc6998b8f0_analysis_info_table.py
new file mode 100644
index 0000000000..0dec233a9d
--- /dev/null
+++ b/web/server/codechecker_server/migrations/report/versions/dabc6998b8f0_analysis_info_table.py
@@ -0,0 +1,153 @@
+"""Analysis info table
+
+Revision ID: dabc6998b8f0
+Revises: af5d8a21c1e4
+Create Date: 2021-05-13 12:05:55.983746
+
+"""
+
+# revision identifiers, used by Alembic.
+revision = 'dabc6998b8f0'
+down_revision = 'af5d8a21c1e4'
+branch_labels = None
+depends_on = None
+
+from alembic import op
+import sqlalchemy as sa
+
+
+def upgrade():
+    conn = op.get_bind()
+    ctx = op.get_context()
+    dialect = ctx.dialect.name
+
+    analysis_info_tbl = op.create_table('analysis_info',
+        sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
+        sa.Column('analyzer_command', sa.Binary(), nullable=True),
+        sa.PrimaryKeyConstraint('id', name=op.f('pk_analysis_info'))
+    )
+
+    run_history_analysis_info_tbl = op.create_table('run_history_analysis_info',
+        sa.Column('run_history_id', sa.Integer(), nullable=True),
+        sa.Column('analysis_info_id', sa.Integer(), nullable=True),
+        sa.ForeignKeyConstraint(
+            ['analysis_info_id'],
+            ['analysis_info.id'],
+            name=op.f('fk_run_history_analysis_info_analysis_info_id_analysis_info')),
+        sa.ForeignKeyConstraint(
+            ['run_history_id'],
+            ['run_histories.id'],
+            name=op.f('fk_run_history_analysis_info_run_history_id_run_histories'),
+            ondelete='CASCADE', initially='DEFERRED', deferrable=True)
+    )
+
+    report_analysis_info_tbl = op.create_table('report_analysis_info',
+        sa.Column('report_id', sa.Integer(), nullable=True),
+        sa.Column('analysis_info_id', sa.Integer(), nullable=True),
+        sa.ForeignKeyConstraint(
+            ['analysis_info_id'],
+            ['analysis_info.id'],
+            name=op.f('fk_report_analysis_info_analysis_info_id_analysis_info')),
+        sa.ForeignKeyConstraint(
+            ['report_id'],
+            ['reports.id'],
+            name=op.f('fk_report_analysis_info_report_id_reports'),
+            ondelete='CASCADE', initially='DEFERRED', deferrable=True)
+    )
+
+    try:
+        run_histories = conn.execute("""
+            SELECT id, run_id, check_command
+            FROM run_histories
+            ORDER BY id DESC
+        """).fetchall()
+
+        uniqued_analysis_info = {}
+        run_analysis_info = {}
+        analysis_info = []
+        run_history_analysis_info = []
+        for ai_id, (run_history_id, run_id, analyzer_cmd) in enumerate(run_histories, start=1):
+            if analyzer_cmd not in uniqued_analysis_info:
+                uniqued_analysis_info[analyzer_cmd] = ai_id
+                analysis_info.append({
+                    'id': ai_id,
+                    'analyzer_command': analyzer_cmd
+                })
+
+            if run_id not in run_analysis_info:
+                run_analysis_info[run_id] = uniqued_analysis_info[analyzer_cmd]
+
+            run_history_analysis_info.append({
+                'run_history_id': run_history_id,
+                'analysis_info_id': uniqued_analysis_info[analyzer_cmd]
+            })
+
+        op.bulk_insert(
+            analysis_info_tbl, analysis_info)
+
+        op.bulk_insert(
+            run_history_analysis_info_tbl, run_history_analysis_info)
+
+        reports = conn.execute("""
+            SELECT id, run_id
+            FROM reports
+        """).fetchall()
+
+        report_analysis_info = []
+        for report_id, run_id in reports:
+            if run_id not in run_analysis_info:
+                continue
+
+            report_analysis_info.append({
+                'report_id': report_id,
+                'analysis_info_id': run_analysis_info[run_id]
+            })
+
+        op.bulk_insert(
+            report_analysis_info_tbl, report_analysis_info)
+    except:
+        print("Analyzer command data migration failed!")
+    else:
+        # If the data migration finished successfully, we can remove the
+        # old columns.
+        if dialect == 'sqlite':
+            # Unfortunately, SQLite does not support dropping columns.
+            # The 'batch_alter_table' function could drop a column here (it
+            # recreates the table), but that would clear the tables which
+            # have foreign keys with the cascade delete property, and
+            # disabling the foreign key pragma does not help here. For this
+            # reason these columns are kept in case of SQLite.

+            # with op.batch_alter_table('run_histories') as batch_op:
+            #     batch_op.drop_column('check_command')
+
+            # with op.batch_alter_table(
+            #     'runs',
+            #     reflect_args=[
+            #         # By default, if we don't override the definition of
+            #         # this column, we will get the following exception:
+            #         # (sqlite3.OperationalError) default value of column
+            #         # [can_delete] is not constant
+            #         sa.Column(
+            #             'can_delete',
+            #             sa.Boolean(),
+            #             server_default=sa.sql.true(),
+            #             nullable=False
+            #         )
+            #     ]
+            # ) as batch_op:
+            #     batch_op.drop_column('command')
+            pass
+        else:
+            op.drop_column('run_histories', 'check_command')
+            op.drop_column('runs', 'command')
+
+
+def downgrade():
+    op.add_column('runs',
+                  sa.Column('command', sa.VARCHAR(), nullable=True))
+    op.add_column('run_histories',
+                  sa.Column('check_command', sa.BLOB(), nullable=True))
+    op.drop_table('report_analysis_info')
+    op.drop_table('run_history_analysis_info')
+    op.drop_table('analysis_info')
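Note (editor's illustration, not part of the migration): the data migration above de-duplicates analyzer commands, so run histories that share a command end up pointing at a single analysis_info row. A tiny self-contained Python example of the same idea, with made-up values:

# Toy rows shaped like the (id, run_id, check_command) result selected above.
run_histories = [
    (3, 1, b"CodeChecker analyze -o ./reports compile_commands.json"),
    (2, 1, b"CodeChecker analyze -o ./reports compile_commands.json"),
    (1, 2, b"CodeChecker analyze --ctu -o ./reports compile_commands.json"),
]

uniqued = {}  # analyzer command -> analysis_info id
links = []    # rows destined for run_history_analysis_info
for history_id, _, cmd in run_histories:
    ai_id = uniqued.setdefault(cmd, len(uniqued) + 1)
    links.append({'run_history_id': history_id, 'analysis_info_id': ai_id})

assert len(uniqued) == 2  # two distinct commands -> two analysis_info rows
assert links[0]['analysis_info_id'] == links[1]['analysis_info_id']
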
diff --git a/web/server/tests/unit/test_store_handler.py b/web/server/tests/unit/test_collect_path_events.py
similarity index 88%
rename from web/server/tests/unit/test_store_handler.py
rename to web/server/tests/unit/test_collect_path_events.py
index 6a7877f282..6745530453 100644
--- a/web/server/tests/unit/test_store_handler.py
+++ b/web/server/tests/unit/test_collect_path_events.py
@@ -15,12 +15,12 @@

 from codechecker_common import plist_parser

-from codechecker_server.api import store_handler
+from codechecker_server.api.mass_store_run import collect_paths_events


-class StoreHandler(unittest.TestCase):
+class CollectPathEventsTest(unittest.TestCase):
     """
-    Test Store handler features.
+    Test collecting path events.
     """

     @classmethod
@@ -97,9 +97,7 @@ def test_collect_path_events(self):
                 endLine=7,
                 fileId=2)
         ]
-        path1, events1, _ = store_handler.collect_paths_events(reports[0],
-                                                               file_ids,
-                                                               files)
+        path1, events1, _ = collect_paths_events(reports[0], file_ids, files)

         self.assertEqual(path1, report1_path)
         self.assertEqual(events1, report1_events)
@@ -112,9 +110,8 @@
                 endLine=8,
                 fileId=1)
         ]
-        path2, events2, _ = store_handler.collect_paths_events(reports[1],
-                                                               file_ids,
-                                                               files)
+        path2, events2, _ = collect_paths_events(reports[1], file_ids, files)
+
         self.assertEqual(path2, report2_path)
         self.assertEqual(events2, report2_events)

@@ -138,8 +135,7 @@
                 endLine=14,
                 fileId=1)
         ]
-        path, events, _ = store_handler.collect_paths_events(reports[2],
-                                                             file_ids,
-                                                             files)
+        path, events, _ = collect_paths_events(reports[2], file_ids, files)
+
         self.assertEqual(path, report3_path)
         self.assertEqual(events, report3_events)
diff --git a/web/server/vue-cli/e2e/pages/runs.js b/web/server/vue-cli/e2e/pages/runs.js
index 5c55728414..9ca465147c 100644
--- a/web/server/vue-cli/e2e/pages/runs.js
+++ b/web/server/vue-cli/e2e/pages/runs.js
@@ -34,7 +34,7 @@ module.exports = {
       showDescriptionBtn: "button.description",
       showHistoryBtn: "a.show-history",
       showStatisticsBtn: "a.show-statistics",
-      showCheckCommandBtn: "button.show-check-command",
+      showCheckCommandBtn: "button.show-analysis-info",
       openDetectionStatus: "a.detection-status-count",
       descriptionMenu: ".menuable__content__active.run-description-menu .v-card__text",
@@ -61,7 +61,7 @@ module.exports = {
       }
     },
     checkCommandDialog: {
-      selector: ".v-dialog__content--active .check-command",
+      selector: ".v-dialog__content--active .analysis-info",
       elements: {
        content: ".container",
        closeBtn: ".v-card__title button"
@@ -88,7 +88,7 @@ module.exports = {
    elements: {
date: ".date", showStatisticsBtn: "a.show-statistics", - showCheckCommandBtn: "button.show-check-command", + showCheckCommandBtn: "button.show-analysis-info", historyEvent: ".v-timeline-item.run-history", baseline: ".compare-events .v-input--checkbox:nth-child(1)", compareTo: ".compare-events .v-input--checkbox:nth-child(2)" diff --git a/web/server/vue-cli/package-lock.json b/web/server/vue-cli/package-lock.json index b9fbe6351e..5b275925bb 100644 --- a/web/server/vue-cli/package-lock.json +++ b/web/server/vue-cli/package-lock.json @@ -6332,8 +6332,8 @@ "dev": true }, "codechecker-api": { - "version": "file:../../api/js/codechecker-api-node/dist/codechecker-api-6.39.0.tgz", - "integrity": "sha512-qE7zN/7vTLAr6JrJdM1bYtn6NjxC5D9/WRlRJ8DwfzE9tWKNbLEUjttz5yn+MwNzJlguEkvCjQEA3TcU22SKIw==", + "version": "file:../../api/js/codechecker-api-node/dist/codechecker-api-6.40.0.tgz", + "integrity": "sha512-3t92svVHCTP8mmDP/ZCg1nV67XPy2DGxLDrsLhBG6umDnLdtqq1i2AQovGnBc7c2NQZ8EvIbaJKnut3aCAfujw==", "requires": { "thrift": "0.13.0-hotfix.1" } diff --git a/web/server/vue-cli/package.json b/web/server/vue-cli/package.json index b2ae72f0cf..fae9dc11a3 100644 --- a/web/server/vue-cli/package.json +++ b/web/server/vue-cli/package.json @@ -27,7 +27,7 @@ }, "dependencies": { "@mdi/font": "^5.9.55", - "codechecker-api": "file:../../api/js/codechecker-api-node/dist/codechecker-api-6.39.0.tgz", + "codechecker-api": "file:../../api/js/codechecker-api-node/dist/codechecker-api-6.40.0.tgz", "chart.js": "^2.9.4", "chartjs-plugin-datalabels": "^0.7.0", "codemirror": "^5.60.0", diff --git a/web/server/vue-cli/src/components/Run/CheckCommandDialog.vue b/web/server/vue-cli/src/components/AnalysisInfoDialog.vue similarity index 65% rename from web/server/vue-cli/src/components/Run/CheckCommandDialog.vue rename to web/server/vue-cli/src/components/AnalysisInfoDialog.vue index 63cf785fcc..4df9962e35 100644 --- a/web/server/vue-cli/src/components/Run/CheckCommandDialog.vue +++ b/web/server/vue-cli/src/components/AnalysisInfoDialog.vue @@ -1,7 +1,7 @@