Skip to content

Commit

Permalink
Merge pull request #726 from whisperity/unexpected_char
Browse files Browse the repository at this point in the history
Handle a Thrift exception if file content contains Unicode char that escapes as \u01?? by b64ing source text
  • Loading branch information
bruntib authored Jul 14, 2017
2 parents 8a65ea8 + 240e455 commit ebb1d03
Show file tree
Hide file tree
Showing 9 changed files with 53 additions and 26 deletions.
19 changes: 14 additions & 5 deletions api/report_server.thrift
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ namespace js codeCheckerDBAccess
namespace cpp cc.service.codechecker

//=================================================
const string API_VERSION = '5.2'
const string API_VERSION = '5.3'
const i64 MAX_QUERY_SIZE = 500
//=================================================

Expand Down Expand Up @@ -77,6 +77,7 @@ struct ReportDetails{
}

//-----------------------------------------------------------------------------
// TODO: This type is unused.
struct ReportFileData{
1: i64 reportFileId,
2: string reportFileContent
Expand All @@ -90,6 +91,12 @@ enum SortType {
SEVERITY
}

//-----------------------------------------------------------------------------
enum Encoding {
DEFAULT,
BASE64
}

//-----------------------------------------------------------------------------
struct SourceFileData{
1: i64 fileId,
Expand Down Expand Up @@ -183,11 +190,12 @@ service codeCheckerDBAccess {
1: i64 reportId)
throws (1: shared.RequestFailed requestError),

// get file informations if fileContent is true the content of the source file
// will be also returned
// get file information, if fileContent is true the content of the source
// file will be also returned
SourceFileData getSourceFileData(
1: i64 fileId,
2: bool fileContent)
2: bool fileContent,
3: optional Encoding encoding)
throws (1: shared.RequestFailed requestError),

// get the file id from the database for a filepath, returns -1 if not found
Expand Down Expand Up @@ -362,7 +370,8 @@ service codeCheckerDBAccess {

bool addFileContent(
1: i64 file_id,
2: string file_content)
2: string file_content,
3: optional Encoding encoding)
throws (1: shared.RequestFailed requestError),

bool finishCheckerRun(1: i64 run_id)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import traceback

import shared
from codeCheckerDBAccess.ttypes import Encoding

from libcodechecker import logger
from libcodechecker import suppress_handler
Expand Down Expand Up @@ -57,7 +58,8 @@ def __store_bugs(self, files, reports, client, analisys_id):

source64 = base64.b64encode(source)
res = client.addFileContent(file_descriptor.fileId,
source64)
source64,
Encoding.BASE64)
if not res:
LOG.debug("Failed to store file content")

Expand Down
13 changes: 6 additions & 7 deletions libcodechecker/cmd/cmd_line_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@
# License. See LICENSE.TXT for details.
# -------------------------------------------------------------------------

import base64
from datetime import datetime
import json
import os
import sys

import codeCheckerDBAccess
import shared
from Authentication import ttypes as AuthTypes
import codeCheckerDBAccess
from codeCheckerDBAccess import ttypes

from libcodechecker import suppress_file_handler
Expand Down Expand Up @@ -234,11 +234,10 @@ def getLineFromFile(filename, lineno):
return ""

def getLineFromRemoteFile(client, fid, lineno):
# FIXME: Certain UTF8 file content
# causes connection abort.
source = client.getSourceFileData(fid, True)
i = 1
lines = source.fileContent.split('\n')
# Thrift Python client cannot decode JSONs that contain non '\u00??'
# characters, so we instead ask for a Base64-encoded version.
source = client.getSourceFileData(fid, True, ttypes.Encoding.BASE64)
lines = base64.b64decode(source.fileContent).split('\n')
return "" if len(lines) < lineno else lines[lineno - 1]

def getDiffReportDir(getterFn, baseid, report_dir, suppr, diff_type):
Expand Down
4 changes: 2 additions & 2 deletions libcodechecker/libclient/thrift_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def getRunResults(self, runId, limit, offset, sortType, reportFilters):
pass

@ThriftClientCall
def getSourceFileData(self, fileId, fileContent):
def getSourceFileData(self, fileId, fileContent, encoding):
pass

@ThriftClientCall
Expand Down Expand Up @@ -174,7 +174,7 @@ def needFileContent(self, run_id, filepath):
pass

@ThriftClientCall
def addFileContent(self, file_id, file_content):
def addFileContent(self, file_id, file_content, encoding):
pass

@ThriftClientCall
Expand Down
18 changes: 11 additions & 7 deletions libcodechecker/server/client_db_access_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,13 @@
Handle Thrift requests.
"""

from collections import defaultdict
import base64
import codecs
from collections import defaultdict
import os
import zlib

import sqlalchemy
from sqlalchemy.sql.expression import false
from sqlalchemy.sql.expression import true

import shared
from codeCheckerDBAccess import constants
Expand Down Expand Up @@ -826,11 +824,12 @@ def getFileId(self, run_id, path):
msg)

@timeit
def getSourceFileData(self, fileId, fileContent):
def getSourceFileData(self, fileId, fileContent, encoding):
"""
Parameters:
- fileId
- fileContent
- enum Encoding
"""
session = self.__session
try:
Expand All @@ -843,7 +842,10 @@ def getSourceFileData(self, fileId, fileContent):
if fileContent and sourcefile.content:
source = zlib.decompress(sourcefile.content)

source = codecs.decode(source, 'utf-8', 'replace')
if not encoding or encoding == Encoding.DEFAULT:
source = codecs.decode(source, 'utf-8', 'replace')
elif encoding == Encoding.BASE64:
source = base64.b64encode(source)

return SourceFileData(fileId=sourcefile.id,
filePath=sourcefile.filepath,
Expand Down Expand Up @@ -1631,10 +1633,12 @@ def needFileContent(self, run_id, filepath):
return NeedFileResult(needed, f.id)

@timeit
def addFileContent(self, fid, content):
def addFileContent(self, fid, content, encoding):
"""
"""
content = base64.b64decode(content)
if encoding == Encoding.BASE64:
content = base64.b64decode(content)

try:
f = self.__session.query(File).get(fid)
compresssed_content = zlib.compress(content,
Expand Down
15 changes: 14 additions & 1 deletion tests/functional/report_viewer_api/test_get_run_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@
# License. See LICENSE.TXT for details.
# -----------------------------------------------------------------------------

import base64
import logging
import os
import re
import unittest

from codeCheckerDBAccess.ttypes import Encoding
from codeCheckerDBAccess.ttypes import Order
from codeCheckerDBAccess.ttypes import ReportFilter
from codeCheckerDBAccess.ttypes import SortMode
Expand Down Expand Up @@ -124,7 +126,9 @@ def test_get_source_file_content(self):

logging.debug('Getting the content of ' + run_res.checkedFile)

file_data = self._cc_client.getSourceFileData(run_res.fileId, True)
file_data = self._cc_client.getSourceFileData(run_res.fileId,
True,
None)
self.assertIsNotNone(file_data)

file_content1 = file_data.fileContent
Expand All @@ -135,6 +139,15 @@ def test_get_source_file_content(self):

self.assertEqual(file_content1, file_content2)

file_data_b64 = self._cc_client.getSourceFileData(
run_res.fileId, True, Encoding.BASE64)
self.assertIsNotNone(file_data_b64)

file_content1_b64 = base64.b64decode(file_data_b64.fileContent)
self.assertIsNotNone(file_content1_b64)

self.assertEqual(file_content1_b64, file_content2)

logging.debug('got ' + str(len(run_results)) + ' files')

self.assertEqual(run_result_count, len(run_results))
Expand Down
2 changes: 1 addition & 1 deletion tests/functional/report_viewer_api/test_hash_clash.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def _create_file(self, run_id, name, cols=10, lines=10):

content = _generate_content(cols, lines)
content = base64.b64encode(content)
success = self._report.addFileContent(need.fileId, content)
success = self._report.addFileContent(need.fileId, content, None)
self.assertTrue(success)

return need.fileId, path
Expand Down
2 changes: 1 addition & 1 deletion tests/performance/run_performance_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ def store_reports(run_id, file_content, test_conf):
str(file_count)
).fileId

store_server.addFileContent(file_id, file_content)
store_server.addFileContent(file_id, file_content, None)

store_server.finishBuildAction(ba_id, '')
for bug_count in range(bug_per_file):
Expand Down
2 changes: 1 addition & 1 deletion www/scripts/codecheckerviewer/BugViewer.js
Original file line number Diff line number Diff line change
Expand Up @@ -895,7 +895,7 @@ function (declare, dom, style, on, query, Memory, Observable, topic, entities,
runData : this.runData
});

CC_SERVICE.getSourceFileData(this.reportData.fileId, true,
CC_SERVICE.getSourceFileData(this.reportData.fileId, true, null,
function (sourceFileData) {
that._editor.set('sourceFileData', sourceFileData);
that._editor.drawBugPath();
Expand Down

0 comments on commit ebb1d03

Please sign in to comment.