Skip to content

Commit

Permalink
Review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
tveasey committed May 23, 2018
1 parent 5e6aa19 commit 0e126a0
Show file tree
Hide file tree
Showing 10 changed files with 177 additions and 116 deletions.
63 changes: 52 additions & 11 deletions include/core/CCompressUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,13 @@ namespace core {
//! a multi-threaded application it would be best to create
//! one object for each thread.
//!
class CORE_EXPORT CCompressUtils : private CNonCopyable {
class CORE_EXPORT CCompressUtil : private CNonCopyable {
public:
using TByteVec = std::vector<Bytef>;
enum EOperation { E_Deflate, E_Inflate };

public:
CCompressUtils(EOperation operation, bool lengthOnly, int level = Z_DEFAULT_COMPRESSION);
~CCompressUtils();
CCompressUtil(bool lengthOnly);
virtual ~CCompressUtil() = default;

//! Add a string.
//!
Expand Down Expand Up @@ -74,30 +73,40 @@ class CORE_EXPORT CCompressUtils : private CNonCopyable {
//! Get transformed representation.
//!
//! \warning This will fail if the lengthOnly constructor argument
//! was set to true.
//! was set to true.
//!
//! \note The output representation is a byte array NOT a string,
//! and hence not printable.
//!
//! If finish==false then retrieve partial state.
bool data(bool finish, TByteVec& result);

//! Get transformed representation.
//!
//! \note This is equivalent to calling data with finish==true, but
//! also takes the cached state (avoiding the copy).
bool finishAndTakeData(TByteVec& result);

//! Get transformed data length.
//!
//! If finish==false then retrieve partial length.
bool length(bool finish, size_t& length);
bool length(bool finish, std::size_t& length);

//! Reset the underlying stream. This will happen automatically
//! when adding a new string after having finished the previous
//! round, but sometimes, for example when recovering from an
//! error, it may be desirable to explicitly reset the state.
void reset();

protected:
//! Get the underlying stream.
z_stream& stream();

private:
enum EState { E_Unused, E_Active, E_Finished };

private:
static const size_t CHUNK_SIZE{4096};
static const std::size_t CHUNK_SIZE{4096};

private:
//! Get an unsigned character pointer to the address of the start
Expand Down Expand Up @@ -139,7 +148,7 @@ class CORE_EXPORT CCompressUtils : private CNonCopyable {
m_ZlibStrm.next_in = bytes(input);
m_ZlibStrm.avail_in = size(input);

int flush(finish ? Z_FINISH : Z_NO_FLUSH);
int flush{finish ? Z_FINISH : Z_NO_FLUSH};
do {
if (this->processChunk(flush) == false) {
return false;
Expand All @@ -151,13 +160,19 @@ class CORE_EXPORT CCompressUtils : private CNonCopyable {
return true;
}

//! Preparation before returning any data.
bool prepareToReturnData(bool finish);

//! Process a chunk with the stream.
virtual int streamProcessChunk(int flush) = 0;

//! Reset the underlying stream.
virtual int resetStream() = 0;

private:
//! The current state of deflation or inflation.
EState m_State;

//! The mode of operation i.e. deflation or inflation.
EOperation m_Operation;

//! Is this object only fit for getting output lengths?
bool m_LengthOnly;

Expand All @@ -171,6 +186,32 @@ class CORE_EXPORT CCompressUtils : private CNonCopyable {
//! The zlib data structure.
z_stream m_ZlibStrm;
};

//! \brief Implementation of CompressUtil for deflating data.
class CORE_EXPORT CDeflator final : public CCompressUtil {
public:
CDeflator(bool lengthOnly, int level = Z_DEFAULT_COMPRESSION);
~CDeflator();

private:
//! Process a chunk of state (optionally flushing).
virtual int streamProcessChunk(int flush);
//! Reset the underlying stream.
virtual int resetStream();
};

//! \brief Implementation of CompressUtil for inflating data.
class CORE_EXPORT CInflator final : public CCompressUtil {
public:
CInflator(bool lengthOnly);
~CInflator();

private:
//! Process a chunk of state (optionally flushing).
virtual int streamProcessChunk(int flush);
//! Reset the underlying stream.
virtual int resetStream();
};
}
}

Expand Down
2 changes: 1 addition & 1 deletion include/core/CStringSimilarityTester.h
Original file line number Diff line number Diff line change
Expand Up @@ -440,7 +440,7 @@ class CORE_EXPORT CStringSimilarityTester : private CNonCopyable {
static const int MINUS_INFINITE_INT;

//! Used by the compression-based similarity measures
mutable CCompressUtils m_Compressor;
mutable CDeflator m_Compressor;

// For unit testing
friend class ::CStringSimilarityTesterTest;
Expand Down
159 changes: 90 additions & 69 deletions lib/core/CCompressUtils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,42 +12,14 @@
namespace ml {
namespace core {

CCompressUtils::CCompressUtils(EOperation operation, bool lengthOnly, int level)
: m_State{E_Unused}, m_Operation{operation}, m_LengthOnly{lengthOnly} {
CCompressUtil::CCompressUtil(bool lengthOnly)
: m_State{E_Unused}, m_LengthOnly{lengthOnly} {
::memset(&m_ZlibStrm, 0, sizeof(z_stream));

m_ZlibStrm.zalloc = Z_NULL;
m_ZlibStrm.zfree = Z_NULL;

int ret{Z_OK};
switch (m_Operation) {
case E_Deflate:
ret = ::deflateInit(&m_ZlibStrm, level);
break;
case E_Inflate:
ret = ::inflateInit(&m_ZlibStrm);
}
if (ret != Z_OK) {
LOG_ABORT(<< "Error initialising Z stream: " << ::zError(ret));
}
}

CCompressUtils::~CCompressUtils() {
int ret{Z_OK};
switch (m_Operation) {
case E_Deflate:
ret = ::deflateEnd(&m_ZlibStrm);
break;
case E_Inflate:
ret = ::inflateEnd(&m_ZlibStrm);
break;
}
if (ret != Z_OK) {
LOG_ERROR(<< "Error ending Z stream: " << ::zError(ret));
}
}

bool CCompressUtils::addString(const std::string& input) {
bool CCompressUtil::addString(const std::string& input) {
if (m_State == E_Finished) {
// If the last round of data processing has finished
// and we're adding a new vector then we need to reset
Expand All @@ -57,34 +29,23 @@ bool CCompressUtils::addString(const std::string& input) {
return this->processInput(false, input);
}

bool CCompressUtils::data(bool finish, TByteVec& result) {
if (m_LengthOnly) {
LOG_ERROR(<< "Cannot get data if asked for length-only");
bool CCompressUtil::data(bool finish, TByteVec& result) {
if (this->prepareToReturnData(finish) == false) {
return false;
}
result = m_FullResult;
return true;
}

if (m_State == E_Unused) {
LOG_ERROR(<< "Cannot get data - nothing added");
bool CCompressUtil::finishAndTakeData(TByteVec& result) {
if (this->prepareToReturnData(true) == false) {
return false;
}

if (finish && m_State == E_Active) {
if (this->processInput(finish, std::string()) == false) {
LOG_ERROR(<< "Failed to finish processing");
return false;
}
}

if (finish) {
result = std::move(m_FullResult);
} else {
result = m_FullResult;
}

result = std::move(m_FullResult);
return true;
}

bool CCompressUtils::length(bool finish, size_t& length) {
bool CCompressUtil::length(bool finish, std::size_t& length) {
if (m_State == E_Unused) {
LOG_ERROR(<< "Cannot get length - nothing added");
return false;
Expand All @@ -102,42 +63,102 @@ bool CCompressUtils::length(bool finish, size_t& length) {
return true;
}

void CCompressUtils::reset() {
int ret(::deflateReset(&m_ZlibStrm));
void CCompressUtil::reset() {
int ret{this->resetStream()};
if (ret != Z_OK) {
// deflateReset() will only fail if one or more of the critical members
// of the current stream struct are NULL. If this happens then memory
// corruption must have occurred, because there's nowhere where we set
// these pointers to NULL after initialisation, so it's reasonable to
// abort.
// resetStream() will only fail if one or more of the critical
// members of the current z_stream struct are NULL. If this
// happens then memory corruption must have occurred, because
// there's nowhere where we set these pointers to NULL after
// initialisation, so it's reasonable to abort.
LOG_ABORT(<< "Error reseting Z stream: " << ::zError(ret));
}
m_State = E_Unused;
}

bool CCompressUtils::processChunk(int flush) {
z_stream& CCompressUtil::stream() {
return m_ZlibStrm;
}

bool CCompressUtil::processChunk(int flush) {
m_ZlibStrm.next_out = m_Chunk;
m_ZlibStrm.avail_out = CHUNK_SIZE;

int ret{Z_OK};
switch (m_Operation) {
case E_Deflate:
ret = ::deflate(&m_ZlibStrm, flush);
break;
case E_Inflate:
ret = ::inflate(&m_ZlibStrm, flush);
break;
}
int ret{this->streamProcessChunk(flush)};
if (ret == Z_STREAM_ERROR) {
LOG_ERROR(<< "Error processing: " << ::zError(ret));
return false;
}

size_t have(CHUNK_SIZE - m_ZlibStrm.avail_out);
std::size_t have{CHUNK_SIZE - m_ZlibStrm.avail_out};
if (!m_LengthOnly) {
m_FullResult.insert(m_FullResult.end(), &m_Chunk[0], &m_Chunk[have]);
}
return true;
}

bool CCompressUtil::prepareToReturnData(bool finish) {
if (m_LengthOnly) {
LOG_ERROR(<< "Cannot get data if asked for length-only");
return false;
}

if (m_State == E_Unused) {
LOG_ERROR(<< "Cannot get data - nothing added");
return false;
}

if (finish && m_State == E_Active) {
if (this->processInput(finish, std::string()) == false) {
LOG_ERROR(<< "Failed to finish processing");
return false;
}
}
return true;
}

CDeflator::CDeflator(bool lengthOnly, int level) : CCompressUtil{lengthOnly} {
int ret{::deflateInit(&this->stream(), level)};
if (ret != Z_OK) {
LOG_ABORT(<< "Error initialising Z stream: " << ::zError(ret));
}
}

CDeflator::~CDeflator() {
int ret{::deflateEnd(&this->stream())};
if (ret != Z_OK) {
LOG_ERROR(<< "Error ending Z stream: " << ::zError(ret));
}
}

int CDeflator::streamProcessChunk(int flush) {
return ::deflate(&this->stream(), flush);
}

int CDeflator::resetStream() {
return ::deflateReset(&this->stream());
}

CInflator::CInflator(bool lengthOnly) : CCompressUtil{lengthOnly} {
int ret{::inflateInit(&this->stream())};
if (ret != Z_OK) {
LOG_ABORT(<< "Error initialising Z stream: " << ::zError(ret));
}
}

CInflator::~CInflator() {
int ret{::inflateEnd(&this->stream())};
if (ret != Z_OK) {
LOG_ERROR(<< "Error ending Z stream: " << ::zError(ret));
}
}

int CInflator::streamProcessChunk(int flush) {
return ::inflate(&this->stream(), flush);
}

int CInflator::resetStream() {
return ::inflateReset(&this->stream());
}
}
}
3 changes: 1 addition & 2 deletions lib/core/CStringSimilarityTester.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@ namespace core {

const int CStringSimilarityTester::MINUS_INFINITE_INT(std::numeric_limits<int>::min());

CStringSimilarityTester::CStringSimilarityTester()
: m_Compressor(CCompressUtils::E_Deflate, true) {
CStringSimilarityTester::CStringSimilarityTester() : m_Compressor(true) {
}

bool CStringSimilarityTester::similarity(const std::string& first,
Expand Down
Loading

0 comments on commit 0e126a0

Please sign in to comment.