Skip to content

Commit

Permalink
fixes #138 - retain UTF-8 BOM on Save
Browse files Browse the repository at this point in the history
  • Loading branch information
d99kris committed Mar 18, 2023
1 parent 43c1763 commit bb2db43
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 4 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ if(RAPIDCSV_BUILD_TESTS)
add_unit_test(test091)
add_unit_test(test092)
add_unit_test(test093)
add_unit_test(test094)

# perf tests
add_perf_test(ptest001)
Expand Down
15 changes: 12 additions & 3 deletions src/rapidcsv.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* rapidcsv.h
*
* URL: https://github.com/d99kris/rapidcsv
* Version: 8.74
* Version: 8.75
*
* Copyright (C) 2017-2023 Kristofer Berggren
* All rights reserved.
Expand Down Expand Up @@ -43,6 +43,7 @@ namespace rapidcsv
#else
static const bool sPlatformHasCR = false;
#endif
static const std::vector<char> s_Utf8BOM = { '\xef', '\xbb', '\xbf' };

/**
* @brief Datastructure holding parameters controlling how invalid numbers (including
Expand Down Expand Up @@ -601,6 +602,7 @@ namespace rapidcsv
mIsUtf16 = false;
mIsLE = false;
#endif
mHasUtf8BOM = false;
}

/**
Expand Down Expand Up @@ -1506,8 +1508,8 @@ namespace rapidcsv
{
std::vector<char> bom3b(3, '\0');
pStream.read(bom3b.data(), 3);
static const std::vector<char> bomU8 = { '\xef', '\xbb', '\xbf' };
if (bom3b != bomU8)

if (bom3b != s_Utf8BOM)
{
// file does not start with a UTF-8 Byte order mark
pStream.seekg(0, std::ios::beg);
Expand All @@ -1516,6 +1518,7 @@ namespace rapidcsv
{
// file did start with a UTF-8 Byte order mark, simply skip it
length -= 3;
mHasUtf8BOM = true;
}
}

Expand Down Expand Up @@ -1674,6 +1677,11 @@ namespace rapidcsv
std::ofstream stream;
stream.exceptions(std::ofstream::failbit | std::ofstream::badbit);
stream.open(mPath, std::ios::binary | std::ios::trunc);
if (mHasUtf8BOM)
{
stream.write(s_Utf8BOM.data(), 3);
}

WriteCsv(stream);
}
}
Expand Down Expand Up @@ -1846,5 +1854,6 @@ namespace rapidcsv
bool mIsUtf16 = false;
bool mIsLE = false;
#endif
bool mHasUtf8BOM = false;
};
}
2 changes: 1 addition & 1 deletion tests/test066.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// test066.cpp - test UTF-8 Byte order mark skipping
// test066.cpp - read UTF-8 BOM

#include "rapidcsv.h"
#include "unittest.h"
Expand Down
35 changes: 35 additions & 0 deletions tests/test094.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// test094.cpp - write UTF-8 BOM

#include "rapidcsv.h"
#include "unittest.h"

// Round-trip test for a CSV file that starts with a UTF-8 byte order mark:
// the file is written to a temporary path, loaded into a rapidcsv::Document,
// saved back to the same path, and then re-read so that the bytes on disk can
// be compared against the original content (BOM included).
//
// Returns 0 when no exception was raised, 1 otherwise.
int main()
{
  // Fixture: the three BOM bytes, then a one-column CSV with header "ID" and
  // a single data row "1".
  std::string origCsvWithBom =
    "\xef\xbb\xbfID\n"
    "1\n";

  std::string tmpFile = unittest::TempPath();
  unittest::WriteFile(tmpFile, origCsvWithBom);

  int exitCode = 0;

  try
  {
    // NOTE(review): LabelParams(0, -1) presumably selects row 0 as the column
    // header and disables the row-label column - confirm against rapidcsv.h.
    rapidcsv::Document doc(tmpFile, rapidcsv::LabelParams(0, -1));
    doc.Save();

    // Compare what Save() produced with what was originally written.
    // NOTE(review): ExpectEqual is assumed to throw on mismatch, which is
    // what routes a failure into the catch block below - confirm in unittest.h.
    std::string readCsvWithBom = unittest::ReadFile(tmpFile);
    unittest::ExpectEqual(std::string, origCsvWithBom, readCsvWithBom);
  }
  catch (const std::exception& err)
  {
    // Report the failure reason and flag the test as failed.
    std::cout << err.what() << std::endl;
    exitCode = 1;
  }

  // The temporary file is removed on both the success and the failure path.
  unittest::DeleteFile(tmpFile);

  return exitCode;
}

0 comments on commit bb2db43

Please sign in to comment.