Skip to content

Commit

Permalink
add json minify
Browse files Browse the repository at this point in the history
  • Loading branch information
wjr-z committed Aug 14, 2024
1 parent 75990fd commit 51ab727
Show file tree
Hide file tree
Showing 7 changed files with 352 additions and 25 deletions.
Empty file added include/wjr/atomic.hpp
Empty file.
6 changes: 6 additions & 0 deletions include/wjr/format/encoding/huffman.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#ifndef WJR_FORMAT_ENCODING_HUFFMAN_HPP__
#define WJR_FORMAT_ENCODING_HUFFMAN_HPP__

// Currently empty: this header does not declare anything inside wjr yet.
namespace wjr {}

#endif // WJR_FORMAT_ENCODING_HUFFMAN_HPP__
2 changes: 1 addition & 1 deletion include/wjr/json/json.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1165,7 +1165,7 @@ struct __json_serializer_array {
auto val_first = val.begin();

if (n > val.size()) {
auto val_last = val.end();
const auto val_last = val.end();
for (; val_first != val_last; ++first, ++val_first) {
first->get_to(*val_first);
}
Expand Down
2 changes: 2 additions & 0 deletions include/wjr/json/lexer-impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ class lexer {
uint32_t idx = 0;
};

/// Writes a compacted copy of the text in [first, last) to dst and returns a
/// pointer to the end of the output that was produced. Never throws.
///
/// NOTE(review): presumably this strips insignificant JSON whitespace, as the
/// name suggests; the required capacity of dst is not stated at this
/// declaration - confirm both against the definition before relying on them.
extern char *minify(char *dst, const char *first, const char *last) noexcept;

} // namespace wjr::json

#endif // WJR_JSON_LEXER_IMPL_HPP__
1 change: 1 addition & 0 deletions include/wjr/x86/json/lexer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

// When SSSE3 is available, flag the JSON lexer read routine and the JSON
// minify routine as having builtin versions. Code that checks
// WJR_HAS_BUILTIN(JSON_LEXER_READER_READ_BUF) or
// WJR_HAS_BUILTIN(JSON_MINIFY_BUF) uses these flags to decide whether its
// generic fallback definitions are compiled.
// NOTE(review): the SSSE3 definitions themselves are not visible from this
// header - confirm that both routines are actually provided.
#if WJR_HAS_SIMD(SSSE3)
#define WJR_HAS_BUILTIN_JSON_LEXER_READER_READ_BUF WJR_HAS_DEF
#define WJR_HAS_BUILTIN_JSON_MINIFY_BUF WJR_HAS_DEF
#endif

#endif // WJR_X86_JSON_LEXER_HPP__
116 changes: 94 additions & 22 deletions src/wjr/json/lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

namespace wjr::json {

#if !WJR_HAS_BUILTIN(JSON_LEXER_READER_READ_BUF)
#if !WJR_HAS_BUILTIN(JSON_LEXER_READER_READ_BUF) || !WJR_HAS_BUILTIN(JSON_MINIFY_BUF)

namespace lexer_detail {

Expand All @@ -18,7 +18,18 @@ constexpr static std::array<uint8_t, 256> code_table = {
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4};

}
// Compaction offsets, laid out as 16 rows of 8 entries (one row per 4-bit
// "drop" mask). Entry [mask * 8 + j] is the number of clear bits among bits
// 0..j of the mask, i.e. how many of the first j + 1 elements are kept when
// every element whose mask bit is set is discarded. Since the mask only has
// 4 bits, columns 4..7 simply keep counting upwards.
constexpr static std::array<uint8_t, 16 * 8> diff_table = {
    1, 2, 3, 4, 5, 6, 7, 8, // mask 0b0000
    0, 1, 2, 3, 4, 5, 6, 7, // mask 0b0001
    1, 1, 2, 3, 4, 5, 6, 7, // mask 0b0010
    0, 0, 1, 2, 3, 4, 5, 6, // mask 0b0011
    1, 2, 2, 3, 4, 5, 6, 7, // mask 0b0100
    0, 1, 1, 2, 3, 4, 5, 6, // mask 0b0101
    1, 1, 1, 2, 3, 4, 5, 6, // mask 0b0110
    0, 0, 0, 1, 2, 3, 4, 5, // mask 0b0111
    1, 2, 3, 3, 4, 5, 6, 7, // mask 0b1000
    0, 1, 2, 2, 3, 4, 5, 6, // mask 0b1001
    1, 1, 2, 2, 3, 4, 5, 6, // mask 0b1010
    0, 0, 1, 1, 2, 3, 4, 5, // mask 0b1011
    1, 2, 2, 2, 3, 4, 5, 6, // mask 0b1100
    0, 1, 1, 1, 2, 3, 4, 5, // mask 0b1101
    1, 1, 1, 1, 2, 3, 4, 5, // mask 0b1110
    0, 0, 0, 0, 1, 2, 3, 4  // mask 0b1111
};

} // namespace lexer_detail

#endif

#if !WJR_HAS_BUILTIN(JSON_LEXER_READER_READ_BUF)

typename lexer::result_type lexer::read(uint32_t *token_buf,
size_type token_buf_size) noexcept {
Expand All @@ -43,27 +54,8 @@ typename lexer::result_type lexer::read(uint32_t *token_buf,
if (diff == 64) {
ptr = first;
} else {
char ch;
switch (last[-1]) {
case ' ':
case '\n':
case '\r':
case '\t':
case '[':
case ']':
case '{':
case '}': {
ch = ' ';
break;
}
default: {
ch = '\0';
break;
}
}

std::memcpy(stk, first, diff);
std::memset(stk + diff, ch, 64 - diff);
std::memset(stk + diff, ' ', 64 - diff);
ptr = stk;
}

Expand Down Expand Up @@ -175,4 +167,84 @@ typename lexer::result_type lexer::read(uint32_t *token_buf,

#endif

#if !WJR_HAS_BUILTIN(JSON_MINIFY_BUF)

/// Copies the bytes of [first, last) to dst, leaving out every byte that ends
/// up flagged in the per-block "droppable" mask, and returns the pointer just
/// past the last byte that was kept. An empty input returns dst unchanged.
///
/// The input is consumed in blocks of 64 bytes. A final block shorter than 64
/// bytes is copied into a local buffer that is padded with ' '. Each byte is
/// classified through code_table into one 64-bit mask per class.
/// NOTE(review): classes 0, 1 and 3 are presumably backslash, double quote and
/// whitespace - code_table is defined outside this function, confirm there.
///
/// Two 64-bit carries link consecutive blocks: one for an escape that is still
/// pending at the end of a block, one for "the block ended inside a quoted
/// region". Class-3 bytes inside a quoted region are never dropped.
///
/// NOTE(review): every group of four input bytes stores to dst[0] (and up to
/// dst[3]) before dst is advanced, so a byte just past the returned pointer can
/// be written. Confirm the capacity the callers guarantee for dst.
char *minify(char *dst, const char *first, const char *last) noexcept {
    if (WJR_UNLIKELY(first == last)) {
        return dst;
    }

    using namespace lexer_detail;

    WJR_ASSERT_ASSUME_L2(first < last);

    uint64_t string_carry = 0; // all ones while the previous block ended inside quotes
    uint64_t escape_carry = 0; // 1 when the previous block ended with a pending escape

    char tail_block[64];

    do {
        const size_t remaining = last - first;
        const char *block = first;

        if (WJR_LIKELY(remaining > 64)) {
            first += 64;
        } else {
            if (remaining != 64) {
                // Short final block: work on a space-padded private copy.
                std::memset(tail_block, ' ', 64);
                std::memcpy(tail_block, first, remaining);
                block = tail_block;
            }

            first = last;
        }

        // Four independent accumulators (one per position modulo 4), each
        // holding one bit mask per character class.
        uint64_t lanes[4][5] = {{0}};

        for (int base = 0; base < 64; base += 4) {
            for (int lane = 0; lane < 4; ++lane) {
                const int bit = base + lane;
                lanes[lane][code_table[static_cast<uint8_t>(block[bit])]] |= 1ull << bit;
            }
        }

        const auto merged = [&lanes](int cls) -> uint64_t {
            return lanes[0][cls] | lanes[1][cls] | lanes[2][cls] | lanes[3][cls];
        };

        const uint64_t backslashes = merged(0);
        uint64_t quotes = merged(1);
        uint64_t droppable = merged(3);

        // Positions whose character is escaped by a preceding backslash run.
        uint64_t escaped;
        if (WJR_LIKELY(backslashes == 0)) {
            escaped = escape_carry;
            escape_carry = 0;
        } else {
            const uint64_t code = calc_backslash(backslashes & ~escape_carry);
            const uint64_t carry_out = (code & backslashes) >> 63;
            escaped = code ^ (backslashes | escape_carry);
            escape_carry = carry_out;
        }

        // Escaped quotes do not open or close a quoted region.
        quotes &= ~escaped;

        const uint64_t in_string = prefix_xor(quotes) ^ string_carry;
        // Broadcast the top bit so the next block starts in the right state.
        string_carry = static_cast<uint64_t>(static_cast<int64_t>(in_string) >> 63);

        // Keep class-3 bytes that are quotes or lie inside a quoted region.
        droppable &= ~in_string & ~quotes;

        for (int base = 0; base < 64; base += 4) {
            const uint8_t drop = (droppable >> base) & 0x0F;
            const uint8_t *const offsets = diff_table.data() + drop * 8;

            // Store order matters: a dropped byte is overwritten by the next
            // kept byte that lands on the same output slot.
            dst[0] = block[base];
            dst[offsets[0]] = block[base + 1];
            dst[offsets[1]] = block[base + 2];
            dst[offsets[2]] = block[base + 3];

            dst += offsets[3];
        }

    } while (WJR_LIKELY(first != last));

    return dst;
}

#endif

} // namespace wjr::json
Loading

0 comments on commit 51ab727

Please sign in to comment.