Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

buffer: optimize hex parsing. #7602

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions benchmark/buffers/buffer-hex.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
'use strict';

const common = require('../common.js');

const bench = common.createBenchmark(main, {
len: [0, 1, 64, 1024],
n: [1e7]
});

function main(conf) {
const len = conf.len | 0;
const n = conf.n | 0;
const buf = Buffer.alloc(len);
var i, b;

for (i = 0; i < buf.length; i++)
buf[i] = i & 0xff;

const hex = buf.toString('hex');

bench.start();

for (i = 0; i < n; i += 1)
b = Buffer.from(hex, 'hex');

bench.end(n);
}
37 changes: 24 additions & 13 deletions src/string_bytes.cc
Original file line number Diff line number Diff line change
Expand Up @@ -143,16 +143,27 @@ const int8_t unbase64_table[256] =
};


template <typename TypeName>
unsigned hex2bin(TypeName c) {
if (c >= '0' && c <= '9')
return c - '0';
if (c >= 'A' && c <= 'F')
return 10 + (c - 'A');
if (c >= 'a' && c <= 'f')
return 10 + (c - 'a');
return static_cast<unsigned>(-1);
}
static const int8_t unhex_table[256] =
{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};

#define unhex(x) \
static_cast<unsigned>(unhex_table[static_cast<uint8_t>(x)])


template <typename TypeName>
Expand All @@ -162,11 +173,11 @@ size_t hex_decode(char* buf,
const size_t srcLen) {
size_t i;
for (i = 0; i < len && i * 2 + 1 < srcLen; ++i) {
unsigned a = hex2bin(src[i * 2 + 0]);
unsigned b = hex2bin(src[i * 2 + 1]);
unsigned a = unhex(src[i * 2 + 0]);
unsigned b = unhex(src[i * 2 + 1]);
if (!~a || !~b)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this still work with unhex()’s return type changed? It seems to me some of the speedup may come from the compiler eliminating this branch completely because it knows the upper bits of a and b will never be set:

$ ./node-master -p 'Buffer(10).write("abcdxx", "hex")'
2
$ ./node -p 'Buffer(10).write("abcdxx", "hex")'
3

(aka we need more tests to cover these cases?)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@addaleax, good catch. I didn't notice the type difference. It looks like you're right.

Applying this patch:

diff --git a/src/string_bytes.cc b/src/string_bytes.cc
index c216c5d..cb0e78a 100644
--- a/src/string_bytes.cc
+++ b/src/string_bytes.cc
@@ -173,9 +173,9 @@ size_t hex_decode(char* buf,
                   const size_t srcLen) {
   size_t i;
   for (i = 0; i < len && i * 2 + 1 < srcLen; ++i) {
-    unsigned a = unhex(src[i * 2 + 0]);
-    unsigned b = unhex(src[i * 2 + 1]);
-    if (!~a || !~b)
+    uint8_t a = unhex(src[i * 2 + 0]);
+    uint8_t b = unhex(src[i * 2 + 1]);
+    if ((a | b) & 0x80)
       return i;
     buf[i] = (a << 4) | b;
   }

Kills some of the gained perf, but large hex strings are still faster:

$ ./out/Release/node benchmark/buffers/buffer-hex.js
buffers/buffer-hex.js len="0" n="10000000": 5888692.25695
buffers/buffer-hex.js len="1" n="10000000": 1471220.48559
buffers/buffer-hex.js len="64" n="10000000": 1206331.66340
buffers/buffer-hex.js len="1024" n="10000000": 385088.76228

I'll see what else I can do, and probably add a test for bad hex strings.

return i;
buf[i] = a * 16 + b;
buf[i] = (a << 4) | b;
}

return i;
Expand Down
44 changes: 44 additions & 0 deletions test/parallel/test-buffer-badhex.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
'use strict';
var common = require('../common');
var assert = require('assert');

var Buffer = require('buffer').Buffer;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

const here please


// Test hex strings and bad hex strings
{
const buf1 = Buffer.alloc(4);
assert.equal(buf1.length, 4);
assert.deepStrictEqual(buf1, new Buffer([0, 0, 0, 0]));
assert.equal(buf1.write('abcdxx', 0, 'hex'), 2);
assert.deepStrictEqual(buf1, new Buffer([0xab, 0xcd, 0x00, 0x00]));
assert.equal(buf1.toString('hex'), 'abcd0000');
assert.equal(buf1.write('abcdef01', 0, 'hex'), 4);
assert.deepStrictEqual(buf1, new Buffer([0xab, 0xcd, 0xef, 0x01]));
assert.equal(buf1.toString('hex'), 'abcdef01');

const buf2 = Buffer.from(buf1.toString('hex'), 'hex');
assert.equal(buf1.toString('hex'), buf2.toString('hex'));

const buf3 = Buffer.alloc(5);
assert.equal(buf3.write('abcdxx', 1, 'hex'), 2);
assert.equal(buf3.toString('hex'), '00abcd0000');

const buf4 = Buffer.alloc(4);
assert.deepStrictEqual(buf4, new Buffer([0, 0, 0, 0]));
assert.equal(buf4.write('xxabcd', 0, 'hex'), 0);
assert.deepStrictEqual(buf4, new Buffer([0, 0, 0, 0]));
assert.equal(buf4.write('xxab', 1, 'hex'), 0);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: assert.strictEqual is generally preferred over assert.equal, especially for comparisons with 0. :)

assert.deepStrictEqual(buf4, new Buffer([0, 0, 0, 0]));
assert.equal(buf4.write('cdxxab', 0, 'hex'), 1);
assert.deepStrictEqual(buf4, new Buffer([0xcd, 0, 0, 0]));

const buf5 = Buffer.alloc(256);
for (let i = 0; i < 256; i++)
buf5[i] = i;

const hex = buf5.toString('hex');
assert.deepStrictEqual(Buffer.from(hex, 'hex'), buf5);

const badHex = hex.slice(0, 256) + 'xx' + hex.slice(256, 510);
assert.deepStrictEqual(Buffer.from(badHex, 'hex'), buf5.slice(0, 128));
}