buffer: improve Buffer#fill performance

1) This improves the performance for Buffer#fill by using shortcuts. 2) It also ports throwing errors to JS. That way they contain the proper error code. 3) Using negative `end` values will from now on result in an error instead of just doing nothing. 4) Passing in `null` as encoding is from now on accepted as 'utf8'. PR-URL: nodejs#18790 Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Benjamin Gruenbaum <benjamingr@gmail.com> Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
BridgeAR · Mar 2, 2018 · 177b731 · 177b731
1 parent d3af120
commit 177b731
Show file tree

Hide file tree

Showing 6 changed files with 85 additions and 136 deletions.
diff --git a/doc/api/buffer.md b/doc/api/buffer.md
@@ -1202,6 +1202,9 @@ console.log(buf1.equals(buf3));
 <!-- YAML
 added: v0.5.0
 changes:
+  - version: REPLACEME
+    pr-url: https://github.com/nodejs/node/pull/18790
+    description: Negative `end` values throw an `ERR_INDEX_OUT_OF_RANGE` error.
   - version: REPLACEME
     pr-url: https://github.com/nodejs/node/pull/18129
     description: Attempting to fill a non-zero length buffer with a zero length

diff --git a/lib/buffer.js b/lib/buffer.js
@@ -254,16 +254,8 @@ function assertSize(size) {
  **/
 Buffer.alloc = function alloc(size, fill, encoding) {
   assertSize(size);
-  if (size > 0 && fill !== undefined) {
-    // Since we are filling anyway, don't zero fill initially.
-    // Only pay attention to encoding if it's a string. This
-    // prevents accidentally sending in a number that would
-    // be interpreted as a start offset.
-    if (typeof encoding !== 'string')
-      encoding = undefined;
-    const ret = createUnsafeBuffer(size);
-    if (fill_(ret, fill, encoding) > 0)
-      return ret;
+  if (fill !== undefined && size > 0) {
+    return _fill(createUnsafeBuffer(size), fill, encoding);
   }
   return new FastBuffer(size);
 };
@@ -834,14 +826,12 @@ Buffer.prototype.includes = function includes(val, byteOffset, encoding) {
 //    buffer.fill(buffer[, offset[, end]])
 //    buffer.fill(string[, offset[, end]][, encoding])
 Buffer.prototype.fill = function fill(val, start, end, encoding) {
-  fill_(this, val, start, end, encoding);
-  return this;
+  return _fill(this, val, start, end, encoding);
 };
 
-function fill_(buf, val, start, end, encoding) {
-  // Handle string cases:
+function _fill(buf, val, start, end, encoding) {
   if (typeof val === 'string') {
-    if (typeof start === 'string') {
+    if (start === undefined || typeof start === 'string') {
       encoding = start;
       start = 0;
       end = buf.length;
@@ -850,46 +840,60 @@ function fill_(buf, val, start, end, encoding) {
       end = buf.length;
     }
 
-    if (encoding !== undefined && typeof encoding !== 'string') {
-      throw new errors.TypeError('ERR_INVALID_ARG_TYPE', 'encoding',
-                                 'string', encoding);
-    }
-    var normalizedEncoding = normalizeEncoding(encoding);
+    const normalizedEncoding = normalizeEncoding(encoding);
     if (normalizedEncoding === undefined) {
+      if (typeof encoding !== 'string') {
+        throw new errors.TypeError('ERR_INVALID_ARG_TYPE', 'encoding', 'string',
+                                   encoding);
+      }
       throw new errors.TypeError('ERR_UNKNOWN_ENCODING', encoding);
     }
 
     if (val.length === 0) {
-      // Previously, if val === '', the Buffer would not fill,
-      // which is rather surprising.
+      // If val === '' default to zero.
       val = 0;
     } else if (val.length === 1) {
-      var code = val.charCodeAt(0);
-      if ((normalizedEncoding === 'utf8' && code < 128) ||
-          normalizedEncoding === 'latin1') {
-        // Fast path: If `val` fits into a single byte, use that numeric value.
-        val = code;
+      // Fast path: If `val` fits into a single byte, use that numeric value.
+      if (normalizedEncoding === 'utf8') {
+        const code = val.charCodeAt(0);
+        if (code < 128) {
+          val = code;
+        }
+      } else if (normalizedEncoding === 'latin1') {
+        val = val.charCodeAt(0);
       }
     }
-  } else if (typeof val === 'number') {
-    val = val & 255;
+  } else {
+    encoding = undefined;
   }
 
-  // Invalid ranges are not set to a default, so can range check early.
-  if (start < 0 || end > buf.length)
-    throw new errors.RangeError('ERR_INDEX_OUT_OF_RANGE');
-
-  if (end <= start)
-    return 0;
-
-  start = start >>> 0;
-  end = end === undefined ? buf.length : end >>> 0;
-  const fillLength = bindingFill(buf, val, start, end, encoding);
+  if (start === undefined) {
+    start = 0;
+    end = buf.length;
+  } else {
+    // Invalid ranges are not set to a default, so can range check early.
+    if (end === undefined) {
+      if (start < 0)
+        throw new errors.RangeError('ERR_INDEX_OUT_OF_RANGE');
+      end = buf.length;
+    } else {
+      if (start < 0 || end > buf.length || end < 0)
+        throw new errors.RangeError('ERR_INDEX_OUT_OF_RANGE');
+      end = end >>> 0;
+    }
+    start = start >>> 0;
+    if (start >= end)
+      return buf;
+  }
 
-  if (fillLength === -1)
-    throw new errors.TypeError('ERR_INVALID_ARG_VALUE', 'value', val);
+  const res = bindingFill(buf, val, start, end, encoding);
+  if (res < 0) {
+    if (res === -1)
+      throw new errors.TypeError('ERR_INVALID_ARG_VALUE', 'value', val);
+    throw new errors.RangeError('ERR_INDEX_OUT_OF_RANGE');
+  }
 
-  return fillLength;
+  return buf;
 }
 
 

diff --git a/lib/internal/util.js b/lib/internal/util.js
@@ -102,7 +102,7 @@ function assertCrypto() {
 // function in order to avoid the performance hit.
 // Return undefined if there is no match.
 function normalizeEncoding(enc) {
-  if (!enc) return 'utf8';
+  if (enc == null || enc === '') return 'utf8';
   let retried;
   while (true) {
     switch (enc) {

diff --git a/src/node_buffer.cc b/src/node_buffer.cc
@@ -571,10 +571,10 @@ void Fill(const FunctionCallbackInfo<Value>& args) {
   Local<String> str_obj;
   size_t str_length;
   enum encoding enc;
-  THROW_AND_RETURN_IF_OOB(start <= end);
-  THROW_AND_RETURN_IF_OOB(fill_length + start <= ts_obj_length);
 
-  args.GetReturnValue().Set(static_cast<double>(fill_length));
+  // OOB Check. Throw the error in JS.
+  if (start > end || fill_length + start > ts_obj_length)
+    return args.GetReturnValue().Set(-2);
 
   // First check if Buffer has been passed.
   if (Buffer::HasInstance(args[1])) {
@@ -593,29 +593,24 @@ void Fill(const FunctionCallbackInfo<Value>& args) {
 
   str_obj = args[1]->ToString(env->context()).ToLocalChecked();
   enc = ParseEncoding(env->isolate(), args[4], UTF8);
-  str_length =
-      enc == UTF8 ? str_obj->Utf8Length() :
-      enc == UCS2 ? str_obj->Length() * sizeof(uint16_t) : str_obj->Length();
-
-  if (str_length == 0) {
-    args.GetReturnValue().Set(0);
-    return;
-  }
 
   // Can't use StringBytes::Write() in all cases. For example if attempting
   // to write a two byte character into a one byte Buffer.
   if (enc == UTF8) {
+    str_length = str_obj->Utf8Length();
     node::Utf8Value str(env->isolate(), args[1]);
     memcpy(ts_obj_data + start, *str, MIN(str_length, fill_length));
 
   } else if (enc == UCS2) {
+    str_length = str_obj->Length() * sizeof(uint16_t);
     node::TwoByteValue str(env->isolate(), args[1]);
     if (IsBigEndian())
       SwapBytes16(reinterpret_cast<char*>(&str[0]), str_length);
 
     memcpy(ts_obj_data + start, *str, MIN(str_length, fill_length));
 
   } else {
+    str_length = str_obj->Length();
     // Write initial String to Buffer, then use that memory to copy remainder
     // of string. Correct the string length for cases like HEX where less than
     // the total string length is written.