From af79cc2457c620bc3e10547117aa3bfbbc4e2f88 Mon Sep 17 00:00:00 2001 From: Aaron O'Mullan Date: Fri, 4 Mar 2022 23:50:05 +0100 Subject: [PATCH 1/7] perf(ext/web): optimize atob/btoa --- ext/web/05_base64.js | 38 ++------------------- ext/web/lib.rs | 79 +++++++++++++++++++++++++++++--------------- 2 files changed, 55 insertions(+), 62 deletions(-) diff --git a/ext/web/05_base64.js b/ext/web/05_base64.js index 1244ecfd5cf36b..804d66fb88322f 100644 --- a/ext/web/05_base64.js +++ b/ext/web/05_base64.js @@ -9,21 +9,8 @@ "use strict"; ((window) => { + const core = Deno.core; const webidl = window.__bootstrap.webidl; - const { - forgivingBase64Encode, - forgivingBase64Decode, - } = window.__bootstrap.infra; - const { DOMException } = window.__bootstrap.domException; - const { - ArrayPrototypeMap, - StringPrototypeCharCodeAt, - ArrayPrototypeJoin, - SafeArrayIterator, - StringFromCharCode, - TypedArrayFrom, - Uint8Array, - } = window.__bootstrap.primordials; /** * @param {string} data @@ -36,13 +23,7 @@ prefix, context: "Argument 1", }); - - const uint8Array = forgivingBase64Decode(data); - const result = ArrayPrototypeMap( - [...new SafeArrayIterator(uint8Array)], - (byte) => StringFromCharCode(byte), - ); - return ArrayPrototypeJoin(result, ""); + return core.opSync("op_base64_atob", data); } /** @@ -56,20 +37,7 @@ prefix, context: "Argument 1", }); - const byteArray = ArrayPrototypeMap( - [...new SafeArrayIterator(data)], - (char) => { - const charCode = StringPrototypeCharCodeAt(char, 0); - if (charCode > 0xff) { - throw new DOMException( - "The string to be encoded contains characters outside of the Latin1 range.", - "InvalidCharacterError", - ); - } - return charCode; - }, - ); - return forgivingBase64Encode(TypedArrayFrom(Uint8Array, byteArray)); + return core.opSync("op_base64_btoa", data); } window.__bootstrap.base64 = { diff --git a/ext/web/lib.rs b/ext/web/lib.rs index b10cb972d1ef2a..40e4a81bf94c73 100644 --- a/ext/web/lib.rs +++ b/ext/web/lib.rs @@ -17,6 +17,7 @@ use deno_core::OpState; use deno_core::Resource; use deno_core::ResourceId; use deno_core::ZeroCopyBuf; +use encoding_rs::mem::is_str_latin1; use encoding_rs::CoderResult; use encoding_rs::Decoder; use encoding_rs::DecoderResult; @@ -85,6 +86,8 @@ pub fn init( .ops(vec![ ("op_base64_decode", op_sync(op_base64_decode)), ("op_base64_encode", op_sync(op_base64_encode)), + ("op_base64_atob", op_sync(op_base64_atob)), + ("op_base64_btoa", op_sync(op_base64_btoa)), ( "op_encoding_normalize_label", op_sync(op_encoding_normalize_label), @@ -146,21 +149,33 @@ pub fn init( } fn op_base64_decode( - _state: &mut OpState, + _: &mut OpState, input: String, _: (), ) -> Result { - let mut input: &str = &input.replace(|c| char::is_ascii_whitespace(&c), ""); + Ok(b64_decode(input)?.into()) +} + +fn op_base64_atob( + _: &mut OpState, + input: String, + _: (), +) -> Result { + Ok(String::from_utf8(b64_decode(input)?)?) +} + +fn b64_decode(input: String) -> Result, AnyError> { + let mut input = input.into_bytes(); + input.retain(|c| !c.is_ascii_whitespace()); + // "If the length of input divides by 4 leaving no remainder, then: // if input ends with one or two U+003D EQUALS SIGN (=) characters, // remove them from input." - if input.len() % 4 == 0 { - if input.ends_with("==") { - input = &input[..input.len() - 2] - } else if input.ends_with('=') { - input = &input[..input.len() - 1] - } - } + let input = match input.len() % 4 == 0 { + true if input.ends_with(b"==") => &input[..input.len() - 2], + true if input.ends_with(b"=") => &input[..input.len() - 1], + _ => &input, + }; // "If the length of input divides by 4 leaving a remainder of 1, // throw an InvalidCharacterError exception and abort these steps." @@ -170,38 +185,48 @@ fn op_base64_decode( ); } - if input - .chars() - .any(|c| c != '+' && c != '/' && !c.is_alphanumeric()) - { - return Err( + let cfg = base64::Config::new(base64::CharacterSet::Standard, true) + .decode_allow_trailing_bits(true); + let out = base64::decode_config(&input, cfg).map_err(|err| match err { + base64::DecodeError::InvalidByte(_, _) => { DomExceptionInvalidCharacterError::new( "Failed to decode base64: invalid character", ) - .into(), - ); - } - - let cfg = base64::Config::new(base64::CharacterSet::Standard, true) - .decode_allow_trailing_bits(true); - let out = base64::decode_config(&input, cfg).map_err(|err| { - DomExceptionInvalidCharacterError::new(&format!( + } + _ => DomExceptionInvalidCharacterError::new(&format!( "Failed to decode base64: {:?}", err - )) + )), })?; - Ok(ZeroCopyBuf::from(out)) + + Ok(out) } fn op_base64_encode( - _state: &mut OpState, + _: &mut OpState, s: ZeroCopyBuf, _: (), ) -> Result { + Ok(b64_encode(&s)) +} + +fn op_base64_btoa( + _: &mut OpState, + s: String, + _: (), +) -> Result { + if !is_str_latin1(&s) { + return Err(DomExceptionInvalidCharacterError::new( + "The string to be encoded contains characters outside of the Latin1 range." + ).into()); + } + Ok(b64_encode(&s)) +} + +fn b64_encode(s: impl AsRef<[u8]>) -> String { let cfg = base64::Config::new(base64::CharacterSet::Standard, true) .decode_allow_trailing_bits(true); - let out = base64::encode_config(&s, cfg); - Ok(out) + base64::encode_config(s.as_ref(), cfg) } #[derive(Deserialize)] From 44cad29a35fef887264bdf276559904d74fd4605 Mon Sep 17 00:00:00 2001 From: Aaron O'Mullan Date: Sat, 5 Mar 2022 00:35:58 +0100 Subject: [PATCH 2/7] fix: atob decode as latin1 --- ext/web/lib.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ext/web/lib.rs b/ext/web/lib.rs index 40e4a81bf94c73..c0bc38a9e848f7 100644 --- a/ext/web/lib.rs +++ b/ext/web/lib.rs @@ -17,6 +17,7 @@ use deno_core::OpState; use deno_core::Resource; use deno_core::ResourceId; use deno_core::ZeroCopyBuf; +use encoding_rs::mem::decode_latin1; use encoding_rs::mem::is_str_latin1; use encoding_rs::CoderResult; use encoding_rs::Decoder; @@ -161,7 +162,11 @@ fn op_base64_atob( input: String, _: (), ) -> Result { - Ok(String::from_utf8(b64_decode(input)?)?) + let buf = b64_decode(input)?; + match decode_latin1(&buf) { + Cow::Owned(s) => Ok(s), + Cow::Borrowed(_) => Ok(unsafe { String::from_utf8_unchecked(buf) }), // Avoid copy if already Latin1 + } } fn b64_decode(input: String) -> Result, AnyError> { From 6b29e515afdc0f1a7b01eac42cf6dcc3d2ec3072 Mon Sep 17 00:00:00 2001 From: Aaron O'Mullan Date: Sat, 5 Mar 2022 00:52:56 +0100 Subject: [PATCH 3/7] cache key --- .github/workflows/ci.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e25f33d1d3d05b..db75c1baa700b3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -263,7 +263,7 @@ jobs: ~/.cargo/registry/index ~/.cargo/registry/cache ~/.cargo/git/db - key: 4-cargo-home-${{ matrix.os }}-${{ hashFiles('Cargo.lock') }} + key: 5-cargo-home-${{ matrix.os }}-${{ hashFiles('Cargo.lock') }} # In main branch, always creates fresh cache - name: Cache build output (main) @@ -279,7 +279,7 @@ jobs: !./target/*/*.zip !./target/*/*.tar.gz key: | - 4-cargo-target-${{ matrix.os }}-${{ matrix.profile }}-${{ github.sha }} + 5-cargo-target-${{ matrix.os }}-${{ matrix.profile }}-${{ github.sha }} # Restore cache from the latest 'main' branch build. - name: Cache build output (PR) @@ -295,7 +295,7 @@ jobs: !./target/*/*.tar.gz key: never_saved restore-keys: | - 4-cargo-target-${{ matrix.os }}-${{ matrix.profile }}- + 5-cargo-target-${{ matrix.os }}-${{ matrix.profile }}- # Don't save cache after building PRs or branches other than 'main'. - name: Skip save cache (PR) From ca123c168d32cafe9746612090aa20f2f4183649 Mon Sep 17 00:00:00 2001 From: Aaron O'Mullan Date: Sat, 5 Mar 2022 15:29:47 +0100 Subject: [PATCH 4/7] fix --- ext/web/05_base64.js | 13 ++++++++++++- ext/web/lib.rs | 30 ++++++++++++++---------------- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/ext/web/05_base64.js b/ext/web/05_base64.js index 804d66fb88322f..09ce615f355853 100644 --- a/ext/web/05_base64.js +++ b/ext/web/05_base64.js @@ -11,6 +11,7 @@ ((window) => { const core = Deno.core; const webidl = window.__bootstrap.webidl; + const { DOMException } = window.__bootstrap.domException; /** * @param {string} data @@ -37,7 +38,17 @@ prefix, context: "Argument 1", }); - return core.opSync("op_base64_btoa", data); + try { + return core.opSync("op_base64_btoa", data); + } catch (e) { + if (e instanceof TypeError) { + throw new DOMException( + "The string to be encoded contains characters outside of the Latin1 range.", + "InvalidCharacterError", + ); + } + throw e; + } } window.__bootstrap.base64 = { diff --git a/ext/web/lib.rs b/ext/web/lib.rs index c0bc38a9e848f7..1cae232bd75a9d 100644 --- a/ext/web/lib.rs +++ b/ext/web/lib.rs @@ -12,13 +12,12 @@ use deno_core::include_js_files; use deno_core::op_async; use deno_core::op_sync; use deno_core::url::Url; +use deno_core::ByteString; use deno_core::Extension; use deno_core::OpState; use deno_core::Resource; use deno_core::ResourceId; use deno_core::ZeroCopyBuf; -use encoding_rs::mem::decode_latin1; -use encoding_rs::mem::is_str_latin1; use encoding_rs::CoderResult; use encoding_rs::Decoder; use encoding_rs::DecoderResult; @@ -154,25 +153,29 @@ fn op_base64_decode( input: String, _: (), ) -> Result { - Ok(b64_decode(input)?.into()) + Ok(b64_decode(input, false)?.into()) } fn op_base64_atob( _: &mut OpState, input: String, _: (), -) -> Result { - let buf = b64_decode(input)?; - match decode_latin1(&buf) { - Cow::Owned(s) => Ok(s), - Cow::Borrowed(_) => Ok(unsafe { String::from_utf8_unchecked(buf) }), // Avoid copy if already Latin1 - } +) -> Result { + Ok(ByteString(b64_decode(input, true)?)) } -fn b64_decode(input: String) -> Result, AnyError> { +fn b64_decode(input: String, padding: bool) -> Result, AnyError> { let mut input = input.into_bytes(); input.retain(|c| !c.is_ascii_whitespace()); + // If padding is expected, fail if not 4-byte aligned + // TODO(@AaronO): cleanup + if padding && input.len() % 4 != 0 && (input.ends_with(b"==") || input.ends_with(b"=")) { + return Err( + DomExceptionInvalidCharacterError::new("Failed to decode base64.").into(), + ); + } + // "If the length of input divides by 4 leaving no remainder, then: // if input ends with one or two U+003D EQUALS SIGN (=) characters, // remove them from input." @@ -217,14 +220,9 @@ fn op_base64_encode( fn op_base64_btoa( _: &mut OpState, - s: String, + s: ByteString, _: (), ) -> Result { - if !is_str_latin1(&s) { - return Err(DomExceptionInvalidCharacterError::new( - "The string to be encoded contains characters outside of the Latin1 range." - ).into()); - } Ok(b64_encode(&s)) } From 73ba2a5f1deefe05ef99a4916717215698468b10 Mon Sep 17 00:00:00 2001 From: Aaron O'Mullan Date: Sat, 5 Mar 2022 15:34:33 +0100 Subject: [PATCH 5/7] fmt --- ext/web/lib.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ext/web/lib.rs b/ext/web/lib.rs index 1cae232bd75a9d..13b2af4e0fcee2 100644 --- a/ext/web/lib.rs +++ b/ext/web/lib.rs @@ -170,7 +170,10 @@ fn b64_decode(input: String, padding: bool) -> Result, AnyError> { // If padding is expected, fail if not 4-byte aligned // TODO(@AaronO): cleanup - if padding && input.len() % 4 != 0 && (input.ends_with(b"==") || input.ends_with(b"=")) { + if padding + && input.len() % 4 != 0 + && (input.ends_with(b"==") || input.ends_with(b"=")) + { return Err( DomExceptionInvalidCharacterError::new("Failed to decode base64.").into(), ); From af8b08f7d7b297dee07180a149f31552a20efb38 Mon Sep 17 00:00:00 2001 From: Aaron O'Mullan Date: Sat, 5 Mar 2022 15:39:46 +0100 Subject: [PATCH 6/7] lint --- ext/web/05_base64.js | 1 + 1 file changed, 1 insertion(+) diff --git a/ext/web/05_base64.js b/ext/web/05_base64.js index 09ce615f355853..99fd974e3060f2 100644 --- a/ext/web/05_base64.js +++ b/ext/web/05_base64.js @@ -12,6 +12,7 @@ const core = Deno.core; const webidl = window.__bootstrap.webidl; const { DOMException } = window.__bootstrap.domException; + const { TypeError } = window.__bootstrap.primordials; /** * @param {string} data From 1481e42663b5765e2cf1f038b90e262f5955d129 Mon Sep 17 00:00:00 2001 From: Aaron O'Mullan Date: Sat, 5 Mar 2022 16:05:57 +0100 Subject: [PATCH 7/7] use ByteString input for atob --- ext/web/05_base64.js | 12 +++++++++++- ext/web/lib.rs | 28 +++++++++++++--------------- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/ext/web/05_base64.js b/ext/web/05_base64.js index 99fd974e3060f2..8238831f86d399 100644 --- a/ext/web/05_base64.js +++ b/ext/web/05_base64.js @@ -25,7 +25,17 @@ prefix, context: "Argument 1", }); - return core.opSync("op_base64_atob", data); + try { + return core.opSync("op_base64_atob", data); + } catch (e) { + if (e instanceof TypeError) { + throw new DOMException( + "Failed to decode base64: invalid character", + "InvalidCharacterError", + ); + } + throw e; + } } /** diff --git a/ext/web/lib.rs b/ext/web/lib.rs index 13b2af4e0fcee2..b8f948159ea9c5 100644 --- a/ext/web/lib.rs +++ b/ext/web/lib.rs @@ -153,39 +153,37 @@ fn op_base64_decode( input: String, _: (), ) -> Result { - Ok(b64_decode(input, false)?.into()) + let mut input = input.into_bytes(); + input.retain(|c| !c.is_ascii_whitespace()); + Ok(b64_decode(&input)?.into()) } fn op_base64_atob( _: &mut OpState, - input: String, + s: ByteString, _: (), ) -> Result { - Ok(ByteString(b64_decode(input, true)?)) -} - -fn b64_decode(input: String, padding: bool) -> Result, AnyError> { - let mut input = input.into_bytes(); - input.retain(|c| !c.is_ascii_whitespace()); + let mut s = s.0; + s.retain(|c| !c.is_ascii_whitespace()); // If padding is expected, fail if not 4-byte aligned - // TODO(@AaronO): cleanup - if padding - && input.len() % 4 != 0 - && (input.ends_with(b"==") || input.ends_with(b"=")) - { + if s.len() % 4 != 0 && (s.ends_with(b"==") || s.ends_with(b"=")) { return Err( DomExceptionInvalidCharacterError::new("Failed to decode base64.").into(), ); } + Ok(ByteString(b64_decode(&s)?)) +} + +fn b64_decode(input: &[u8]) -> Result, AnyError> { // "If the length of input divides by 4 leaving no remainder, then: // if input ends with one or two U+003D EQUALS SIGN (=) characters, // remove them from input." let input = match input.len() % 4 == 0 { true if input.ends_with(b"==") => &input[..input.len() - 2], true if input.ends_with(b"=") => &input[..input.len() - 1], - _ => &input, + _ => input, }; // "If the length of input divides by 4 leaving a remainder of 1, @@ -198,7 +196,7 @@ fn b64_decode(input: String, padding: bool) -> Result, AnyError> { let cfg = base64::Config::new(base64::CharacterSet::Standard, true) .decode_allow_trailing_bits(true); - let out = base64::decode_config(&input, cfg).map_err(|err| match err { + let out = base64::decode_config(input, cfg).map_err(|err| match err { base64::DecodeError::InvalidByte(_, _) => { DomExceptionInvalidCharacterError::new( "Failed to decode base64: invalid character",