-
Notifications
You must be signed in to change notification settings - Fork 338
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add buffer.transcode for nodejs_compat
- Loading branch information
Showing
8 changed files
with
322 additions
and
24 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,205 @@ | ||
// Copyright (c) 2017-2022 Cloudflare, Inc. | ||
// Licensed under the Apache 2.0 license found in the LICENSE file or at: | ||
// https://opensource.org/licenses/Apache-2.0 | ||
// Copyright Joyent and Node contributors. All rights reserved. MIT license. | ||
|
||
#include "i18n.h" | ||
|
||
#include <workerd/jsg/exception.h> | ||
|
||
#include <unicode/putil.h> | ||
#include <unicode/timezone.h> | ||
#include <unicode/uchar.h> | ||
#include <unicode/uclean.h> | ||
#include <unicode/ucnv.h> | ||
#include <unicode/udata.h> | ||
#include <unicode/uidna.h> | ||
#include <unicode/ulocdata.h> | ||
#include <unicode/urename.h> | ||
#include <unicode/ustring.h> | ||
#include <unicode/utf16.h> | ||
#include <unicode/utf8.h> | ||
#include <unicode/utypes.h> | ||
#include <unicode/uvernum.h> | ||
#include <unicode/uversion.h> | ||
|
||
namespace workerd::api::node { | ||
|
||
namespace i18n { | ||
|
||
namespace { | ||
|
||
struct ConverterDisposer : public kj::Disposer { | ||
static const ConverterDisposer INSTANCE; | ||
void disposeImpl(void* pointer) const override { | ||
ucnv_close(reinterpret_cast<UConverter*>(pointer)); | ||
} | ||
}; | ||
|
||
const ConverterDisposer ConverterDisposer::INSTANCE; | ||
|
||
// Maps an Encoding value to the converter name string passed to ucnv_open().
// Any Encoding value not listed here is treated as unreachable.
const char* getEncodingName(Encoding input) {
  if (input == Encoding::ASCII) {
    return "us-ascii";
  }
  if (input == Encoding::LATIN1) {
    return "iso8859-1";
  }
  if (input == Encoding::UCS2) {
    return "utf16le";
  }
  if (input == Encoding::UTF8) {
    return "utf-8";
  }
  KJ_UNREACHABLE;
}
|
||
// Signature shared by all transcoding strategies in this file: takes the
// source bytes plus the source and destination encodings, and yields the
// converted bytes or kj::none on failure.
using TranscodeImpl = kj::Maybe<kj::Array<kj::byte>> (*)(kj::ArrayPtr<kj::byte> source,
    Encoding fromEncoding, Encoding toEncoding);
|
||
// Generic transcoding path: converts `source` from `fromEncoding` to
// `toEncoding` through ICU's ucnv_convertEx().
//
// Characters that cannot be represented in the destination encoding are
// replaced with '?' (repeated to the destination's minimum character size).
//
// Returns the converted bytes, or kj::none if ICU reports an error.
kj::Maybe<kj::Array<kj::byte>> TranscodeDefault(kj::ArrayPtr<kj::byte> source,
    Encoding fromEncoding, Encoding toEncoding) {
  // Nothing to convert. Returning early also avoids handing ICU null
  // source/target pointers, which ucnv_convertEx() rejects as illegal
  // arguments.
  if (source.size() == 0) {
    return kj::heapArray<kj::byte>(0);
  }

  Converter to(toEncoding);
  std::string substitute(to.minSize(), '?');
  to.setSubstitudeChars(substitute);
  Converter from(fromEncoding);

  // Worst case every input byte expands to the destination's largest
  // character, so the capacity must be a product, not a sum. A sum is too
  // small whenever the output grows (e.g. latin1 -> utf-8) and makes ICU fail
  // with a buffer-overflow status on valid input.
  const size_t limit = source.size() * to.maxSize();
  auto out = kj::heapArray<kj::byte>(limit);
  char* target = out.asChars().begin();
  const char* source_ = source.asChars().begin();
  UErrorCode status = U_ZERO_ERROR;
  ucnv_convertEx(to.conv(), from.conv(), &target, target + limit, &source_,
      source_ + source.size(), nullptr, nullptr, nullptr, nullptr, true, true, &status);
  if (U_SUCCESS(status)) {
    // `target` was advanced by ICU to one past the last byte written.
    return out.slice(0, target - out.asChars().begin()).attach(kj::mv(out));
  }

  return kj::none;
}
|
||
// Converts `source` (encoded as `fromEncoding`) into UTF-16 code units using
// ucnv_toUChars() and returns them as raw bytes. `toEncoding` is unused; the
// parameter exists so the function matches TranscodeImpl.
//
// Returns kj::none if ICU reports an error.
kj::Maybe<kj::Array<kj::byte>> TranscodeToUCS2(kj::ArrayPtr<kj::byte> source, Encoding fromEncoding,
    Encoding toEncoding) {
  UErrorCode status = U_ZERO_ERROR;
  Converter from(fromEncoding);

  // One UChar per input byte.
  auto out = kj::heapArray<UChar>(source.size());

  // The capacity argument is measured in UChars and must match the real
  // allocation. Overstating it lets ICU write its NUL terminator one UChar
  // past the end of `out`.
  const auto written = ucnv_toUChars(from.conv(), out.begin(), static_cast<int32_t>(out.size()),
      source.asChars().begin(), static_cast<int32_t>(source.size()), &status);
  if (U_SUCCESS(status)) {
    // Only expose the code units ICU actually produced.
    return out.slice(0, written).asBytes().attach(kj::mv(out));
  }
  return kj::none;
}
|
||
// Converts UTF-16 code units stored in `source` into `toEncoding` using
// ucnv_fromUChars(). `fromEncoding` is unused; the parameter exists so the
// function matches TranscodeImpl.
//
// Characters that cannot be represented in the destination encoding are
// replaced with '?' (repeated to the destination's minimum character size).
// A trailing odd byte in `source` (an incomplete code unit) is ignored.
//
// Returns the converted bytes, or kj::none if ICU reports an error.
kj::Maybe<kj::Array<kj::byte>> TranscodeFromUCS2(kj::ArrayPtr<kj::byte> source,
    Encoding fromEncoding, Encoding toEncoding) {
  UErrorCode status = U_ZERO_ERROR;
  Converter to(toEncoding);
  std::string substitute(to.minSize(), '?');
  to.setSubstitudeChars(substitute);

  // `source` is a byte buffer, so the number of UTF-16 code units is the byte
  // count divided by sizeof(UChar), not multiplied by it.
  const size_t lengthInChars = source.size() / sizeof(UChar);

  // Copy into a UChar-typed heap buffer: the incoming bytes are not guaranteed
  // to be aligned for UChar access, and a heap buffer has no fixed size limit.
  auto sourcebuf = kj::heapArray<UChar>(lengthInChars);
  sourcebuf.asBytes().copyFrom(source.slice(0, lengthInChars * sizeof(UChar)));

  // Size the destination for the largest character the target encoding can
  // produce per code unit.
  auto destbuf = kj::heapArray<kj::byte>(lengthInChars * to.maxSize());
  const auto len = ucnv_fromUChars(to.conv(), destbuf.asChars().begin(),
      static_cast<int32_t>(destbuf.size()), sourcebuf.begin(),
      static_cast<int32_t>(lengthInChars), &status);

  if (U_SUCCESS(status)) {
    return destbuf.slice(0, len).attach(kj::mv(destbuf));
  }

  return kj::none;
}
|
||
// Placeholder for the UTF-8 -> UCS2 path. It performs no conversion and
// always yields kj::none; the arguments are ignored.
kj::Maybe<kj::Array<kj::byte>> TranscodeUcs2FromUtf8(kj::ArrayPtr<kj::byte> /* source */,
    Encoding /* fromEncoding */, Encoding /* toEncoding */) {
  return kj::none;
}
|
||
// Placeholder for the UCS2 -> UTF-8 path. It performs no conversion and
// always yields kj::none; the arguments are ignored.
kj::Maybe<kj::Array<kj::byte>> TranscodeUtf8FromUcs2(kj::ArrayPtr<kj::byte> /* source */,
    Encoding /* fromEncoding */, Encoding /* toEncoding */) {
  return kj::none;
}
|
||
} // namespace | ||
|
||
// Opens an ICU converter for `encoding` and installs `substitude` as its
// substitution characters. Asserts if ICU fails to open the converter.
Converter::Converter(Encoding encoding, kj::StringPtr substitude) {
  UErrorCode status = U_ZERO_ERROR;
  UConverter* opened = ucnv_open(getEncodingName(encoding), &status);
  KJ_ASSERT(U_SUCCESS(status));
  // From here on the handle is closed by ConverterDisposer.
  conv_ = kj::Own<UConverter>(opened, ConverterDisposer::INSTANCE);
  setSubstitudeChars(substitude);
}
|
||
// Wraps an already-opened ICU converter. The handle is stored in a kj::Own
// whose disposer calls ucnv_close(); `substitude` is then installed as the
// converter's substitution characters.
Converter::Converter(UConverter* converter, kj::StringPtr substitude)
    : conv_(converter, ConverterDisposer::INSTANCE) {
  setSubstitudeChars(substitude);
}
|
||
// Returns the underlying ICU converter handle as a mutable pointer, even
// though this accessor is const.
UConverter* Converter::conv() const {
  const UConverter* handle = conv_.get();
  return const_cast<UConverter*>(handle);
}
|
||
// Returns ICU's maximum character size (ucnv_getMaxCharSize) for this
// converter.
size_t Converter::maxSize() const {
  KJ_ASSERT_NONNULL(conv_.get());
  const auto largest = ucnv_getMaxCharSize(conv_.get());
  return largest;
}
|
||
// Returns ICU's minimum character size (ucnv_getMinCharSize) for this
// converter.
size_t Converter::minSize() const {
  KJ_ASSERT_NONNULL(conv_.get());
  const auto smallest = ucnv_getMinCharSize(conv_.get());
  return smallest;
}
|
||
void Converter::reset() { | ||
KJ_ASSERT_NONNULL(conv_.get()); | ||
ucnv_reset(conv_.get()); | ||
} | ||
|
||
// Installs `sub` as the converter's substitution characters via
// ucnv_setSubstChars(). Asserts if ICU rejects the value.
void Converter::setSubstitudeChars(kj::StringPtr sub) {
  KJ_ASSERT_NONNULL(conv_.get());
  UErrorCode status = U_ZERO_ERROR;
  UConverter* handle = conv_.get();
  ucnv_setSubstChars(handle, sub.begin(), sub.size(), &status);
  KJ_ASSERT(U_SUCCESS(status));
}
|
||
// Transcodes `source` from `fromEncoding` to `toEncoding`.
//
// Picks a specialised implementation for the encoding pair where one exists
// and falls back to TranscodeDefault otherwise. Throws a JS Error with the
// message "Unable to transcode buffer" if the selected implementation yields
// kj::none.
kj::Array<kj::byte> transcode(kj::ArrayPtr<kj::byte> source, Encoding fromEncoding,
    Encoding toEncoding) {
  TranscodeImpl transcode_function = &TranscodeDefault;
  switch (fromEncoding) {
    case Encoding::ASCII:
    case Encoding::LATIN1:
      if (toEncoding == Encoding::UCS2) {
        transcode_function = &TranscodeToUCS2;
      }
      break;
    case Encoding::UTF8:
      if (toEncoding == Encoding::UCS2) {
        transcode_function = &TranscodeUcs2FromUtf8;
      }
      break;
    case Encoding::UCS2:
      switch (toEncoding) {
        case Encoding::UCS2:
          transcode_function = &TranscodeDefault;
          break;
        case Encoding::UTF8:
          transcode_function = &TranscodeUtf8FromUcs2;
          break;
        default:
          transcode_function = &TranscodeFromUCS2;
          break;
      }
      // Without this break every UCS2-sourced request fell through into the
      // unreachable default branch below.
      break;
    default:
      KJ_UNREACHABLE;
  }

  return JSG_REQUIRE_NONNULL(transcode_function(source, fromEncoding, toEncoding), Error,
      "Unable to transcode buffer");
}
|
||
} // namespace i18n | ||
|
||
} // namespace workerd::api::node |
Oops, something went wrong.