diff --git a/icu4c/source/test/fuzzer/Makefile.in b/icu4c/source/test/fuzzer/Makefile.in
index 259e6b0a7a1a..4b0f610694a5 100644
--- a/icu4c/source/test/fuzzer/Makefile.in
+++ b/icu4c/source/test/fuzzer/Makefile.in
@@ -33,7 +33,7 @@ CPPFLAGS += -I$(srcdir) -I$(top_srcdir)/common -I$(top_srcdir)/i18n -I$(top_srcd
 DEFS += -D'U_TOPSRCDIR="$(top_srcdir)/"' -D'U_TOPBUILDDIR="$(BUILDDIR)"'
 LIBS = $(LIBCTESTFW) $(LIBICUTOOLUTIL) $(LIBICUIO) $(LIBICUI18N) $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M)
 
-FUZZER_TARGETS = break_iterator_fuzzer collator_compare_fuzzer collator_rulebased_fuzzer converter_fuzzer locale_fuzzer number_format_fuzzer ucasemap_fuzzer uloc_canonicalize_fuzzer uloc_for_language_tag_fuzzer uloc_get_name_fuzzer uloc_is_right_to_left_fuzzer uloc_open_keywords_fuzzer unicode_string_codepage_create_fuzzer uregex_open_fuzzer
+FUZZER_TARGETS = break_iterator_fuzzer collator_compare_fuzzer collator_rulebased_fuzzer converter_fuzzer locale_fuzzer locale_morph_fuzzer number_format_fuzzer ucasemap_fuzzer uloc_canonicalize_fuzzer uloc_for_language_tag_fuzzer uloc_get_name_fuzzer uloc_is_right_to_left_fuzzer uloc_open_keywords_fuzzer unicode_string_codepage_create_fuzzer uregex_open_fuzzer
 
 OBJECTS = $(FUZZER_TARGETS:%=%.o)
 OBJECTS += fuzzer_driver.o locale_util.o
diff --git a/icu4c/source/test/fuzzer/locale_morph_fuzzer.cpp b/icu4c/source/test/fuzzer/locale_morph_fuzzer.cpp
new file mode 100644
index 000000000000..e3d2b7dc3423
--- /dev/null
+++ b/icu4c/source/test/fuzzer/locale_morph_fuzzer.cpp
@@ -0,0 +1,103 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// Fuzzer for ICU Locales.
+
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <set>
+#include <string>
+
+#include "unicode/locid.h"
+#include "unicode/localpointer.h"
+#include "unicode/stringpiece.h"
+
+#include "locale_util.h"
+
+// libFuzzer entry point. The first input byte picks which icu::Locale
+// operation to exercise and doubles as the capacity of the output sink;
+// the remaining bytes are used as the locale identifier.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  if (size < 1) return 0;
+  const uint8_t rnd = data[0];
+  const std::string input =
+      MakeZeroTerminatedInput(data + 1, static_cast<int32_t>(size - 1));
+
+  icu::Locale locale(input.c_str());
+  UErrorCode status = U_ZERO_ERROR;
+  switch (rnd % 8) {
+    case 0:
+      locale.addLikelySubtags(status);
+      break;
+    case 1:
+      locale.minimizeSubtags(status);
+      break;
+    case 2:
+      locale.canonicalize(status);
+      break;
+    case 3:
+      {
+        // LocalPointer reports a null enumeration through status, so the
+        // U_SUCCESS check also guards the dereference.
+        icu::LocalPointer<icu::StringEnumeration> senum(
+            locale.createKeywords(status), status);
+        while (U_SUCCESS(status) &&
+               senum->next(nullptr, status) != nullptr) {
+          // noop
+        }
+      }
+      break;
+    case 4:
+      {
+        icu::LocalPointer<icu::StringEnumeration> senum(
+            locale.createUnicodeKeywords(status), status);
+        while (U_SUCCESS(status) &&
+               senum->next(nullptr, status) != nullptr) {
+          // noop
+        }
+      }
+      break;
+    case 5:
+      {
+        char buf[256];
+        // rnd is at most 255, so the sink never claims more than buf holds.
+        icu::CheckedArrayByteSink sink(buf, rnd);
+        locale.toLanguageTag(sink, status);
+      }
+      break;
+    case 6:
+      {
+        std::set<std::string> keys;
+        locale.getKeywords<std::string>(
+            std::inserter(keys, keys.begin()), status);
+        if (U_SUCCESS(status)) {
+          char buf[256];
+          icu::CheckedArrayByteSink sink(buf, rnd);
+          for (const std::string& key : keys) {
+            locale.getKeywordValue(
+                icu::StringPiece(key.c_str(), key.length()), sink, status);
+          }
+        }
+      }
+      break;
+    case 7:
+      {
+        std::set<std::string> keys;
+        locale.getUnicodeKeywords<std::string>(
+            std::inserter(keys, keys.begin()), status);
+        if (U_SUCCESS(status)) {
+          char buf[256];
+          icu::CheckedArrayByteSink sink(buf, rnd);
+          for (const std::string& key : keys) {
+            locale.getUnicodeKeywordValue(
+                icu::StringPiece(key.c_str(), key.length()), sink, status);
+          }
+        }
+      }
+      break;
+    default:
+      break;
+  }
+  return 0;
+}
diff --git a/icu4c/source/test/fuzzer/locale_morph_fuzzer_seed_corpus.txt b/icu4c/source/test/fuzzer/locale_morph_fuzzer_seed_corpus.txt
new file mode 100644
index 000000000000..544a4064502c
--- /dev/null
+++ b/icu4c/source/test/fuzzer/locale_morph_fuzzer_seed_corpus.txt
@@ -0,0 +1,30 @@
+1sr-Cyrl-RS
+2zz-UND
+3de_DE
+4De_dE
+5en-US-u-islamcal
+6zh-CN-a-myext-x-private
+7en-a-myext-b-another
+8de-419-DE
+9a-DE
+0ar-a-aaa-b-bbb-a-ccc
+asl-rozaj
+bsl-rozaj-biske
+cde-CH-1901
+dsl-IT-nedis
+ehy-Latn-IT-arevela
+faz-Arab-x-AZE-derbend
+gCompletely bad text for locale, even includes spaces and interpunction.!?
+hfr-u-nu-arabext
+ide-u-nu-beng-nu-bali-nu-khmr
+jfil-u-ca-buddhist
+kzh-Hant-CN-u-nu-arabext-ca-islamicc
+lja-u-ca-japanese
+mhr-HR-u-ca-roc
+n1234
+oAAAAaaaaaaaVVVV_-=+
+pja%-x-%e3
+qyq-U-98x-M85E-9pE-85Y-8xU985YE-89xxUy-xEyq-U85Y-8xE8-q5Y-8xU-9xE
+rUN-u-jTKMwi-a0q-aeae-KaG-aab-2aG2-36C-2uzeal-STqROK-U36-366-U86-83S-c3SZEC-SCG-1366-SG66-KMi-a0ae-qae-KaG-aab-1a3
+ssr-sr-u-z2r-4su-nms-5rsu-mns-6um-s5su-msu-ins1-7rzx-ianu-ssd-ss5r-d0r-U22su-n5sx-lvar-5su-ssu-nax-lvarUd-uimxE-112
+tHR-u-roc85Y-4xU-d0r-U22x-lvariant-ims-0d0U22-js17zr-rsm-u56su-csu-ins1-ins17Rz-zax-ianu-ssd-d0U22-js1-7rzrsm-u56su-csu-ins1-ins17Rz-z