Skip to content

Commit

Permalink
ICU-22479 Add a new fuzzer to test more Locale methods
Browse files Browse the repository at this point in the history
See #2576
  • Loading branch information
FrankYFTang authored and Squash Bot committed Sep 5, 2023
1 parent af8dc57 commit 6e48e24
Show file tree
Hide file tree
Showing 3 changed files with 140 additions and 1 deletion.
2 changes: 1 addition & 1 deletion icu4c/source/test/fuzzer/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ CPPFLAGS += -I$(srcdir) -I$(top_srcdir)/common -I$(top_srcdir)/i18n -I$(top_srcd
DEFS += -D'U_TOPSRCDIR="$(top_srcdir)/"' -D'U_TOPBUILDDIR="$(BUILDDIR)"'
LIBS = $(LIBCTESTFW) $(LIBICUTOOLUTIL) $(LIBICUIO) $(LIBICUI18N) $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M)

FUZZER_TARGETS = break_iterator_fuzzer collator_compare_fuzzer collator_rulebased_fuzzer converter_fuzzer locale_fuzzer number_format_fuzzer ucasemap_fuzzer uloc_canonicalize_fuzzer uloc_for_language_tag_fuzzer uloc_get_name_fuzzer uloc_is_right_to_left_fuzzer uloc_open_keywords_fuzzer unicode_string_codepage_create_fuzzer uregex_open_fuzzer
FUZZER_TARGETS = break_iterator_fuzzer collator_compare_fuzzer collator_rulebased_fuzzer converter_fuzzer locale_fuzzer locale_morph_fuzzer number_format_fuzzer ucasemap_fuzzer uloc_canonicalize_fuzzer uloc_for_language_tag_fuzzer uloc_get_name_fuzzer uloc_is_right_to_left_fuzzer uloc_open_keywords_fuzzer unicode_string_codepage_create_fuzzer uregex_open_fuzzer

OBJECTS = $(FUZZER_TARGETS:%=%.o)
OBJECTS += fuzzer_driver.o locale_util.o
Expand Down
109 changes: 109 additions & 0 deletions icu4c/source/test/fuzzer/locale_morph_fuzzer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

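// Fuzzer for ICU Locale methods: likely-subtag expansion and minimization,
// canonicalization, keyword enumeration and lookup, and language tag output.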

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <set>
#include <string>
#include <vector>

#include "unicode/locid.h"
#include "unicode/localpointer.h"
#include "unicode/stringpiece.h"

#include "locale_util.h"

extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
if (size < 1) return 0;
icu::StringPiece fuzzData(reinterpret_cast<const char *>(data), size);
uint8_t rnd = *fuzzData.data();
fuzzData.remove_prefix(1);
const std::string input = MakeZeroTerminatedInput(
(const uint8_t*)(fuzzData.data()), fuzzData.length());

icu::Locale locale(input.c_str());
UErrorCode status = U_ZERO_ERROR;
switch(rnd % 8) {
case 0:
locale.addLikelySubtags(status);
break;
case 1:
locale.minimizeSubtags(status);
break;
case 2:
locale.canonicalize(status);
break;
case 3:
{
icu::LocalPointer<icu::StringEnumeration> senum(
locale.createKeywords(status), status);
while (U_SUCCESS(status) &&
(senum->next(nullptr, status)) != nullptr) {
// noop
}
}
break;
case 4:
{
icu::LocalPointer<icu::StringEnumeration> senum(
locale.createUnicodeKeywords(status), status);
while (U_SUCCESS(status) &&
(senum->next(nullptr, status)) != nullptr) {
// noop
}
}
break;
case 5:
{
char buf[256];
icu::CheckedArrayByteSink sink(buf, rnd);
locale.toLanguageTag(sink, status);
}
break;
case 6:
{
std::set<std::string> keys;
locale.getKeywords<std::string>(
std::insert_iterator<decltype(keys)>(keys, keys.begin()),
status);
if (U_SUCCESS(status)) {
char buf[256];
icu::CheckedArrayByteSink sink(buf, rnd);
for (std::set<std::string>::iterator it=keys.begin();
it!=keys.end();
++it) {
locale.getKeywordValue(
icu::StringPiece(it->c_str(), it->length()), sink,
status);
}
}
}
break;
case 7:
{
std::set<std::string> keys;
locale.getUnicodeKeywords<std::string>(
std::insert_iterator<decltype(keys)>(keys, keys.begin()),
status);
if (U_SUCCESS(status)) {
char buf[256];
icu::CheckedArrayByteSink sink(buf, rnd);
for (std::set<std::string>::iterator it=keys.begin();
it!=keys.end();
++it) {
locale.getUnicodeKeywordValue(
icu::StringPiece(it->c_str(), it->length()), sink,
status);
}
}
}
break;
default:
break;
}
return EXIT_SUCCESS;
}
30 changes: 30 additions & 0 deletions icu4c/source/test/fuzzer/locale_morph_fuzzer_seed_corpus.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
1sr-Cyrl-RS
2zz-UND
3de_DE
4De_dE
5en-US-u-islamcal
6zh-CN-a-myext-x-private
7en-a-myext-b-another
8de-419-DE
9a-DE
0ar-a-aaa-b-bbb-a-ccc
asl-rozaj
bsl-rozaj-biske
cde-CH-1901
dsl-IT-nedis
ehy-Latn-IT-arevela
faz-Arab-x-AZE-derbend
gCompletely bad text for locale, even includes spaces and interpunction.!?
hfr-u-nu-arabext
ide-u-nu-beng-nu-bali-nu-khmr
jfil-u-ca-buddhist
kzh-Hant-CN-u-nu-arabext-ca-islamicc
lja-u-ca-japanese
mhr-HR-u-ca-roc
n1234
oAAAAaaaaaaaVVVV_-=+
pja%-x-%e3
qyq-U-98x-M85E-9pE-85Y-8xU985YE-89xxUy-xEyq-U85Y-8xE8-q5Y-8xU-9xE
rUN-u-jTKMwi-a0q-aeae-KaG-aab-2aG2-36C-2uzeal-STqROK-U36-366-U86-83S-c3SZEC-SCG-1366-SG66-KMi-a0ae-qae-KaG-aab-1a3
ssr-sr-u-z2r-4su-nms-5rsu-mns-6um-s5su-msu-ins1-7rzx-ianu-ssd-ss5r-d0r-U22su-n5sx-lvar-5su-ssu-nax-lvarUd-uimxE-112
tHR-u-roc85Y-4xU-d0r-U22x-lvariant-ims-0d0U22-js17zr-rsm-u56su-csu-ins1-ins17Rz-zax-ianu-ssd-d0U22-js1-7rzrsm-u56su-csu-ins1-ins17Rz-z

0 comments on commit 6e48e24

Please sign in to comment.