Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ICU-22479 Add a new fuzzer to test more Locale methods #2576

Merged
merged 1 commit into from
Sep 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion icu4c/source/test/fuzzer/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ CPPFLAGS += -I$(srcdir) -I$(top_srcdir)/common -I$(top_srcdir)/i18n -I$(top_srcd
DEFS += -D'U_TOPSRCDIR="$(top_srcdir)/"' -D'U_TOPBUILDDIR="$(BUILDDIR)"'
LIBS = $(LIBCTESTFW) $(LIBICUTOOLUTIL) $(LIBICUIO) $(LIBICUI18N) $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M)

FUZZER_TARGETS = break_iterator_fuzzer collator_compare_fuzzer collator_rulebased_fuzzer converter_fuzzer locale_fuzzer number_format_fuzzer ucasemap_fuzzer uloc_canonicalize_fuzzer uloc_for_language_tag_fuzzer uloc_get_name_fuzzer uloc_is_right_to_left_fuzzer uloc_open_keywords_fuzzer unicode_string_codepage_create_fuzzer uregex_open_fuzzer
FUZZER_TARGETS = break_iterator_fuzzer collator_compare_fuzzer collator_rulebased_fuzzer converter_fuzzer locale_fuzzer locale_morph_fuzzer number_format_fuzzer ucasemap_fuzzer uloc_canonicalize_fuzzer uloc_for_language_tag_fuzzer uloc_get_name_fuzzer uloc_is_right_to_left_fuzzer uloc_open_keywords_fuzzer unicode_string_codepage_create_fuzzer uregex_open_fuzzer

OBJECTS = $(FUZZER_TARGETS:%=%.o)
OBJECTS += fuzzer_driver.o locale_util.o
Expand Down
109 changes: 109 additions & 0 deletions icu4c/source/test/fuzzer/locale_morph_fuzzer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

// Fuzzer for ICU Locales.

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <set>
#include <string>
#include <vector>

#include "unicode/locid.h"
#include "unicode/localpointer.h"
#include "unicode/stringpiece.h"

#include "locale_util.h"

extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
if (size < 1) return 0;
icu::StringPiece fuzzData(reinterpret_cast<const char *>(data), size);
uint8_t rnd = *fuzzData.data();
fuzzData.remove_prefix(1);
const std::string input = MakeZeroTerminatedInput(
(const uint8_t*)(fuzzData.data()), fuzzData.length());

icu::Locale locale(input.c_str());
UErrorCode status = U_ZERO_ERROR;
switch(rnd % 8) {
case 0:
locale.addLikelySubtags(status);
break;
case 1:
locale.minimizeSubtags(status);
break;
case 2:
locale.canonicalize(status);
break;
case 3:
{
icu::LocalPointer<icu::StringEnumeration> senum(
locale.createKeywords(status), status);
while (U_SUCCESS(status) &&
(senum->next(nullptr, status)) != nullptr) {
// noop
}
}
break;
case 4:
{
icu::LocalPointer<icu::StringEnumeration> senum(
locale.createUnicodeKeywords(status), status);
while (U_SUCCESS(status) &&
(senum->next(nullptr, status)) != nullptr) {
// noop
}
}
break;
case 5:
{
char buf[256];
icu::CheckedArrayByteSink sink(buf, rnd);
locale.toLanguageTag(sink, status);
}
break;
case 6:
{
std::set<std::string> keys;
locale.getKeywords<std::string>(
std::insert_iterator<decltype(keys)>(keys, keys.begin()),
status);
if (U_SUCCESS(status)) {
char buf[256];
icu::CheckedArrayByteSink sink(buf, rnd);
for (std::set<std::string>::iterator it=keys.begin();
it!=keys.end();
++it) {
locale.getKeywordValue(
icu::StringPiece(it->c_str(), it->length()), sink,
status);
}
}
}
break;
case 7:
{
std::set<std::string> keys;
locale.getUnicodeKeywords<std::string>(
std::insert_iterator<decltype(keys)>(keys, keys.begin()),
status);
if (U_SUCCESS(status)) {
char buf[256];
icu::CheckedArrayByteSink sink(buf, rnd);
for (std::set<std::string>::iterator it=keys.begin();
it!=keys.end();
++it) {
locale.getUnicodeKeywordValue(
icu::StringPiece(it->c_str(), it->length()), sink,
status);
}
}
}
break;
default:
break;
}
return EXIT_SUCCESS;
}
30 changes: 30 additions & 0 deletions icu4c/source/test/fuzzer/locale_morph_fuzzer_seed_corpus.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
1sr-Cyrl-RS
2zz-UND
3de_DE
4De_dE
5en-US-u-islamcal
6zh-CN-a-myext-x-private
7en-a-myext-b-another
8de-419-DE
9a-DE
0ar-a-aaa-b-bbb-a-ccc
asl-rozaj
bsl-rozaj-biske
cde-CH-1901
dsl-IT-nedis
ehy-Latn-IT-arevela
faz-Arab-x-AZE-derbend
gCompletely bad text for locale, even includes spaces and interpunction.!?
hfr-u-nu-arabext
ide-u-nu-beng-nu-bali-nu-khmr
jfil-u-ca-buddhist
kzh-Hant-CN-u-nu-arabext-ca-islamicc
lja-u-ca-japanese
mhr-HR-u-ca-roc
n1234
oAAAAaaaaaaaVVVV_-=+
pja%-x-%e3
qyq-U-98x-M85E-9pE-85Y-8xU985YE-89xxUy-xEyq-U85Y-8xE8-q5Y-8xU-9xE
rUN-u-jTKMwi-a0q-aeae-KaG-aab-2aG2-36C-2uzeal-STqROK-U36-366-U86-83S-c3SZEC-SCG-1366-SG66-KMi-a0ae-qae-KaG-aab-1a3
ssr-sr-u-z2r-4su-nms-5rsu-mns-6um-s5su-msu-ins1-7rzx-ianu-ssd-ss5r-d0r-U22su-n5sx-lvar-5su-ssu-nax-lvarUd-uimxE-112
tHR-u-roc85Y-4xU-d0r-U22x-lvariant-ims-0d0U22-js17zr-rsm-u56su-csu-ins1-ins17Rz-zax-ianu-ssd-d0U22-js1-7rzrsm-u56su-csu-ins1-ins17Rz-z
Loading