-
-
Notifications
You must be signed in to change notification settings - Fork 738
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ICU-22479 Add a new fuzzer to test more Locale methods
See #2576
- Loading branch information
1 parent
af8dc57
commit 6e48e24
Showing
3 changed files
with
140 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
// © 2019 and later: Unicode, Inc. and others. | ||
// License & terms of use: http://www.unicode.org/copyright.html | ||
|
||
// Fuzzer for ICU Locales. | ||
|
||
#include <algorithm> | ||
#include <cstddef> | ||
#include <cstdint> | ||
#include <cstdlib> | ||
#include <set> | ||
#include <string> | ||
#include <vector> | ||
|
||
#include "unicode/locid.h" | ||
#include "unicode/localpointer.h" | ||
#include "unicode/stringpiece.h" | ||
|
||
#include "locale_util.h" | ||
|
||
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { | ||
if (size < 1) return 0; | ||
icu::StringPiece fuzzData(reinterpret_cast<const char *>(data), size); | ||
uint8_t rnd = *fuzzData.data(); | ||
fuzzData.remove_prefix(1); | ||
const std::string input = MakeZeroTerminatedInput( | ||
(const uint8_t*)(fuzzData.data()), fuzzData.length()); | ||
|
||
icu::Locale locale(input.c_str()); | ||
UErrorCode status = U_ZERO_ERROR; | ||
switch(rnd % 8) { | ||
case 0: | ||
locale.addLikelySubtags(status); | ||
break; | ||
case 1: | ||
locale.minimizeSubtags(status); | ||
break; | ||
case 2: | ||
locale.canonicalize(status); | ||
break; | ||
case 3: | ||
{ | ||
icu::LocalPointer<icu::StringEnumeration> senum( | ||
locale.createKeywords(status), status); | ||
while (U_SUCCESS(status) && | ||
(senum->next(nullptr, status)) != nullptr) { | ||
// noop | ||
} | ||
} | ||
break; | ||
case 4: | ||
{ | ||
icu::LocalPointer<icu::StringEnumeration> senum( | ||
locale.createUnicodeKeywords(status), status); | ||
while (U_SUCCESS(status) && | ||
(senum->next(nullptr, status)) != nullptr) { | ||
// noop | ||
} | ||
} | ||
break; | ||
case 5: | ||
{ | ||
char buf[256]; | ||
icu::CheckedArrayByteSink sink(buf, rnd); | ||
locale.toLanguageTag(sink, status); | ||
} | ||
break; | ||
case 6: | ||
{ | ||
std::set<std::string> keys; | ||
locale.getKeywords<std::string>( | ||
std::insert_iterator<decltype(keys)>(keys, keys.begin()), | ||
status); | ||
if (U_SUCCESS(status)) { | ||
char buf[256]; | ||
icu::CheckedArrayByteSink sink(buf, rnd); | ||
for (std::set<std::string>::iterator it=keys.begin(); | ||
it!=keys.end(); | ||
++it) { | ||
locale.getKeywordValue( | ||
icu::StringPiece(it->c_str(), it->length()), sink, | ||
status); | ||
} | ||
} | ||
} | ||
break; | ||
case 7: | ||
{ | ||
std::set<std::string> keys; | ||
locale.getUnicodeKeywords<std::string>( | ||
std::insert_iterator<decltype(keys)>(keys, keys.begin()), | ||
status); | ||
if (U_SUCCESS(status)) { | ||
char buf[256]; | ||
icu::CheckedArrayByteSink sink(buf, rnd); | ||
for (std::set<std::string>::iterator it=keys.begin(); | ||
it!=keys.end(); | ||
++it) { | ||
locale.getUnicodeKeywordValue( | ||
icu::StringPiece(it->c_str(), it->length()), sink, | ||
status); | ||
} | ||
} | ||
} | ||
break; | ||
default: | ||
break; | ||
} | ||
return EXIT_SUCCESS; | ||
} |
30 changes: 30 additions & 0 deletions
30
icu4c/source/test/fuzzer/locale_morph_fuzzer_seed_corpus.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
1sr-Cyrl-RS | ||
2zz-UND | ||
3de_DE | ||
4De_dE | ||
5en-US-u-islamcal | ||
6zh-CN-a-myext-x-private | ||
7en-a-myext-b-another | ||
8de-419-DE | ||
9a-DE | ||
0ar-a-aaa-b-bbb-a-ccc | ||
asl-rozaj | ||
bsl-rozaj-biske | ||
cde-CH-1901 | ||
dsl-IT-nedis | ||
ehy-Latn-IT-arevela | ||
faz-Arab-x-AZE-derbend | ||
gCompletely bad text for locale, even includes spaces and interpunction.!? | ||
hfr-u-nu-arabext | ||
ide-u-nu-beng-nu-bali-nu-khmr | ||
jfil-u-ca-buddhist | ||
kzh-Hant-CN-u-nu-arabext-ca-islamicc | ||
lja-u-ca-japanese | ||
mhr-HR-u-ca-roc | ||
n1234 | ||
oAAAAaaaaaaaVVVV_-=+ | ||
pja%-x-%e3 | ||
qyq-U-98x-M85E-9pE-85Y-8xU985YE-89xxUy-xEyq-U85Y-8xE8-q5Y-8xU-9xE | ||
rUN-u-jTKMwi-a0q-aeae-KaG-aab-2aG2-36C-2uzeal-STqROK-U36-366-U86-83S-c3SZEC-SCG-1366-SG66-KMi-a0ae-qae-KaG-aab-1a3 | ||
ssr-sr-u-z2r-4su-nms-5rsu-mns-6um-s5su-msu-ins1-7rzx-ianu-ssd-ss5r-d0r-U22su-n5sx-lvar-5su-ssu-nax-lvarUd-uimxE-112 | ||
tHR-u-roc85Y-4xU-d0r-U22x-lvariant-ims-0d0U22-js17zr-rsm-u56su-csu-ins1-ins17Rz-zax-ianu-ssd-d0U22-js1-7rzrsm-u56su-csu-ins1-ins17Rz-z |