Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ICU-22518 Export monkeys #2637

Merged
merged 1 commit into from
Feb 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 62 additions & 10 deletions icu4c/source/test/intltest/rbbitst.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3538,6 +3538,9 @@ RBBILineMonkey::~RBBILineMonkey() {
//
// type = char | word | line | sent | title
//
// export = (path) Export test cases to (path)_(type).txt in the UCD
// test case format.
//
// Example:
// intltest rbbi/RBBITest/TestMonkey@"type=line loop=-1"
//
Expand Down Expand Up @@ -3974,6 +3977,8 @@ void RBBITest::TestMonkey() {
UnicodeString breakType = "all";
Locale locale("en");
UBool useUText = false;
UBool scalarsOnly = false;
std::string exportPath;

if (quick == false) {
loopCount = 10000;
Expand All @@ -3998,6 +4003,19 @@ void RBBITest::TestMonkey() {
p = u.replaceFirst("", status);
}

RegexMatcher pathMatcher(" *export *= *([^ ]+) *", p, 0, status);
if (pathMatcher.find()) {
pathMatcher.group(1, status).toUTF8String(exportPath);
pathMatcher.reset();
p = pathMatcher.replaceFirst("", status);
}

RegexMatcher s(" *scalars_only", p, 0, status);
if (s.find()) {
scalarsOnly = true;
s.reset();
p = s.replaceFirst("", status);
}

// m.reset(p);
if (RegexMatcher(UNICODE_STRING_SIMPLE("\\S"), p, 0, status).find()) {
Expand All @@ -4013,64 +4031,80 @@ void RBBITest::TestMonkey() {
}

if (breakType == "char" || breakType == "all") {
FILE *file = exportPath.empty() ? nullptr : fopen((exportPath + "_char.txt").c_str(), "w");
RBBICharMonkey m;
BreakIterator *bi = BreakIterator::createCharacterInstance(locale, status);
if (U_SUCCESS(status)) {
RunMonkey(bi, m, "char", seed, loopCount, useUText);
RunMonkey(bi, m, "char", seed, loopCount, useUText, file, scalarsOnly);
if (breakType == "all" && useUText==false) {
// Also run a quick test with UText when "all" is specified
RunMonkey(bi, m, "char", seed, loopCount, true);
RunMonkey(bi, m, "char", seed, loopCount, true, nullptr, scalarsOnly);
}
}
else {
errcheckln(status, "Creation of character break iterator failed %s", u_errorName(status));
}
delete bi;
if (file != nullptr) {
fclose(file);
}
}

if (breakType == "word" || breakType == "all") {
logln("Word Break Monkey Test");
FILE *file = exportPath.empty() ? nullptr : fopen((exportPath + "_word.txt").c_str(), "w");
RBBIWordMonkey m;
BreakIterator *bi = BreakIterator::createWordInstance(locale, status);
if (U_SUCCESS(status)) {
RunMonkey(bi, m, "word", seed, loopCount, useUText);
RunMonkey(bi, m, "word", seed, loopCount, useUText, file, scalarsOnly);
}
else {
errcheckln(status, "Creation of word break iterator failed %s", u_errorName(status));
}
delete bi;
if (file != nullptr) {
fclose(file);
}
}

if (breakType == "line" || breakType == "all") {
logln("Line Break Monkey Test");
FILE *file = exportPath.empty() ? nullptr : fopen((exportPath + "_line.txt").c_str(), "w");
RBBILineMonkey m;
BreakIterator *bi = BreakIterator::createLineInstance(locale, status);
if (loopCount >= 10) {
loopCount = loopCount / 5; // Line break runs slower than the others.
}
if (U_SUCCESS(status)) {
RunMonkey(bi, m, "line", seed, loopCount, useUText);
RunMonkey(bi, m, "line", seed, loopCount, useUText, file, scalarsOnly);
}
else {
errcheckln(status, "Creation of line break iterator failed %s", u_errorName(status));
}
delete bi;
if (file != nullptr) {
fclose(file);
}
}

if (breakType == "sent" || breakType == "all" ) {
logln("Sentence Break Monkey Test");
FILE *file = exportPath.empty() ? nullptr : fopen((exportPath + "_sent.txt").c_str(), "w");
RBBISentMonkey m;
BreakIterator *bi = BreakIterator::createSentenceInstance(locale, status);
if (loopCount >= 10) {
loopCount = loopCount / 10; // Sentence runs slower than the other break types
}
if (U_SUCCESS(status)) {
RunMonkey(bi, m, "sent", seed, loopCount, useUText);
RunMonkey(bi, m, "sent", seed, loopCount, useUText, file, scalarsOnly);
}
else {
errcheckln(status, "Creation of line break iterator failed %s", u_errorName(status));
}
delete bi;
if (file != nullptr) {
fclose(file);
}
}

#endif
Expand All @@ -4079,14 +4113,19 @@ void RBBITest::TestMonkey() {
//
// Run a RBBI monkey test. Common routine, for all break iterator types.
// Parameters:
// bi - the break iterator to use
// mk - MonkeyKind, abstraction for obtaining expected results
// name - Name of test (char, word, etc.) for use in error messages
// seed - Seed for starting random number generator (parameter from user)
// bi - the break iterator to use
// mk - MonkeyKind, abstraction for obtaining expected results
// name - Name of test (char, word, etc.) for use in error messages
// seed - Seed for starting random number generator (parameter from user)
// numIterations - Number of test iterations to run (the caller's loop count)
// exportFile - Pointer to a file to which the test cases will be written in
// UCD format. May be null.
// scalarsOnly - Only test sequences of Unicode scalar values; if this is false,
// arbitrary sequences of code points (including unpaired surrogates)
// are tested.
//
void RBBITest::RunMonkey(BreakIterator *bi, RBBIMonkeyKind &mk, const char *name, uint32_t seed,
int32_t numIterations, UBool useUText) {
int32_t numIterations, UBool useUText, FILE *exportFile, UBool scalarsOnly) {

#if !UCONFIG_NO_REGULAR_EXPRESSIONS

Expand Down Expand Up @@ -4151,6 +4190,9 @@ void RBBITest::RunMonkey(BreakIterator *bi, RBBIMonkeyKind &mk, const char *name
errln("%s:%d c < 0", __FILE__, __LINE__);
break;
}
if (scalarsOnly && U16_IS_SURROGATE(c)) {
continue;
}
// Do not assemble a supplementary character from randomly generated separate surrogates.
// (It could be a dictionary character)
if (U16_IS_TRAIL(c) && testText.length() > 0 && U16_IS_LEAD(testText.charAt(testText.length()-1))) {
Expand Down Expand Up @@ -4267,6 +4309,16 @@ void RBBITest::RunMonkey(BreakIterator *bi, RBBIMonkeyKind &mk, const char *name
}
}

if (exportFile != nullptr) {
for (i = 0; i < testText.length();) {
fprintf(exportFile, expectedBreaks[i] ? "÷ " : "× ");
char32_t const c = testText.char32At(i);
fprintf(exportFile, "%04X ", static_cast<uint32_t>(c));
i += U16_LENGTH(c);
}
fprintf(exportFile, expectedBreaks[testText.length()] ? "÷ # 🐒\n" : "× # 🐒\n");
}

// Compare the expected and actual results.
for (i=0; i<=testText.length(); i++) {
const char *errorType = nullptr;
Expand Down
4 changes: 3 additions & 1 deletion icu4c/source/test/intltest/rbbitst.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

#if !UCONFIG_NO_BREAK_ITERATION

#include <stdio.h>

#include <memory>

#include "intltest.h"
Expand Down Expand Up @@ -122,7 +124,7 @@ class RBBITest: public IntlTest {
**/

void RunMonkey(BreakIterator *bi, RBBIMonkeyKind &mk, const char *name, uint32_t seed,
int32_t loopCount, UBool useUText);
int32_t loopCount, UBool useUText, FILE *exportFile, UBool scalarsOnly);

// Run one of the Unicode Consortium boundary test data files.
void runUnicodeTestData(const char *fileName, RuleBasedBreakIterator *bi);
Expand Down
Loading