From 8b8e1bfe61a8e49ac80e466a045372797209c9c9 Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Mon, 4 Sep 2023 16:03:15 -0700 Subject: [PATCH] ICU-22261 Add tech preview implementation for MessageFormat 2.0 to icu4c --- icu4c/source/common/unicode/utypes.h | 15 +- icu4c/source/common/utypes.cpp | 9 + icu4c/source/i18n/messageformat2.cpp | 784 ++++++++ icu4c/source/i18n/messageformat2_builder.cpp | 162 ++ icu4c/source/i18n/messageformat2_checker.cpp | 212 +++ icu4c/source/i18n/messageformat2_context.cpp | 633 +++++++ .../source/i18n/messageformat2_data_model.cpp | 860 +++++++++ .../messageformat2_formatting_context.cpp | 770 ++++++++ .../i18n/messageformat2_function_registry.cpp | 594 ++++++ icu4c/source/i18n/messageformat2_parser.cpp | 1647 ++++++++++++++++ .../source/i18n/messageformat2_serializer.cpp | 284 +++ icu4c/source/i18n/messageformat2_utils_impl.h | 306 +++ icu4c/source/i18n/sources.txt | 9 + icu4c/source/i18n/unicode/messageformat2.h | 482 +++++ .../i18n/unicode/messageformat2_checker.h | 55 + .../i18n/unicode/messageformat2_context.h | 417 +++++ .../i18n/unicode/messageformat2_data_model.h | 1652 +++++++++++++++++ .../messageformat2_formatting_context.h | 487 +++++ .../messageformat2_function_registry.h | 405 ++++ .../i18n/unicode/messageformat2_macros.h | 126 ++ .../i18n/unicode/messageformat2_utils.h | 364 ++++ icu4c/source/test/intltest/Makefile.in | 1 + icu4c/source/test/intltest/itformat.cpp | 2 + .../test/intltest/messageformat2test.cpp | 904 +++++++++ .../source/test/intltest/messageformat2test.h | 222 +++ .../intltest/messageformat2test_builtin.cpp | 276 +++ .../intltest/messageformat2test_custom.cpp | 761 ++++++++ .../intltest/messageformat2test_features.cpp | 1089 +++++++++++ .../intltest/messageformat2test_fromjson.cpp | 541 ++++++ .../test/intltest/messageformat2test_icu.cpp | 211 +++ .../test/intltest/messageformat2test_utils.h | 319 ++++ 31 files changed, 14598 insertions(+), 1 deletion(-) create mode 100644 
icu4c/source/i18n/messageformat2.cpp create mode 100644 icu4c/source/i18n/messageformat2_builder.cpp create mode 100644 icu4c/source/i18n/messageformat2_checker.cpp create mode 100644 icu4c/source/i18n/messageformat2_context.cpp create mode 100644 icu4c/source/i18n/messageformat2_data_model.cpp create mode 100644 icu4c/source/i18n/messageformat2_formatting_context.cpp create mode 100644 icu4c/source/i18n/messageformat2_function_registry.cpp create mode 100644 icu4c/source/i18n/messageformat2_parser.cpp create mode 100644 icu4c/source/i18n/messageformat2_serializer.cpp create mode 100644 icu4c/source/i18n/messageformat2_utils_impl.h create mode 100644 icu4c/source/i18n/unicode/messageformat2.h create mode 100644 icu4c/source/i18n/unicode/messageformat2_checker.h create mode 100644 icu4c/source/i18n/unicode/messageformat2_context.h create mode 100644 icu4c/source/i18n/unicode/messageformat2_data_model.h create mode 100644 icu4c/source/i18n/unicode/messageformat2_formatting_context.h create mode 100644 icu4c/source/i18n/unicode/messageformat2_function_registry.h create mode 100644 icu4c/source/i18n/unicode/messageformat2_macros.h create mode 100644 icu4c/source/i18n/unicode/messageformat2_utils.h create mode 100644 icu4c/source/test/intltest/messageformat2test.cpp create mode 100644 icu4c/source/test/intltest/messageformat2test.h create mode 100644 icu4c/source/test/intltest/messageformat2test_builtin.cpp create mode 100644 icu4c/source/test/intltest/messageformat2test_custom.cpp create mode 100644 icu4c/source/test/intltest/messageformat2test_features.cpp create mode 100644 icu4c/source/test/intltest/messageformat2test_fromjson.cpp create mode 100644 icu4c/source/test/intltest/messageformat2test_icu.cpp create mode 100644 icu4c/source/test/intltest/messageformat2test_utils.h diff --git a/icu4c/source/common/unicode/utypes.h b/icu4c/source/common/unicode/utypes.h index f890d5d1dbbe..1ef03bdcde1a 100644 --- a/icu4c/source/common/unicode/utypes.h +++ 
b/icu4c/source/common/unicode/utypes.h @@ -438,6 +438,7 @@ typedef enum UErrorCode { U_PLUGIN_CHANGED_LEVEL_WARNING = -120, /**< A plugin caused a level change. May not be an error, but later plugins may not load. */ + #ifndef U_HIDE_DEPRECATED_API /** * One more than the highest normal UErrorCode warning value. @@ -568,12 +569,24 @@ typedef enum UErrorCode { U_FORMAT_INEXACT_ERROR, /**< Cannot format a number exactly and rounding mode is ROUND_UNNECESSARY @stable ICU 4.8 */ U_NUMBER_ARG_OUTOFBOUNDS_ERROR, /**< The argument to a NumberFormatter helper method was out of bounds; the bounds are usually 0 to 999. @stable ICU 61 */ U_NUMBER_SKELETON_SYNTAX_ERROR, /**< The number skeleton passed to C++ NumberFormatter or C UNumberFormatter was invalid or contained a syntax error. @stable ICU 62 */ + + /* MessageFormat 2.0 errors */ + U_UNRESOLVED_VARIABLE_ERROR, /**< A variable is referred to but not bound by any definition */ + U_SYNTAX_ERROR, /**< Includes all syntax errors */ + U_UNKNOWN_FUNCTION_ERROR, /**< An annotation refers to a function not defined by the standard or custom function registry */ + U_VARIANT_KEY_MISMATCH_ERROR, /**< In a match-construct, one or more variants had a different number of keys from the number of selectors */ + U_FORMATTING_ERROR, /**< Covers all runtime errors: for example, an internally inconsistent set of options. */ + U_NONEXHAUSTIVE_PATTERN_ERROR, /**< In a match-construct, the variants do not cover all possible values */ + U_DUPLICATE_OPTION_NAME_ERROR, /**< In an annotation, the same option name appears more than once */ + U_SELECTOR_ERROR, /**< A selector function is applied to an operand of the wrong type */ + U_MISSING_SELECTOR_ANNOTATION_ERROR, /**< A selector expression evaluates to an unannotated operand */ + #ifndef U_HIDE_DEPRECATED_API /** * One more than the highest normal formatting API error code. * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 
*/ - U_FMT_PARSE_ERROR_LIMIT = 0x10114, + U_FMT_PARSE_ERROR_LIMIT = 0x10125, #endif // U_HIDE_DEPRECATED_API /* diff --git a/icu4c/source/common/utypes.cpp b/icu4c/source/common/utypes.cpp index 63e05b1249b6..7e43f54d7a1f 100644 --- a/icu4c/source/common/utypes.cpp +++ b/icu4c/source/common/utypes.cpp @@ -129,6 +129,15 @@ _uFmtErrorName[U_FMT_PARSE_ERROR_LIMIT - U_FMT_PARSE_ERROR_START] = { "U_FORMAT_INEXACT_ERROR", "U_NUMBER_ARG_OUTOFBOUNDS_ERROR", "U_NUMBER_SKELETON_SYNTAX_ERROR", + "U_UNRESOLVED_VARIABLE_ERROR", + "U_SYNTAX_ERROR", + "U_UNKNOWN_FUNCTION_ERROR", + "U_VARIANT_KEY_MISMATCH_ERROR", + "U_FORMATTING_ERROR", + "U_NONEXHAUSTIVE_PATTERN_ERROR", + "U_DUPLICATE_OPTION_NAME_ERROR", + "U_SELECTOR_ERROR", + "U_MISSING_SELECTOR_ANNOTATION_ERROR" }; static const char * const diff --git a/icu4c/source/i18n/messageformat2.cpp b/icu4c/source/i18n/messageformat2.cpp new file mode 100644 index 000000000000..de084ba782f4 --- /dev/null +++ b/icu4c/source/i18n/messageformat2.cpp @@ -0,0 +1,784 @@ +// © 2016 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/messageformat2.h" +#include "unicode/messageformat2_data_model.h" +#include "uvector.h" // U_ASSERT + +U_NAMESPACE_BEGIN namespace message2 { + +using Binding = MessageFormatDataModel::Binding; +using Bindings = MessageFormatDataModel::Bindings; +using Expression = MessageFormatDataModel::Expression; +using ExpressionList = MessageFormatDataModel::ExpressionList; +using Key = MessageFormatDataModel::Key; +using KeyList = MessageFormatDataModel::KeyList; +using Literal = MessageFormatDataModel::Literal; +using OptionMap = MessageFormatDataModel::OptionMap; +using Operand = MessageFormatDataModel::Operand; +using Operator = MessageFormatDataModel::Operator; +using Pattern = MessageFormatDataModel::Pattern; +using PatternPart = MessageFormatDataModel::PatternPart; +using SelectorKeys = MessageFormatDataModel::SelectorKeys; +using VariantMap = MessageFormatDataModel::VariantMap; + +using PrioritizedVariantList = ImmutableVector; + +#define TEXT_SELECTOR UnicodeString("select") + +// ------------------------------------------------------ +// Formatting + +// The result of formatting a literal is just itself. +static const Formattable& evalLiteral(const Literal& lit) { + return lit.getContents(); +} + +// Assumes that `var` is a message argument; sets the input in the context +// to the argument's value. +void MessageFormatter::evalArgument(const VariableName& var, ExpressionContext& context) const { + const MessageContext& c = context.messageContext(); + + U_ASSERT(c.hasGlobal(var)); + // The fallback for a variable name is itself. 
+ context.setFallbackTo(var); + if (c.hasGlobalAsFormattable(var)) { + context.setInput(c.getGlobalAsFormattable(var)); + } else { + context.setInput(c.getGlobalAsObject(var)); + } +} + +// Sets the input to the contents of the literal +void MessageFormatter::formatLiteral(const Literal& lit, ExpressionContext& context) const { + // The fallback for a literal is itself. + context.setFallbackTo(lit); + context.setInput(evalLiteral(lit)); +} + +void MessageFormatter::formatOperand(const Environment& env, const Operand& rand, ExpressionContext& context, UErrorCode &status) const { + CHECK_ERROR(status); + if (rand.isNull()) { + context.setNoOperand(); + return; + } + if (rand.isVariable()) { + // Check if it's local or global + // TODO: Currently, this code allows name shadowing, but depending on the + // resolution of: + // https://github.com/unicode-org/message-format-wg/issues/310 + // it might need to forbid it. + const VariableName& var = rand.asVariable(); + // TODO: Currently, this code implements lazy evaluation of locals. + // That is, the environment binds names to a closure, not a resolved value. + // Eager vs. 
lazy evaluation is an open issue: + // see https://github.com/unicode-org/message-format-wg/issues/299 + + // Look up the variable in the environment + const Closure* rhs = env.lookup(var); + // If rhs is null, the variable must not be a local + if (rhs != nullptr) { + // Format the expression using the environment from the closure + formatExpression(rhs->getEnv(), rhs->getExpr(), context, status); + return; + } + // Use fallback per + // https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#fallback-resolution + context.setFallbackTo(var); + // Variable wasn't found in locals -- check if it's global + if (context.messageContext().hasGlobal(var)) { + evalArgument(var, context); + return; + } else { + // Unbound variable -- set a resolution error + context.messageContext().getErrors().setUnresolvedVariable(var, status); + return; + } + } else if (rand.isLiteral()) { + formatLiteral(rand.asLiteral(), context); + return; + } +} + +// Resolves a function's options, recording the value of each option in the context +void MessageFormatter::resolveOptions(const Environment& env, const OptionMap& options, ExpressionContext& context, UErrorCode& status) const { + CHECK_ERROR(status); + + int32_t pos = OptionMap::FIRST; + LocalPointer rhsContext; + while (true) { + UnicodeString k; + const Operand* v; + if (!options.next(pos, k, v)) { + break; + } + U_ASSERT(v != nullptr); + // Options are fully evaluated before calling the function + // Create a new context for formatting the right-hand side of the option + rhsContext.adoptInstead(context.create(status)); + CHECK_ERROR(status); + // Format the operand in its own context + formatOperand(env, *v, *rhsContext, status); + // If formatting succeeded, pass the string + if (rhsContext->hasStringOutput()) { + context.setStringOption(k, rhsContext->getStringOutput(), status); + } else if (rhsContext->hasFormattableInput()) { + // (Fall back to the input if the result was a formatted number) + const 
Formattable& f = rhsContext->getFormattableInput(); + switch (f.getType()) { + case Formattable::Type::kDate: { + context.setDateOption(k, f.getDate(), status); + break; + } + case Formattable::Type::kDouble: { + context.setNumericOption(k, f.getDouble(), status); + break; + } + case Formattable::Type::kLong: { + context.setNumericOption(k, f.getLong(), status); + break; + } + case Formattable::Type::kInt64: { + context.setNumericOption(k, f.getInt64(), status); + break; + } + case Formattable::Type::kString: { + context.setStringOption(k, f.getString(), status); + break; + } + default: { + // Options with array or object types are ignored + continue; + } + } + } else if (rhsContext->hasObjectInput()) { + context.setObjectOption(k, rhsContext->getObjectInputPointer(), status); + } else { + // Ignore fallbacks + U_ASSERT(rhsContext->isFallback()); + } + } +} + +// Formats an expression using `globalEnv` for the values of variables +void MessageFormatter::formatExpression(const Environment& globalEnv, const Expression& expr, ExpressionContext& context, UErrorCode &status) const { + CHECK_ERROR(status); + + // Formatting error + if (expr.isReserved()) { + context.messageContext().getErrors().setReservedError(status); + U_ASSERT(context.isFallback()); + return; + } + + const Operand& rand = expr.getOperand(); + // Format the operand (formatOperand handles the case of a null operand) + formatOperand(globalEnv, rand, context, status); + + if (expr.isFunctionCall()) { + const Operator& rator = expr.getOperator(); + const FunctionName& functionName = rator.getFunctionName(); + const OptionMap& options = rator.getOptions(); + // Resolve the options + resolveOptions(globalEnv, options, context, status); + + // Don't call the function on error values + if (context.isFallback()) { + return; + } + + // Call the formatter function + context.evalFormatterCall(functionName, status); + // If the call was successful, nothing more to do + if (context.hasOutput() && U_SUCCESS(status)) 
{ + return; + } else if (!(context.messageContext().getErrors().hasError())) { + // Set formatting warning if formatting function had no output + // but didn't set an error or warning + context.messageContext().getErrors().setFormattingError(functionName.toString(), status); + } + + // If we reached this point, the formatter is null -- + // must have been a previous unknown function warning + if (rand.isNull()) { + context.setFallbackTo(functionName); + } + context.setFallback(); + return; + } +} + +// Formats each text and expression part of a pattern, appending the results to `result` +void MessageFormatter::formatPattern(MessageContext& globalContext, const Environment& globalEnv, const Pattern& pat, UErrorCode &status, UnicodeString& result) const { + CHECK_ERROR(status); + + LocalPointer context; + for (int32_t i = 0; i < pat.numParts(); i++) { + const PatternPart* part = pat.getPart(i); + U_ASSERT(part != nullptr); + if (part->isText()) { + result += part->asText(); + } else { + // Create a new context to evaluate the expression part + context.adoptInstead(ExpressionContext::create(globalContext, status)); + CHECK_ERROR(status); + // Format the expression + formatExpression(globalEnv, part->contents(), *context, status); + // Force full evaluation, e.g. applying default formatters to + // unformatted input (or formatting numbers as strings) + context->formatToString(locale, status); + CHECK_ERROR(status); + result += context->getStringOutput(); + } + } +} + +// ------------------------------------------------------ +// Selection + +// See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#resolve-selectors +// res is a vector of ExpressionContexts +void MessageFormatter::resolveSelectors(MessageContext& context, const Environment& env, const ExpressionList& selectors, UErrorCode &status, UVector& res) const { + CHECK_ERROR(status); + + // 1. Let res be a new empty list of resolved values that support selection. 
+ // (Implicit, since `res` is an out-parameter) + // 2. For each expression exp of the message's selectors + LocalPointer rv; + for (int32_t i = 0; i < selectors.length(); i++) { + rv.adoptInstead(ExpressionContext::create(context, status)); + CHECK_ERROR(status); + // 2i. Let rv be the resolved value of exp. + formatSelectorExpression(env, *selectors.get(i), *rv, status); + if (rv->hasSelector()) { + // 2ii. If selection is supported for rv: + // (True if this code has been reached) + } else { + // 2iii. Else: + // Let nomatch be a resolved value for which selection always fails. + // Append nomatch as the last element of the list res. + // Emit a Selection Error. + // (Note: in this case, rv, being a fallback, serves as `nomatch`) + #ifdef _DEBUG + const Errors& err = rv->messageContext().getErrors(); + U_ASSERT(err.hasUnknownFunctionError() || err.hasSelectorError()); + U_ASSERT(rv->isFallback()); + #endif + } + // 2ii(a). Append rv as the last element of the list res. + // (Also fulfills 2iii) + res.adoptElement(rv.orphan(), status); + } +} + +// See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#resolve-preferences +// `keys` and `matches` are vectors of strings +void MessageFormatter::matchSelectorKeys(const UVector& keys, ExpressionContext& rv, UErrorCode& status, UVector& matches) const { + CHECK_ERROR(status); + + if (rv.isFallback()) { + // Return an empty list of matches + return; + } + U_ASSERT(rv.hasSelector()); + + rv.evalPendingSelectorCall(keys, matches, status); +} + +UBool stringsEqual(const UElement k1, const UElement k2) { + return (*(static_cast(k1.pointer)) == *(static_cast(k2.pointer))); +} + +// See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#resolve-preferences +// `res` is a vector of ExpressionContexts; `pref` is a vector of vectors of strings; +void MessageFormatter::resolvePreferences(const UVector& res, const VariantMap& variants, UErrorCode &status, UVector& pref) const 
{ + CHECK_ERROR(status); + + // 1. Let pref be a new empty list of lists of strings. + // (Implicit, since `pref` is an out-parameter) + LocalPointer keys; + LocalPointer ks; + int32_t numVariants = variants.size(); + // The two-argument LocalPointer constructor sets U_MEMORY_ALLOCATION_ERROR when `new` returned nullptr, + // so `matches` is known to be non-null once CHECK_ERROR has passed + LocalPointer matches(new UVector(numVariants, status), status); + CHECK_ERROR(status); + matches->setComparer(stringsEqual); + matches->setDeleter(uprv_deleteUObject); + // 2. For each index i in res + for (int32_t i = 0; i < res.size(); i++) { + // 2i. Let keys be a new empty list of strings. + // (Check for allocation failure before dereferencing `keys`) + keys.adoptInsteadAndCheckErrorCode(new UVector(numVariants, status), status); + CHECK_ERROR(status); + keys->setDeleter(uprv_deleteUObject); + // 2ii. For each variant `var` of the message + int32_t pos = VariantMap::FIRST; + while (true) { + const SelectorKeys* selectorKeys; + const Pattern* p; // Not used + if (!variants.next(pos, selectorKeys, p)) { + break; + } + // Note: Here, `var` names the key list of `var`, + // not a Variant itself + const KeyList& var = selectorKeys->getKeys(); + // 2ii(a). Let `key` be the `var` key at position i. + U_ASSERT(i < var.length()); // established by semantic check in formatSelectors() + const Key& key = *var.get(i); + // 2ii(b). If `key` is not the catch-all key '*' + if (!key.isWildcard()) { + // 2ii(b)(a) Assert that key is a literal. + // (Not needed) + // 2ii(b)(b) Let `ks` be the resolved value of `key`. + ks.adoptInstead(new UnicodeString(key.asLiteral().stringContents())); + if (!ks.isValid()) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + // 2ii(b)(c) Append `ks` as the last element of the list `keys`. + keys->adoptElement(ks.orphan(), status); + CHECK_ERROR(status); + } + } + // 2iii. Let `rv` be the resolved value at index `i` of `res`. + ExpressionContext* rv = static_cast(res[i]); + U_ASSERT(rv != nullptr); + // 2iv. Let matches be the result of calling the method MatchSelectorKeys(rv, keys) + matchSelectorKeys(*keys, *rv, status, *matches); + // 2v. 
Append `matches` as the last element of the list `pref` + pref.adoptElement(matches.orphan(), status); + // Start a fresh `matches` list for the next selector; fail cleanly if it cannot be allocated + matches.adoptInsteadAndCheckErrorCode(new UVector(numVariants, status), status); + CHECK_ERROR(status); + matches->setComparer(stringsEqual); + matches->setDeleter(uprv_deleteUObject); + } +} + +// See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#filter-variants +// `pref` is a vector of vectors of strings; `vars` is a vector of PrioritizedVariants +void filterVariants(const VariantMap& variants, const UVector& pref, UErrorCode &status, UVector& vars) { + CHECK_ERROR(status); + + // 1. Let `vars` be a new empty list of variants. + // (Not needed since `vars` is an out-parameter) + // 2. For each variant `var` of the message: + int32_t pos = VariantMap::FIRST; + while (true) { + const SelectorKeys* selectorKeys; + const Pattern* p; + if (!variants.next(pos, selectorKeys, p)) { + break; + } + // Note: Here, `var` names the key list of `var`, + // not a Variant itself + const KeyList& var = selectorKeys->getKeys(); + // 2i. For each index `i` in `pref`: + bool noMatch = false; + for (int32_t i = 0; i < pref.size(); i++) { + // 2i(a). Let `key` be the `var` key at position `i`. + U_ASSERT(i < var.length()); + const Key& key = *var.get(i); + // 2i(b). If key is the catch-all key '*': + if (key.isWildcard()) { + // 2i(b)(a). Continue the inner loop on pref. + continue; + } + // 2i(c). Assert that `key` is a literal. + // (Not needed) + // 2i(d). Let `ks` be the resolved value of `key`. + UnicodeString ks = key.asLiteral().stringContents(); + // 2i(e). Let `matches` be the list of strings at index `i` of `pref`. + const UVector& matches = *(static_cast(pref[i])); + // 2i(f). If `matches` includes `ks` + if (matches.contains(&ks)) { + // 2i(f)(a). Continue the inner loop on `pref`. + continue; + } + // 2i(g). Else: + // 2i(g)(a). Continue the outer loop on message variants. 
+ noMatch = true; + break; + } + if (!noMatch) { + // Append `var` as the last element of the list `vars`. + LocalPointer tuple(new PrioritizedVariant(-1, *selectorKeys, *p)); + if (!tuple.isValid()) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + vars.adoptElement(tuple.orphan(), status); + CHECK_ERROR(status); + } + } +} + +int32_t comparePrioritizedVariants(UElement left, UElement right) { + const PrioritizedVariant& tuple1 = *(static_cast(left.pointer)); + const PrioritizedVariant& tuple2 = *(static_cast(right.pointer)); + if (tuple1.priority < tuple2.priority) { + return -1; + } + if (tuple1.priority == tuple2.priority) { + return 0; + } + return 1; +} + +int32_t comparecomparePrioritizedVariants(UElement left, UElement right) { + // NOTE(review): apparent accidental copy of comparePrioritizedVariants() (doubled name prefix); + // nothing in this file references it. Forward to the real comparator so the ordering logic + // lives in one place; delete this wrapper once it is confirmed to be unused. + return comparePrioritizedVariants(left, right); +} + +// See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#sort-variants +// `sortable` is a vector of PrioritizedVariants +static void sortVariantTuples(UVector& sortable, UErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + sortable.sort(comparePrioritizedVariants, errorCode); +} + +// See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#sort-variants +// Leaves the preferred variant as element 0 in `sortable` +// Note: this sorts in-place, so `sortable` is just `vars` +// `pref` is a vector of vectors of strings; `vars` is a vector of PrioritizedVariants +void sortVariants(const UVector& pref, UErrorCode& status, UVector& vars) { + CHECK_ERROR(status); + +// Note: steps 1 and 2 are omitted since we use `vars` as `sortable` (we sort in-place) + // 1. Let `sortable` be a new empty list of (integer, variant) tuples. + // (Not needed since `sortable` is an out-parameter) + // 2. 
For each variant `var` of `vars` + // 2i. Let tuple be a new tuple (-1, var). + // 2ii. Append `tuple` as the last element of the list `sortable`. + + // 3. Let `len` be the integer count of items in `pref`. + int32_t len = pref.size(); + // 4. Let `i` be `len` - 1. + int32_t i = len - 1; + // 5. While i >= 0: + while (i >= 0) { + // 5i. Let `matches` be the list of strings at index `i` of `pref`. + const UVector& matches = *(static_cast(pref[i])); + // 5ii. Let `minpref` be the integer count of items in `matches`. + int32_t minpref = matches.size(); + // 5iii. For each tuple `tuple` of `sortable`: + for (int32_t j = 0; j < vars.size(); j++) { + PrioritizedVariant* tuple = static_cast(vars[j]); + // 5iii(a). Let matchpref be an integer with the value minpref. + int32_t matchpref = minpref; + // 5iii(b). Let `key` be the tuple variant key at position `i`. + const KeyList& tupleVariantKeys = tuple->keys.getKeys(); + U_ASSERT(i < ((int32_t) tupleVariantKeys.length())); // Given by earlier semantic checking + const Key& key = *tupleVariantKeys.get(((int32_t) i)); + // 5iii(c) If `key` is not the catch-all key '*': + if (!key.isWildcard()) { + // 5iii(c)(a). Assert that `key` is a literal. + // (Not needed) + // 5iii(c)(b). Let `ks` be the resolved value of `key`. + UnicodeString ks = key.asLiteral().stringContents(); + // 5iii(c)(c) Let matchpref be the integer position of ks in `matches`. + matchpref = matches.indexOf(&ks); + U_ASSERT(matchpref != -1); + } + // 5iii(d) Set the `tuple` integer value as matchpref. + tuple->priority = matchpref; + } + // 5iv. Set `sortable` to be the result of calling the method SortVariants(`sortable`) + sortVariantTuples(vars, status); + // 5v. Set `i` to be `i` - 1. + i--; + } + // The caller is responsible for steps 6 and 7 + // 6. Let `var` be the `variant` element of the first element of `sortable`. + // 7. 
Select the pattern of `var` +} + + +// Evaluate the operand +void MessageFormatter::resolveVariables(const Environment& env, const Operand& rand, ExpressionContext& context, UErrorCode &status) const { + CHECK_ERROR(status); + + if (rand.isNull()) { + // Nothing to do + return; + } else if (rand.isLiteral()) { + // If there's already a function name set, this shouldn't have been evaluated + U_ASSERT(!context.hasFunctionName()); + formatLiteral(rand.asLiteral(), context); + } else { + // Must be variable + const VariableName& var = rand.asVariable(); + // Resolve the variable + const Closure* referent = env.lookup(var); + if (referent != nullptr) { + // Resolve the referent + resolveVariables(referent->getEnv(), referent->getExpr(), context, status); + return; + } + // Either this is a global var or an unbound var -- + // either way, it can't be bound to a function call. + context.setFallbackTo(var); + // Check globals + if (context.messageContext().hasGlobal(var)) { + evalArgument(var, context); + } else { + // Unresolved variable -- could be a previous warning. 
Nothing to resolve + U_ASSERT(context.messageContext().getErrors().hasUnresolvedVariableError()); + } + } +} + +// Evaluate the expression except for not performing the top-level function call +// (which is expected to be a selector, but may not be, in error cases) +void MessageFormatter::resolveVariables(const Environment& env, const Expression& expr, ExpressionContext& context, UErrorCode &status) const { + CHECK_ERROR(status); + + // A `reserved` is an error + if (expr.isReserved()) { + context.messageContext().getErrors().setReservedError(status); + U_ASSERT(context.isFallback()); + return; + } + + // Function call -- resolve the operand and options + if (expr.isFunctionCall()) { + const Operator& rator = expr.getOperator(); + context.setFunctionName(rator.getFunctionName(), status); + resolveOptions(env, rator.getOptions(), context, status); + // Operand may be the null argument, but resolveVariables() handles that + formatOperand(env, expr.getOperand(), context, status); + } else { + resolveVariables(env, expr.getOperand(), context, status); + } +} + +// Leaves `context` either as a fallback with errors, +// or in a state with a pending call to a selector that has been set +void MessageFormatter::formatSelectorExpression(const Environment& globalEnv, const Expression& expr, ExpressionContext& context, UErrorCode &status) const { + CHECK_ERROR(status); + + // Resolve expression to determine if it's a function call + resolveVariables(globalEnv, expr, context, status); + + Errors& err = context.messageContext().getErrors(); + + // If there is a selector, then `resolveVariables()` recorded it in the context + if (context.hasSelector()) { + // Check if there was an error + if (context.isFallback()) { + // Use a null expression if it's a syntax or data model warning; + // create a valid (non-fallback) formatted placeholder from the + // fallback string otherwise + if (err.hasSyntaxError() || err.hasDataModelError()) { + U_ASSERT(!context.hasInput()); + } else { + 
context.promoteFallbackToInput(); + } + } + } else { + // Determine the type of error to set + if (context.hasFunctionName()) { + const FunctionName& fn = context.getFunctionName(); + // A selector used as a formatter is a selector error + if (context.hasFormatter()) { + err.setSelectorError(fn, status); + } else { + // Otherwise, the error is an unknown function error + err.setUnknownFunction(fn, status); + } + } else { + // No function name -- this is a missing selector annotation error + err.setMissingSelectorAnnotation(status); + } + context.clearFunctionName(); + context.clearFunctionOptions(); + context.setFallback(); + } +} + +void MessageFormatter::formatSelectors(MessageContext& context, const Environment& env, const ExpressionList& selectors, const VariantMap& variants, UErrorCode &status, UnicodeString& result) const { + CHECK_ERROR(status); + + // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#pattern-selection + + // Resolve Selectors + // res is a vector of ResolvedExpressions + int32_t numSelectors = selectors.length(); + + // vector of ExpressionContexts + LocalPointer res(new UVector(numSelectors, status)); + CHECK_ERROR(status); + res->setDeleter(uprv_deleteUObject); + resolveSelectors(context, env, selectors, status, *res); + + // Resolve Preferences + // pref is a vector of vectors of strings + LocalPointer pref(new UVector(numSelectors, status)); + CHECK_ERROR(status); + pref->setDeleter(uprv_deleteUObject); + resolvePreferences(*res, variants, status, *pref); + + // Filter Variants + // vars is a vector of PrioritizedVariants + LocalPointer vars(new UVector(variants.size(), status)); + CHECK_ERROR(status); + vars->setDeleter(uprv_deleteUObject); + filterVariants(variants, *pref, status, *vars); + + // Sort Variants and select the final pattern + // Note: `sortable` in the spec is just `vars` here, + // which is sorted in-place + sortVariants(*pref, status, *vars); + CHECK_ERROR(status); // needs to be checked 
to ensure that `sortable` is valid + + // 6. Let `var` be the `variant` element of the first element of `sortable`. + U_ASSERT(vars->size() > 0); // This should have been checked earlier (having 0 variants would be a data model error) + const PrioritizedVariant& var = *(static_cast((*vars)[0])); + // 7. Select the pattern of `var` + const Pattern& pat = var.pat; + + // Format the pattern + formatPattern(context, env, pat, status, result); +} + +PrioritizedVariant::~PrioritizedVariant() {} + +void MessageFormatter::formatToString(const MessageArguments& arguments, UErrorCode &status, UnicodeString& result) const { + CHECK_ERROR(status); + + // Create a new context with the given arguments and the `errors` structure + LocalPointer context(MessageContext::create(*this, arguments, *errors, status)); + CHECK_ERROR(status); + + const MessageFormatDataModel& dataModel = getDataModel(); + + // Note: we currently evaluate variables lazily, + // without memoization. This call is still necessary + // to check out-of-scope uses of local variables in + // right-hand sides (unresolved variable errors can + // only be checked when arguments are known) + + // Check for resolution errors + Checker(dataModel, context->getErrors()).check(status); + + // Create a new environment that will store closures for all local variables + Environment* env = Environment::create(status); + CHECK_ERROR(status); + + // Check for unresolved variable errors + checkDeclarations(*context, env, status); + CHECK_ERROR(status); + LocalPointer globalEnv(env); + + if (!dataModel.hasSelectors()) { + formatPattern(*context, *globalEnv, dataModel.getPattern(), status, result); + } else { + // Check for errors/warnings -- if so, then the result of pattern selection is the fallback value + // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#pattern-selection + Errors& err = context->getErrors(); + if (err.hasSyntaxError() || err.hasDataModelError()) { + result += REPLACEMENT; + } 
else { + formatSelectors(*context, *globalEnv, dataModel.getSelectors(), dataModel.getVariants(), status, result); + } + } + // Update status according to all errors seen while formatting + context->checkErrors(status); + // Clear resolution and formatting errors, in case this MessageFormatter object + // is used again with different arguments + clearErrors(); + return; +} + +void MessageFormatter::clearErrors() const { + errors->clearResolutionAndFormattingErrors(); +} + +// ---------------------------------------- +// Checking for resolution errors + +void MessageFormatter::check(MessageContext& context, const Environment& localEnv, const OptionMap& options, UErrorCode &status) const { + CHECK_ERROR(status); + + // Check the RHS of each option + int32_t pos = OptionMap::FIRST; + UnicodeString k; // not used + const Operand* rhs; + while(true) { + if (!options.next(pos, k, rhs)) { + break; + } + U_ASSERT(rhs != nullptr); + check(context, localEnv, *rhs, status); + } +} + +void MessageFormatter::check(MessageContext& context, const Environment& localEnv, const Operand& rand, UErrorCode &status) const { + CHECK_ERROR(status); + + // Nothing to check for literals + if (rand.isLiteral() || rand.isNull()) { + return; + } + + // Check that variable is in scope + const VariableName& var = rand.asVariable(); + // Check local scope + if (localEnv.lookup(var) != nullptr) { + return; + } + // Check global scope + if (context.hasGlobalAsFormattable(var) || context.hasGlobalAsObject(var)) { + return; + } + context.getErrors().setUnresolvedVariable(var, status); +} + +void MessageFormatter::check(MessageContext& context, const Environment& localEnv, const Expression& expr, UErrorCode &status) const { + CHECK_ERROR(status); + + // Check for unresolved variable errors + if (expr.isFunctionCall()) { + const Operator& rator = expr.getOperator(); + const Operand& rand = expr.getOperand(); + check(context, localEnv, rand, status); + check(context, localEnv, rator.getOptions(), 
status); + } +} + +// Check for resolution errors +void MessageFormatter::checkDeclarations(MessageContext& context, Environment*& env, UErrorCode &status) const { + CHECK_ERROR(status); + + const Bindings& decls = getDataModel().getLocalVariables(); + U_ASSERT(env != nullptr); + + for (int32_t i = 0; i < decls.length(); i++) { + const Binding* decl = decls.get(i); + U_ASSERT(decl != nullptr); + const Expression& rhs = decl->getValue(); + check(context, *env, rhs, status); + + // Add a closure to the global environment, + // memoizing the value of localEnv up to this point + Closure* closure = Closure::create(rhs, *env, status); + CHECK_ERROR(status); + + // Add the LHS to the environment for checking the next declaration + env = Environment::create(decl->getVariable(), closure, env, status); + CHECK_ERROR(status); + } +} +} // namespace message2 +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/i18n/messageformat2_builder.cpp b/icu4c/source/i18n/messageformat2_builder.cpp new file mode 100644 index 000000000000..55dd54c438f1 --- /dev/null +++ b/icu4c/source/i18n/messageformat2_builder.cpp @@ -0,0 +1,162 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/messageformat2.h" +#include "unicode/messageformat2_data_model.h" +#include "uvector.h" // U_ASSERT + +U_NAMESPACE_BEGIN namespace message2 { + +// ------------------------------------- +// Creates a MessageFormat instance based on the pattern. 
+ +// Returns a new (uninitialized) builder +MessageFormatter::Builder* MessageFormatter::builder(UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return nullptr; + } + LocalPointer tree(new Builder()); + if (!tree.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + return tree.orphan(); +} + +MessageFormatter::Builder& MessageFormatter::Builder::setPattern(const UnicodeString& pat) { + hasPattern = true; + pattern = pat; + dataModel = nullptr; + + return *this; +} + +// Precondition: `reg` is non-null +// Does not adopt `reg` +MessageFormatter::Builder& MessageFormatter::Builder::setFunctionRegistry(const FunctionRegistry* reg) { + U_ASSERT(reg != nullptr); + customFunctionRegistry = reg; + return *this; +} + +MessageFormatter::Builder& MessageFormatter::Builder::setLocale(const Locale& loc) { + locale = loc; + return *this; +} + +// Does not adopt `dataModel` +MessageFormatter::Builder& MessageFormatter::Builder::setDataModel(const MessageFormatDataModel* newDataModel) { + U_ASSERT(newDataModel != nullptr); + hasPattern = false; + dataModel = newDataModel; + + return *this; +} + +/* + This build() method is non-destructive, which entails the risk that + its borrowed FunctionRegistry and (if the setDataModel() method was called) + MessageFormatDataModel pointers could become invalidated. 
+*/ +MessageFormatter* MessageFormatter::Builder::build(UParseError& parseError, UErrorCode& errorCode) const { + NULL_ON_ERROR(errorCode); + + LocalPointer mf(new MessageFormatter(*this, parseError, errorCode)); + if (U_FAILURE(errorCode)) { + return nullptr; + } + return mf.orphan(); +} + +void MessageFormatter::initErrors(UErrorCode& errorCode) { + CHECK_ERROR(errorCode); + errors.adoptInstead(Errors::create(errorCode)); +} + +MessageFormatter::MessageFormatter(const MessageFormatter::Builder& builder, UParseError &parseError, + UErrorCode &success) : locale(builder.locale), customFunctionRegistry(builder.customFunctionRegistry) { + CHECK_ERROR(success); + + // Set up the standard function registry + LocalPointer standardFunctionsBuilder(FunctionRegistry::builder(success)); + CHECK_ERROR(success); + + standardFunctionsBuilder->setFormatter(UnicodeString("datetime"), new StandardFunctions::DateTimeFactory(), success) + .setFormatter(UnicodeString("number"), new StandardFunctions::NumberFactory(), success) + .setFormatter(UnicodeString("identity"), new StandardFunctions::IdentityFactory(), success) + .setSelector(UnicodeString("plural"), new StandardFunctions::PluralFactory(UPLURAL_TYPE_CARDINAL), success) + .setSelector(UnicodeString("selectordinal"), new StandardFunctions::PluralFactory(UPLURAL_TYPE_ORDINAL), success) + .setSelector(UnicodeString("select"), new StandardFunctions::TextFactory(), success) + .setSelector(UnicodeString("gender"), new StandardFunctions::TextFactory(), success); + standardFunctionRegistry.adoptInstead(standardFunctionsBuilder->build(success)); + CHECK_ERROR(success); + standardFunctionRegistry->checkStandard(); + + initErrors(success); + CHECK_ERROR(success); + + // Validate pattern and build data model + // First, check that exactly one of the pattern and data model are set, but not both + + bool dataModelSet = builder.dataModel != nullptr; + + if ((!builder.hasPattern && !dataModelSet) + || (builder.hasPattern && dataModelSet)) { + 
success = U_INVALID_STATE_ERROR; + return; + } + + // If data model was set, just assign it + if (dataModelSet) { + ownedDataModel = false; + borrowedDataModel = builder.dataModel; + return; + } + borrowedDataModel = nullptr; + + LocalPointer tree(MessageFormatDataModel::builder(success)); + if (U_FAILURE(success)) { + return; + } + + // Initialize formatter cache + cachedFormatters.adoptInstead(new CachedFormatters(success)); + + // Parse the pattern + LocalPointer parser(Parser::create(builder.pattern, *tree, normalizedInput, *errors, success)); + CHECK_ERROR(success); + parser->parse(parseError, success); + + // Build the data model based on what was parsed + LocalPointer dataModelPtr(tree->build(success)); + if (U_SUCCESS(success)) { + ownedDataModel = true; + dataModel.adoptInstead(dataModelPtr.orphan()); + } +} + +const MessageFormatDataModel& MessageFormatter::getDataModel() const { + U_ASSERT(dataModelOK()); + if (ownedDataModel) { + return *dataModel; + } + return *borrowedDataModel; +} + +bool MessageFormatter::dataModelOK() const { + if (ownedDataModel) { + return dataModel.isValid() && borrowedDataModel == nullptr; + } + return !dataModel.isValid() && borrowedDataModel != nullptr; +} + +MessageFormatter::~MessageFormatter() {} + +} // namespace message2 +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/i18n/messageformat2_checker.cpp b/icu4c/source/i18n/messageformat2_checker.cpp new file mode 100644 index 000000000000..87f11a3850ee --- /dev/null +++ b/icu4c/source/i18n/messageformat2_checker.cpp @@ -0,0 +1,212 @@ +// © 2016 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/messageformat2.h" +#include "unicode/messageformat2_checker.h" +#include "unicode/messageformat2_data_model.h" +#include "uvector.h" // U_ASSERT + +U_NAMESPACE_BEGIN namespace message2 { + +/* +Checks data model errors +(see https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#error-handling ) + +The following are checked here: +Variant Key Mismatch +Missing Fallback Variant (called NonexhaustivePattern here) +Missing Selector Annotation + +(Duplicate option names are checked by the parser) +*/ + +// ------------------------------------------------ + +using Type = TypeEnvironment::Type; + +using Binding = MessageFormatDataModel::Binding; +using Expression = MessageFormatDataModel::Expression; +using ExpressionList = MessageFormatDataModel::ExpressionList; +using KeyList = MessageFormatDataModel::KeyList; +using Operand = MessageFormatDataModel::Operand; +using Pattern = MessageFormatDataModel::Pattern; +using SelectorKeys = MessageFormatDataModel::SelectorKeys; +using VariantMap = MessageFormatDataModel::VariantMap; + +// Type environments +// ----------------- + +TypeEnvironment::TypeEnvironment(UErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + // initialize `contents` + annotated.adoptInstead(new UVector(errorCode)); + CHECK_ERROR(errorCode); + if (U_FAILURE(errorCode)) { + return; + } + annotated->setDeleter(uprv_deleteUObject); +} + +Type TypeEnvironment::get(const VariableName& var) const { + for (int32_t i = 0; ((int32_t) i) < annotated->size(); i++) { + VariableName* lhs = (VariableName*) (*annotated)[i]; + U_ASSERT(lhs != nullptr); + if (*lhs == var) { + return Annotated; + } + } + return Unannotated; +} + +void TypeEnvironment::extend(const VariableName& var, Type t, UErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + if (t == Unannotated) { + // Nothing to do, as variables are 
considered + // unannotated by default + return; + } + + LocalPointer v(new VariableName(var)); + if (!v.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + annotated->adoptElement(v.orphan(), errorCode); +} + +TypeEnvironment::~TypeEnvironment() {} + +// --------------------- + +static bool areDefaultKeys(const KeyList& keys) { + U_ASSERT(keys.length() > 0); + for (int32_t i = 0; i < keys.length(); i++) { + if (!keys.get(i)->isWildcard()) { + return false; + } + } + return true; +} + +void MessageFormatter::Checker::checkVariants(UErrorCode& error) { + CHECK_ERROR(error); + U_ASSERT(dataModel.hasSelectors()); + + // Determine the number of selectors + int32_t numSelectors = dataModel.getSelectors().length(); + + // Check that each variant has a key list with length + // equal to the number of selectors + const VariantMap& variants = dataModel.getVariants(); + int32_t pos = VariantMap::FIRST; + const SelectorKeys* selectorKeys; + const Pattern* pattern; + + // Check that one variant includes only wildcards + bool defaultExists = false; + + while (variants.next(pos, selectorKeys, pattern)) { + const KeyList& keys = selectorKeys->getKeys(); + if (keys.length() != numSelectors) { + // Variant key mismatch + errors.addError(Error::Type::VariantKeyMismatchError, error); + return; + } + defaultExists |= areDefaultKeys(keys); + } + if (!defaultExists) { + errors.addError(Error::Type::NonexhaustivePattern, error); + return; + } +} + +void MessageFormatter::Checker::requireAnnotated(const TypeEnvironment& t, const Expression& selectorExpr, UErrorCode& error) { + CHECK_ERROR(error); + + if (selectorExpr.isFunctionCall()) { + return; // No error + } + if (!selectorExpr.isReserved()) { + const Operand& rand = selectorExpr.getOperand(); + if (rand.isVariable()) { + if (t.get(rand.asVariable()) == Type::Annotated) { + return; // No error + } + } + } + // If this code is reached, an error was detected + errors.addError(Error::Type::MissingSelectorAnnotation, 
error); +} + +void MessageFormatter::Checker::checkSelectors(const TypeEnvironment& t, UErrorCode& error) { + CHECK_ERROR(error); + U_ASSERT(dataModel.hasSelectors()); + + // Check each selector; if it's not annotated, emit a + // "missing selector annotation" error + const ExpressionList& selectors = dataModel.getSelectors(); + for (int32_t i = 0; i < selectors.length(); i++) { + const Expression* expr = selectors.get(i); + U_ASSERT(expr != nullptr); + requireAnnotated(t, *expr, error); + } +} + +Type typeOf(TypeEnvironment& t, const Expression& expr) { + if (expr.isFunctionCall()) { + return Type::Annotated; + } + if (expr.isReserved()) { + return Type::Unannotated; + } + const Operand& rand = expr.getOperand(); + U_ASSERT(!rand.isNull()); + if (rand.isLiteral()) { + return Type::Unannotated; + } + U_ASSERT(rand.isVariable()); + return t.get(rand.asVariable()); +} + +void MessageFormatter::Checker::checkDeclarations(TypeEnvironment& t, UErrorCode& error) { + CHECK_ERROR(error); + + // For each declaration, extend the type environment with its type + // Only a very simple type system is necessary: local variables + // have the type "annotated" or "unannotated". + // Free variables (message arguments) are treated as unannotated. 
+ const MessageFormatDataModel::Bindings& env = dataModel.getLocalVariables(); + for (int32_t i = 0; i < env.length(); i++) { + const Binding* b = env.get(i); + U_ASSERT(b != nullptr); + const Expression& rhs = b->getValue(); + t.extend(b->getVariable(), typeOf(t, rhs), error); + } +} + +void MessageFormatter::Checker::check(UErrorCode& error) { + CHECK_ERROR(error); + + TypeEnvironment typeEnv(error); + checkDeclarations(typeEnv, error); + // Pattern message + if (!dataModel.hasSelectors()) { + return; + } else { + // Selectors message + checkSelectors(typeEnv, error); + checkVariants(error); + } +} + +} // namespace message2 +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + diff --git a/icu4c/source/i18n/messageformat2_context.cpp b/icu4c/source/i18n/messageformat2_context.cpp new file mode 100644 index 000000000000..8f4e784e891c --- /dev/null +++ b/icu4c/source/i18n/messageformat2_context.cpp @@ -0,0 +1,633 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/messageformat2_context.h" +#include "unicode/messageformat2_function_registry.h" +#include "unicode/messageformat2_macros.h" +#include "unicode/messageformat2.h" +#include "uvector.h" // U_ASSERT + +U_NAMESPACE_BEGIN namespace message2 { + +// The context contains all the information needed to process +// an entire message: arguments, formatter cache, and error list + +// ------------------------------------------------------ +// MessageArguments + +using Arguments = MessageArguments; + +bool Arguments::hasFormattable(const VariableName& arg) const { + U_ASSERT(contents.isValid() && objectContents.isValid()); + return contents->containsKey(arg.identifier()); +} + +bool Arguments::hasObject(const VariableName& arg) const { + U_ASSERT(contents.isValid() && objectContents.isValid()); + return objectContents->containsKey(arg.identifier()); +} + +const 
Formattable& Arguments::getFormattable(const VariableName& arg) const { + U_ASSERT(hasFormattable(arg)); + const Formattable* result = static_cast(contents->get(arg.identifier())); + U_ASSERT(result != nullptr); + return *result; +} + +const UObject* Arguments::getObject(const VariableName& arg) const { + U_ASSERT(hasObject(arg)); + const UObject* result = static_cast(objectContents->get(arg.identifier())); + U_ASSERT(result != nullptr); + return result; +} + +Arguments::Builder::Builder(UErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + contents.adoptInstead(new Hashtable(compareVariableName, nullptr, errorCode)); + objectContents.adoptInstead(new Hashtable(compareVariableName, nullptr, errorCode)); + CHECK_ERROR(errorCode); + // The `contents` hashtable owns the values, but does not own the keys + contents->setValueDeleter(uprv_deleteUObject); + // The `objectContents` hashtable does not own the values +} + +Arguments::Builder& Arguments::Builder::add(const UnicodeString& name, const UnicodeString& val, UErrorCode& errorCode) { + THIS_ON_ERROR(errorCode); + + Formattable* valPtr(ExpressionContext::createFormattable(val, errorCode)); + THIS_ON_ERROR(errorCode); + return add(name, valPtr, errorCode); +} + +Arguments::Builder& Arguments::Builder::addDouble(const UnicodeString& name, double val, UErrorCode& errorCode) { + THIS_ON_ERROR(errorCode); + + Formattable* valPtr(ExpressionContext::createFormattable(val, errorCode)); + THIS_ON_ERROR(errorCode); + return add(name, valPtr, errorCode); +} + +Arguments::Builder& Arguments::Builder::addInt64(const UnicodeString& name, int64_t val, UErrorCode& errorCode) { + THIS_ON_ERROR(errorCode); + + Formattable* valPtr(ExpressionContext::createFormattable(val, errorCode)); + THIS_ON_ERROR(errorCode); + return add(name, valPtr, errorCode); +} + +Arguments::Builder& Arguments::Builder::addDate(const UnicodeString& name, UDate val, UErrorCode& errorCode) { + THIS_ON_ERROR(errorCode); + + Formattable* 
valPtr(ExpressionContext::createFormattableDate(val, errorCode)); + THIS_ON_ERROR(errorCode); + return add(name, valPtr, errorCode); +} + +Arguments::Builder& Arguments::Builder::addDecimal(const UnicodeString& name, StringPiece val, UErrorCode& errorCode) { + THIS_ON_ERROR(errorCode); + + Formattable* valPtr(ExpressionContext::createFormattableDecimal(val, errorCode)); + THIS_ON_ERROR(errorCode); + return add(name, valPtr, errorCode); +} + +Arguments::Builder& Arguments::Builder::add(const UnicodeString& name, const UnicodeString* arr, int32_t count, UErrorCode& errorCode) { + THIS_ON_ERROR(errorCode); + + Formattable* valPtr(ExpressionContext::createFormattable(arr, count, errorCode)); + THIS_ON_ERROR(errorCode); + return add(name, valPtr, errorCode); +} + +// Does not adopt the object +Arguments::Builder& Arguments::Builder::addObject(const UnicodeString& name, const UObject* obj, UErrorCode& errorCode) { + THIS_ON_ERROR(errorCode); + + // This const is safe because the values in the objectContents hash table + // will only be accessed through a (const UObject*) pointer + objectContents->put(name, const_cast(obj), errorCode); + return *this; +} + +// Adopts its argument +Arguments::Builder& Arguments::Builder::add(const UnicodeString& name, Formattable* value, UErrorCode& errorCode) { + THIS_ON_ERROR(errorCode); + + U_ASSERT(value != nullptr); + + contents->put(name, value, errorCode); + return *this; +} + +/* static */ MessageArguments::Builder* MessageArguments::builder(UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + MessageArguments::Builder* result = new MessageArguments::Builder(errorCode); + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return result; +} + +MessageArguments* MessageArguments::Builder::build(UErrorCode& errorCode) const { + NULL_ON_ERROR(errorCode); + U_ASSERT(contents.isValid() && objectContents.isValid()); + + LocalPointer contentsCopied(new Hashtable(compareVariableName, nullptr, errorCode)); + 
LocalPointer objectContentsCopied(new Hashtable(compareVariableName, nullptr, errorCode)); + NULL_ON_ERROR(errorCode); + // The `contents` hashtable owns the values, but does not own the keys + contentsCopied->setValueDeleter(uprv_deleteUObject); + // The `objectContents` hashtable does not own the values + + int32_t pos = UHASH_FIRST; + LocalPointer optionValue; + // Copy the non-objects + while (true) { + const UHashElement* element = contents->nextElement(pos); + if (element == nullptr) { + break; + } + const Formattable& toCopy = *(static_cast(element->value.pointer)); + optionValue.adoptInstead(new Formattable(toCopy)); + if (!optionValue.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + UnicodeString* key = static_cast(element->key.pointer); + contentsCopied->put(*key, optionValue.orphan(), errorCode); + } + // Copy the objects + pos = UHASH_FIRST; + while (true) { + const UHashElement* element = objectContents->nextElement(pos); + if (element == nullptr) { + break; + } + UnicodeString* key = static_cast(element->key.pointer); + objectContentsCopied->put(*key, element->value.pointer, errorCode); + } + MessageArguments* result = new MessageArguments(contentsCopied.orphan(), objectContentsCopied.orphan()); + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return result; +} + +MessageArguments::~MessageArguments() {} +MessageArguments::Builder::~Builder() {} + +// Message arguments +// ----------------- + +bool MessageContext::hasGlobalAsObject(const VariableName& v) const { + return arguments.hasObject(v); +} + +bool MessageContext::hasGlobalAsFormattable(const VariableName& v) const { + return arguments.hasFormattable(v); +} + +const UObject* MessageContext::getGlobalAsObject(const VariableName& v) const { + U_ASSERT(hasGlobalAsObject(v)); + return arguments.getObject(v); +} + +const Formattable& MessageContext::getGlobalAsFormattable(const VariableName& v) const { + U_ASSERT(hasGlobalAsFormattable(v)); + 
return arguments.getFormattable(v); +} + +// ------------------------------------------------------ +// Formatter cache + +const Formatter* CachedFormatters::getFormatter(const FunctionName& f) { + U_ASSERT(cache.isValid()); + return ((Formatter*) cache->get(f.toString())); +} + +void CachedFormatters::setFormatter(const FunctionName& f, Formatter* val, UErrorCode& errorCode) { + CHECK_ERROR(errorCode); + U_ASSERT(cache.isValid()); + cache->put(f.toString(), val, errorCode); +} + +CachedFormatters::CachedFormatters(UErrorCode& errorCode) { + CHECK_ERROR(errorCode); + cache.adoptInstead(new Hashtable(compareVariableName, nullptr, errorCode)); + CHECK_ERROR(errorCode); + // The cache owns the values + cache->setValueDeleter(uprv_deleteUObject); +} + +// --------------------------------------------------- +// Function registry + + +bool MessageContext::isBuiltInSelector(const FunctionName& functionName) const { + return parent.standardFunctionRegistry->hasSelector(functionName); +} + +bool MessageContext::isBuiltInFormatter(const FunctionName& functionName) const { + return parent.standardFunctionRegistry->hasFormatter(functionName); +} + +// https://github.com/unicode-org/message-format-wg/issues/409 +// Unknown function = unknown function error +// Formatter used as selector = selector error +// Selector used as formatter = formatting error +const SelectorFactory* MessageContext::lookupSelectorFactory(const FunctionName& functionName, UErrorCode& status) const { + NULL_ON_ERROR(status); + + if (isBuiltInSelector(functionName)) { + return parent.standardFunctionRegistry->getSelector(functionName); + } + if (isBuiltInFormatter(functionName)) { + errors.setSelectorError(functionName, status); + return nullptr; + } + if (parent.hasCustomFunctionRegistry()) { + const FunctionRegistry& customFunctionRegistry = parent.getCustomFunctionRegistry(); + const SelectorFactory* customSelector = customFunctionRegistry.getSelector(functionName); + if (customSelector != nullptr) { + 
return customSelector; + } + if (customFunctionRegistry.getFormatter(functionName) != nullptr) { + errors.setSelectorError(functionName, status); + return nullptr; + } + } + // Either there is no custom function registry and the function + // isn't built-in, or the function doesn't exist in either the built-in + // or custom registry. + // Unknown function error + errors.setUnknownFunction(functionName, status); + return nullptr; +} + +FormatterFactory* MessageContext::lookupFormatterFactory(const FunctionName& functionName, UErrorCode& status) const { + NULL_ON_ERROR(status); + + if (isBuiltInFormatter(functionName)) { + return parent.standardFunctionRegistry->getFormatter(functionName); + } + if (isBuiltInSelector(functionName)) { + errors.setFormattingError(functionName, status); + return nullptr; + } + if (parent.hasCustomFunctionRegistry()) { + const FunctionRegistry& customFunctionRegistry = parent.getCustomFunctionRegistry(); + FormatterFactory* customFormatter = customFunctionRegistry.getFormatter(functionName); + if (customFormatter != nullptr) { + return customFormatter; + } + if (customFunctionRegistry.getSelector(functionName) != nullptr) { + errors.setFormattingError(functionName, status); + return nullptr; + } + } + // Either there is no custom function registry and the function + // isn't built-in, or the function doesn't exist in either the built-in + // or custom registry. 
+ // Unknown function error + errors.setUnknownFunction(functionName, status); + return nullptr; +} + +bool MessageContext::isCustomFormatter(const FunctionName& fn) const { + return parent.hasCustomFunctionRegistry() && parent.getCustomFunctionRegistry().getFormatter(fn) != nullptr; +} + + +bool MessageContext::isCustomSelector(const FunctionName& fn) const { + return parent.hasCustomFunctionRegistry() && parent.getCustomFunctionRegistry().getSelector(fn) != nullptr; +} + +const Formatter* MessageContext::maybeCachedFormatter(const FunctionName& f, UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + U_ASSERT(parent.cachedFormatters.isValid()); + + const Formatter* result = parent.cachedFormatters->getFormatter(f); + if (result == nullptr) { + // Create the formatter + + // First, look up the formatter factory for this function + FormatterFactory* formatterFactory = lookupFormatterFactory(f, errorCode); + NULL_ON_ERROR(errorCode); + // If the formatter factory was null, there must have been + // an earlier error/warning + if (formatterFactory == nullptr) { + U_ASSERT(errors.hasUnknownFunctionError() || errors.hasFormattingError()); + return nullptr; + } + NULL_ON_ERROR(errorCode); + + // Create a specific instance of the formatter + Formatter* formatter = formatterFactory->createFormatter(parent.locale, errorCode); + NULL_ON_ERROR(errorCode); + if (formatter == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + parent.cachedFormatters->setFormatter(f, formatter, errorCode); + return formatter; + } else { + return result; + } +} + + +// ------------------------------------------------------- +// MessageContext accessors and constructors + +MessageContext::MessageContext(const MessageFormatter& mf, const MessageArguments& args, Errors& e) : parent(mf), arguments(args), errors(e) {} + +/* static */ MessageContext* MessageContext::create(const MessageFormatter& mf, const MessageArguments& args, Errors& e, UErrorCode& errorCode) { + 
NULL_ON_ERROR(errorCode); + + LocalPointer result(new MessageContext(mf, args, e)); + NULL_ON_ERROR(errorCode); + return result.orphan(); +} + +// Errors +// ----------- + +void MessageContext::checkErrors(UErrorCode& status) const { + CHECK_ERROR(status); + errors.checkErrors(status); +} + +void Errors::setReservedError(UErrorCode& status) { + CHECK_ERROR(status); + + Error err(Error::Type::ReservedError); + addError(err, status); +} + +void Errors::setFormattingError(const FunctionName& formatterName, UErrorCode& status) { + CHECK_ERROR(status); + + Error err(Error::Type::FormattingError, formatterName.toString()); + addError(err, status); +} + + +void Errors::setMissingSelectorAnnotation(UErrorCode& status) { + CHECK_ERROR(status); + + Error err(Error::Type::MissingSelectorAnnotation); + addError(err, status); +} + +void Errors::setSelectorError(const FunctionName& selectorName, UErrorCode& status) { + CHECK_ERROR(status); + + Error err(Error::Type::SelectorError, selectorName.toString()); + addError(err, status); +} + +void Errors::setUnknownFunction(const FunctionName& functionName, UErrorCode& status) { + CHECK_ERROR(status); + + Error err(Error::Type::UnknownFunction, functionName.toString()); + addError(err, status); +} + +void Errors::setUnresolvedVariable(const VariableName& v, UErrorCode& status) { + CHECK_ERROR(status); + + Error err(Error::Type::UnresolvedVariable, v.identifier()); + addError(err, status); +} + +Errors* Errors::create(UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + return new Errors(errorCode); +} + +Errors::Errors(UErrorCode& errorCode) { + CHECK_ERROR(errorCode); + syntaxAndDataModelErrors.adoptInstead(new UVector(errorCode)); + resolutionAndFormattingErrors.adoptInstead(new UVector(errorCode)); + CHECK_ERROR(errorCode); + syntaxAndDataModelErrors->setDeleter(uprv_deleteUObject); + resolutionAndFormattingErrors->setDeleter(uprv_deleteUObject); + dataModelError = false; + formattingError = false; + 
missingSelectorAnnotationError = false; + selectorError = false; + syntaxError = false; + unknownFunctionError = false; +} + +int32_t Errors::count() const { + return syntaxAndDataModelErrors->size() + resolutionAndFormattingErrors->size(); +} + +bool Errors::hasError() const { + return count() > 0; +} + +void Errors::clearResolutionAndFormattingErrors() { + U_ASSERT(resolutionAndFormattingErrors.isValid()); + resolutionAndFormattingErrors->removeAllElements(); + formattingError = false; + selectorError = false; +} + +void Errors::checkErrors(UErrorCode& status) { + if (status != U_ZERO_ERROR) { + return; + } + + // Just handle the first error + // TODO: Eventually want to return all errors to caller + if (count() == 0) { + return; + } + Error* err; + if (syntaxAndDataModelErrors->size() > 0) { + err = (Error*) (*syntaxAndDataModelErrors)[0]; + } else { + U_ASSERT(resolutionAndFormattingErrors->size() > 0); + err = (Error*) (*resolutionAndFormattingErrors)[0]; + } + switch (err->type) { + case Error::Type::DuplicateOptionName: { + status = U_DUPLICATE_OPTION_NAME_ERROR; + break; + } + case Error::Type::VariantKeyMismatchError: { + status = U_VARIANT_KEY_MISMATCH_ERROR; + break; + } + case Error::Type::NonexhaustivePattern: { + status = U_NONEXHAUSTIVE_PATTERN_ERROR; + break; + } + case Error::Type::UnknownFunction: { + status = U_UNKNOWN_FUNCTION_ERROR; + break; + } + case Error::Type::UnresolvedVariable: { + status = U_UNRESOLVED_VARIABLE_ERROR; + break; + } + case Error::Type::FormattingError: { + status = U_FORMATTING_ERROR; + break; + } + case Error::Type::MissingSelectorAnnotation: { + status = U_MISSING_SELECTOR_ANNOTATION_ERROR; + break; + } + + case Error::Type::ReservedError: { + status = U_UNSUPPORTED_PROPERTY; + break; + } + case Error::Type::SyntaxError: { + status = U_SYNTAX_ERROR; + break; + } + case Error::Type::SelectorError: { + status = U_SELECTOR_ERROR; + break; + } + } +} + +void Errors::addSyntaxError(UErrorCode& status) { + 
CHECK_ERROR(status); + addError(Error(Error::Type::SyntaxError), status); +} + +void Errors::addError(Error e, UErrorCode& status) { + CHECK_ERROR(status); + + Error* eP = new Error(e); + if (eP == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + switch (e.type) { + case Error::Type::SyntaxError: { + syntaxError = true; + syntaxAndDataModelErrors->adoptElement(eP, status); + break; + } + case Error::Type::DuplicateOptionName: { + dataModelError = true; + syntaxAndDataModelErrors->adoptElement(eP, status); + break; + } + case Error::Type::VariantKeyMismatchError: { + dataModelError = true; + syntaxAndDataModelErrors->adoptElement(eP, status); + break; + } + case Error::Type::NonexhaustivePattern: { + dataModelError = true; + syntaxAndDataModelErrors->adoptElement(eP, status); + break; + } + case Error::Type::UnresolvedVariable: { + unresolvedVariableError = true; + syntaxAndDataModelErrors->adoptElement(eP, status); + break; + } + case Error::Type::FormattingError: { + formattingError = true; + resolutionAndFormattingErrors->adoptElement(eP, status); + break; + } + case Error::Type::MissingSelectorAnnotation: { + missingSelectorAnnotationError = true; + dataModelError = true; + syntaxAndDataModelErrors->adoptElement(eP, status); + break; + } + case Error::Type::ReservedError: { + dataModelError = true; + syntaxAndDataModelErrors->adoptElement(eP, status); + break; + } + case Error::Type::SelectorError: { + selectorError = true; + resolutionAndFormattingErrors->adoptElement(eP, status); + break; + } + case Error::Type::UnknownFunction: { + unknownFunctionError = true; + resolutionAndFormattingErrors->adoptElement(eP, status); + break; + } + } +} + +Errors::~Errors() {} +Error::~Error() {} + +MessageContext::~MessageContext() {} + + +// ---------------- Environments and closures + +Environment* Environment::create(const VariableName& var, Closure* c, Environment* parent, UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + Environment* result = new 
NonEmptyEnvironment(var, c, parent); + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + return result; +} + +Environment* Environment::create(UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + Environment* result = new EmptyEnvironment(); + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + return result; +} + +Closure* Closure::create(const Expression& expr, const Environment& env, UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + Closure* result = new Closure(expr, env); + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + return result; +} + +const Closure* EmptyEnvironment::lookup(const VariableName& v) const { + (void) v; + return nullptr; +} + +const Closure* NonEmptyEnvironment::lookup(const VariableName& v) const { + if (v == var) { + U_ASSERT(rhs.isValid()); + return rhs.getAlias(); + } + return parent->lookup(v); +} + +Environment::~Environment() {} +NonEmptyEnvironment::~NonEmptyEnvironment() {} +EmptyEnvironment::~EmptyEnvironment() {} + +Closure::~Closure() {} + +} // namespace message2 +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/i18n/messageformat2_data_model.cpp b/icu4c/source/i18n/messageformat2_data_model.cpp new file mode 100644 index 000000000000..77b402bab351 --- /dev/null +++ b/icu4c/source/i18n/messageformat2_data_model.cpp @@ -0,0 +1,860 @@ +// © 2016 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/messageformat2.h" +#include "unicode/messageformat2_data_model.h" +#include "uvector.h" // U_ASSERT + +U_NAMESPACE_BEGIN namespace message2 { + +using Binding = MessageFormatDataModel::Binding; +using Expression = MessageFormatDataModel::Expression; +using ExpressionList = MessageFormatDataModel::ExpressionList; +using Key = MessageFormatDataModel::Key; +using KeyList = MessageFormatDataModel::KeyList; +using Literal = MessageFormatDataModel::Literal; +using OptionMap = MessageFormatDataModel::OptionMap; +using Operand = MessageFormatDataModel::Operand; +using Operator = MessageFormatDataModel::Operator; +using Pattern = MessageFormatDataModel::Pattern; +using PatternPart = MessageFormatDataModel::PatternPart; +using Reserved = MessageFormatDataModel::Reserved; +using SelectorKeys = MessageFormatDataModel::SelectorKeys; +using VariantMap = MessageFormatDataModel::VariantMap; + +// Implementation + +//------------------ SelectorKeys + +SelectorKeys::Builder& SelectorKeys::Builder::add(Key* key, UErrorCode& errorCode) { + THIS_ON_ERROR(errorCode); + keys->add(key, errorCode); + return *this; +} + +const KeyList& SelectorKeys::getKeys() const { + U_ASSERT(!isBogus()); + return *keys; +} + +SelectorKeys::Builder::Builder(UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return; + } + keys.adoptInstead(KeyList::builder(errorCode)); +} + +SelectorKeys::SelectorKeys(const SelectorKeys& other) : keys(new KeyList(*(other.keys))) { + U_ASSERT(!other.isBogus()); +} + +SelectorKeys::Builder::~Builder() {} + +//------------------ VariableName + +UnicodeString VariableName::declaration() const { + UnicodeString result(DOLLAR); + result += variableName; + return result; +} + +VariableName::~VariableName() {} + +//------------------ Literal + +UnicodeString Literal::quotedString() const { + UnicodeString result(PIPE); + result 
+= stringContents(); + result += PIPE; + return result; +} + +const UnicodeString& Literal::stringContents() const { + U_ASSERT(contents.getType() == Formattable::Type::kString); + return contents.getString(); +} + +Literal::~Literal() {} + +//------------------ Operand + +/* static */ Operand* Operand::create(const VariableName& s, UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + Operand* result = new Operand(s); + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return result; +} + +// Literal +/* static */ Operand* Operand::create(const Literal& lit, UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + Operand* result = new Operand(lit); + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return result; +} + +// Null operand +/* static */ Operand* Operand::create(UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + Operand* result = new Operand(); + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return result; +} + +Operand::Operand(const Operand& other) : var(other.var), lit(other.lit), type(other.type) {} + +UBool Operand::isVariable() const { return type == Type::VARIABLE; } +UBool Operand::isLiteral() const { return type == Type::LITERAL; } +UBool Operand::isNull() const { return type == Type::NULL_OPERAND; } + +const Literal& Operand::asLiteral() const { + U_ASSERT(isLiteral()); + return lit; +} + +const VariableName& Operand::asVariable() const { + U_ASSERT(isVariable()); + return var; +} + +Operand::~Operand() {} + +//---------------- Key + +/* static */ Key* Key::create(UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + Key* k = new Key(); + if (k == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return k; +} + +/* static */ Key* Key::create(const Literal& lit, UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + Key* k = new Key(lit); + if (k == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return k; +} + + +void Key::toString(UnicodeString& 
result) const { + if (isWildcard()) { + result += ASTERISK; + return; + } + result += contents.stringContents(); +} + +const Literal& Key::asLiteral() const { + U_ASSERT(!isWildcard()); + return contents; +} + +//---------------- VariantMap + +int32_t VariantMap::size() const { + return contents->size(); +} + +// Because ImmutableVector::get() returns a T*, +// the out-parameters for `next()` are references to pointers +// rather than references to a `SelectorKeys` or a `Pattern`, +// in order to avoid either copying or creating a reference to +// a temporary value. +UBool VariantMap::next(int32_t &pos, const SelectorKeys*& k, const Pattern*& v) const { + UnicodeString unused; + if (!contents->next(pos, unused, v)) { + return false; + } + k = keyLists->get(pos - 1); + return true; +} + +VariantMap::Builder& VariantMap::Builder::add(SelectorKeys* key, Pattern* value, UErrorCode& errorCode) { + THIS_ON_ERROR(errorCode); + // Stringify `key` + UnicodeString keyResult; + concatenateKeys(*key, keyResult); + contents->add(keyResult, value, errorCode); + keyLists->add(key, errorCode); + return *this; +} + +VariantMap* VariantMap::Builder::build(UErrorCode& errorCode) const { + NULL_ON_ERROR(errorCode); + + LocalPointer> adoptedContents(contents->build(errorCode)); + LocalPointer> adoptedKeyLists(keyLists->build(errorCode)); + NULL_ON_ERROR(errorCode); + VariantMap* result = new VariantMap(adoptedContents.orphan(), adoptedKeyLists.orphan()); + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return result; +} + +/* static */ void VariantMap::Builder::concatenateKeys(const SelectorKeys& keys, UnicodeString& result) { + const KeyList& ks = keys.getKeys(); + int32_t len = ks.length(); + for (int32_t i = 0; i < len; i++) { + ks.get(i)->toString(result); + if (i != len - 1) { + result += SPACE; + } + } +} + +VariantMap::Builder::Builder(UErrorCode& errorCode) { + // initialize `contents` + // No value comparator needed + 
contents.adoptInstead(OrderedMap::builder(errorCode)); + // initialize `keyLists` + keyLists.adoptInstead(ImmutableVector::builder(errorCode)); + // `keyLists` does not adopt its elements +} + +/* static */ VariantMap::Builder* VariantMap::builder(UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + LocalPointer result(new VariantMap::Builder(errorCode)); + NULL_ON_ERROR(errorCode); + return result.orphan(); +} + +VariantMap::VariantMap(OrderedMap* vs, ImmutableVector* ks) : contents(vs), keyLists(ks) { + // Check invariant: `vs` and `ks` have the same size + U_ASSERT(vs->size() == ks->length()); +} + +VariantMap::Builder::~Builder() {} + +// ------------ Reserved + +int32_t Reserved::numParts() const { + U_ASSERT(!isBogus()); + return parts->length(); +} + +// Returns a const Literal* because ImmutableVector::get() returns a pointer +const Literal* Reserved::getPart(int32_t i) const { + U_ASSERT(!isBogus()); + U_ASSERT(i < numParts()); + return parts->get(i); +} + +Reserved::Builder::Builder(UErrorCode &errorCode) { + CHECK_ERROR(errorCode); + parts.adoptInstead(ImmutableVector::builder(errorCode)); +} + +Reserved::Builder* Reserved::builder(UErrorCode &errorCode) { + NULL_ON_ERROR(errorCode); + LocalPointer tree(new Builder(errorCode)); + NULL_ON_ERROR(errorCode); + return tree.orphan(); +} + +Reserved* Reserved::Builder::build(UErrorCode& errorCode) const { + NULL_ON_ERROR(errorCode); + LocalPointer> reservedParts(parts->build(errorCode)); + NULL_ON_ERROR(errorCode); + Reserved* result = new Reserved(reservedParts.orphan()); + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return result; +} + +Reserved::Builder& Reserved::Builder::add(const Literal& part, UErrorCode &errorCode) { + THIS_ON_ERROR(errorCode); + + LocalPointer lit(new Literal(part)); + if (!lit.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return *this; + } + parts->add(lit.orphan(), errorCode); + + return *this; +} + +Reserved::Builder::~Builder() {} + 
+//------------------------ Operator + +const FunctionName& Operator::getFunctionName() const { + U_ASSERT(!isBogus() && !isReserved()); + return functionName; +} + +UnicodeString FunctionName::toString() const { + UnicodeString result; + result += sigilChar(); + result += functionName; + return result; +} + +FunctionName::~FunctionName() {} + +const Reserved& Operator::asReserved() const { + U_ASSERT(!isBogus() && isReserved()); + return *reserved; +} + +const OptionMap& Operator::getOptions() const { + U_ASSERT(!isBogus() && !isReserved()); + return *options; +} + +// See comments under `SelectorKeys` for why this is here. +// In this case, the invariant is (isReservedSequence && reserved.isValid() && !options.isValid()) +// || (!isReservedSequence && !reserved.isValid() && options.isValid()) +bool Operator::isBogus() const { + if (isReservedSequence) { + return !((reserved.isValid() && !options.isValid())); + } + return (!(!reserved.isValid() && options.isValid())); +} + +Operator::Builder& Operator::Builder::setReserved(Reserved* reserved) { + U_ASSERT(reserved != nullptr); + asReserved.adoptInstead(reserved); + functionName.adoptInstead(nullptr); + options.adoptInstead(nullptr); + return *this; +} + +Operator::Builder& Operator::Builder::setFunctionName(const FunctionName& func, UErrorCode& errorCode) { + asReserved.adoptInstead(nullptr); + functionName.adoptInstead(new FunctionName(func)); + if (!functionName.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return *this; +} + +Operator::Builder& Operator::Builder::addOption(const UnicodeString &key, Operand* value, UErrorCode& errorCode) { + THIS_ON_ERROR(errorCode); + + U_ASSERT(value != nullptr); + asReserved.adoptInstead(nullptr); + // Adopt the value so it can be deleted in the error case + LocalPointer adoptedValue(value); + if (!adoptedValue.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return *this; + } + if (!options.isValid()) { + 
options.adoptInstead(OptionMap::builder(errorCode)); + THIS_ON_ERROR(errorCode); + } + // If the option name is already in the map, emit a data model error + if (options->has(key)) { + errorCode = U_DUPLICATE_OPTION_NAME_ERROR; + } else { + options->add(key, adoptedValue.orphan(), errorCode); + } + return *this; +} + +Operator* Operator::Builder::build(UErrorCode& errorCode) const { + NULL_ON_ERROR(errorCode); + + LocalPointer result; + // Must be either reserved or function, not both; enforced by methods + if (asReserved.isValid()) { + // Methods enforce that the function name and options are unset + // if `setReserved()` is called, so if they were valid, that + // would indicate a bug. + U_ASSERT(!(functionName.isValid() || options.isValid())); + result.adoptInstead(Operator::create(*asReserved, errorCode)); + } else { + if (!functionName.isValid()) { + // Neither function name nor reserved was set + // There is no default, so this case could occur if the + // caller creates a builder and doesn't make any calls + // before calling build(). 
+ errorCode = U_INVALID_STATE_ERROR; + return nullptr; + } + if (options.isValid()) { + LocalPointer opts(options->build(errorCode)); + NULL_ON_ERROR(errorCode); + result.adoptInstead(Operator::create(*functionName, opts.orphan(), errorCode)); + } else { + result.adoptInstead(Operator::create(*functionName, nullptr, errorCode)); + } + } + NULL_ON_ERROR(errorCode); + return result.orphan(); +} + +/* static */ Operator* Operator::create(const Reserved& r, UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + Operator* result = new Operator(r); + if (result == nullptr || result->isBogus()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return result; +} + +/* static */ Operator* Operator::create(const FunctionName& f, OptionMap* opts, UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + // opts may be null -- in that case, we create an empty OptionMap + // for simplicity + LocalPointer adoptedOpts; + if (opts == nullptr) { + LocalPointer builder(OptionMap::builder(errorCode)); + adoptedOpts.adoptInstead(builder->build(errorCode)); + } else { + adoptedOpts.adoptInstead(opts); + } + NULL_ON_ERROR(errorCode); + + Operator* result = new Operator(f, adoptedOpts.orphan()); + if (result == nullptr || result->isBogus()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return result; +} + +/* static */ Operator::Builder* Operator::builder(UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + LocalPointer result(new Operator::Builder()); + if (!result.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + return result.orphan(); +} + +Operator::Operator(const Operator& other) : isReservedSequence(other.isReservedSequence), + functionName(other.functionName), + options(isReservedSequence ? nullptr + : new OptionMap(*other.options)), + reserved(isReservedSequence? 
new Reserved(*(other.reserved)) + : nullptr) { + U_ASSERT(!other.isBogus()); +} + +// Function call constructor; adopts `f` and `l`, which must be non-null +Operator::Operator(const FunctionName& f, OptionMap *l) : isReservedSequence(false), functionName(f), options(l), reserved(nullptr) { + U_ASSERT(l != nullptr); + } + +Operator::Builder::~Builder() {} + +// ------------ Expression + + +UBool Expression::isStandaloneAnnotation() const { + U_ASSERT(!isBogus()); + return rand->isNull(); +} + +// Returns true for function calls with operands as well as +// standalone annotations. +// Reserved sequences are not function calls +UBool Expression::isFunctionCall() const { + U_ASSERT(!isBogus()); + return (rator.isValid() && !rator->isReserved()); +} + +UBool Expression::isReserved() const { + U_ASSERT(!isBogus()); + return (rator.isValid() && rator->isReserved()); +} + +const Operator& Expression::getOperator() const { + U_ASSERT(isFunctionCall() || isReserved()); + return *rator; +} + +// May return null operand +const Operand& Expression::getOperand() const { + return *rand; +} + +Expression::Builder& Expression::Builder::setOperand(Operand* rAnd) { + U_ASSERT(rAnd != nullptr); + rand.adoptInstead(rAnd); + return *this; +} + +Expression::Builder& Expression::Builder::setOperator(Operator* rAtor) { + U_ASSERT(rAtor != nullptr); + rator.adoptInstead(rAtor); + return *this; +} + +// Postcondition: U_FAILURE(errorCode) || (result != nullptr && !isBogus(result)) +Expression* Expression::Builder::build(UErrorCode& errorCode) const { + NULL_ON_ERROR(errorCode); + + if ((!rand.isValid() || rand->isNull()) && !rator.isValid()) { + errorCode = U_INVALID_STATE_ERROR; + return nullptr; + } + LocalPointer result; + if (rand.isValid() && rator.isValid()) { + result.adoptInstead(new Expression(*rator, *rand)); + } else if (rand.isValid() && !rator.isValid()) { + result.adoptInstead(new Expression(*rand)); + } else { + // rator is valid, rand is not valid + result.adoptInstead(new 
Expression(*rator)); + } + + if (!result.isValid() || result->isBogus()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + return result.orphan(); +} + +/* static */ Expression::Builder* Expression::builder(UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + LocalPointer result(new Builder()); + if (!result.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + return result.orphan(); +} + +Expression::Expression(const Expression& other) : rator(other.rator.isValid() ? new Operator(*(other.rator)) : nullptr), + rand(other.rand.isValid() ? new Operand(*(other.rand)) : nullptr) { + U_ASSERT(!other.isBogus()); + if (other.rator.isValid() && other.rand.isValid()) { + bogus = !(rator.isValid() && rand.isValid()); + return; + } + if (other.rator.isValid()) { + bogus = !rator.isValid(); + return; + } + U_ASSERT(other.rand.isValid()); + bogus = !rand.isValid(); +} + +bool Expression::isBogus() const { + if (bogus) { + return true; + } + // Invariant: if the expression is not bogus and it + // has a non-null operator, that operator is not bogus. + // (Operands are never bogus.) 
+ U_ASSERT(!rator.isValid() || !rator->isBogus()); + return false; +} + +Expression::Builder::~Builder() {} + +// ----------- PatternPart + +/* static */ PatternPart* PatternPart::create(const UnicodeString& t, UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + PatternPart* result = new PatternPart(t); + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return result; +} + +/* static */ PatternPart* PatternPart::create(Expression* e, UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + U_ASSERT(e != nullptr); + LocalPointer adoptedExpr(e); + PatternPart* result = new PatternPart(adoptedExpr.orphan()); + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return result; +} + +const Expression& PatternPart::contents() const { + U_ASSERT(!isText() && !isBogus()); + return *expression; +} + +// Precondition: isText(); +const UnicodeString& PatternPart::asText() const { + U_ASSERT(isText()); + return text; +} + +// ---------------- Pattern + +const PatternPart* Pattern::getPart(int32_t i) const { + U_ASSERT(!isBogus() && i < numParts()); + return parts->get(i); +} + +Pattern::Builder::Builder(UErrorCode &errorCode) { + CHECK_ERROR(errorCode); + parts.adoptInstead(ImmutableVector::builder(errorCode)); +} + +Pattern::Builder* Pattern::builder(UErrorCode &errorCode) { + NULL_ON_ERROR(errorCode); + LocalPointer tree(new Builder(errorCode)); + NULL_ON_ERROR(errorCode); + return tree.orphan(); +} + +Pattern* Pattern::Builder::build(UErrorCode& errorCode) const { + NULL_ON_ERROR(errorCode); + LocalPointer> patternParts(parts->build(errorCode)); + NULL_ON_ERROR(errorCode); + Pattern* result = new Pattern(patternParts.orphan()); + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return result; +} + +Pattern::Builder& Pattern::Builder::add(PatternPart* part, UErrorCode &errorCode) { + THIS_ON_ERROR(errorCode); + + parts->add(part, errorCode); + return *this; +} + +Pattern::Builder::~Builder() {} + +// 
---------------- Binding + +/* static */ Binding* Binding::create(const VariableName& var, Expression* e, UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + Binding *b = new Binding(var, e); + if (b == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return b; +} + +const Expression& Binding::getValue() const { + U_ASSERT(!isBogus()); + return *value; +} + +Binding::Binding(const Binding& other) : var(other.var), value(new Expression(*other.value)) { + U_ASSERT(!other.isBogus()); +} + +Binding::~Binding() {} + +// --------------- MessageFormatDataModel + + +// The `hasSelectors()` method is provided so that `getSelectors()`, +// `getVariants()` and `getPattern()` can rely on preconditions +// rather than taking error codes as arguments. +UBool MessageFormatDataModel::hasSelectors() const { + if (pattern.isValid()) { + U_ASSERT(!selectors.isValid()); + U_ASSERT(!variants.isValid()); + return false; + } + U_ASSERT(selectors.isValid()); + U_ASSERT(variants.isValid()); + return true; +} + +const ExpressionList& MessageFormatDataModel::getSelectors() const { + U_ASSERT(hasSelectors()); + return *selectors; +} + +const VariantMap& MessageFormatDataModel::getVariants() const { + U_ASSERT(hasSelectors()); + return *variants; +} + +const Pattern& MessageFormatDataModel::getPattern() const { + U_ASSERT(!hasSelectors()); + return *pattern; +} + +MessageFormatDataModel::Builder::Builder(UErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + selectors.adoptInstead(ExpressionList::builder(errorCode)); + variants.adoptInstead(VariantMap::builder(errorCode)); + locals.adoptInstead(Bindings::builder(errorCode)); +} + +// Invalidate pattern and create selectors/variants if necessary +void MessageFormatDataModel::Builder::buildSelectorsMessage(UErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + if (pattern.isValid()) { + pattern.adoptInstead(nullptr); + } + if (!selectors.isValid()) { + U_ASSERT(!variants.isValid()); + 
selectors.adoptInstead(ExpressionList::builder(errorCode)); + variants.adoptInstead(VariantMap::builder(errorCode)); + } else { + U_ASSERT(variants.isValid()); + } +} + +MessageFormatDataModel::Builder& MessageFormatDataModel::Builder::addLocalVariable(const VariableName&variableName, Expression *expression, UErrorCode &errorCode) { + THIS_ON_ERROR(errorCode); + + LocalPointer b(Binding::create(variableName, expression, errorCode)); + THIS_ON_ERROR(errorCode); + locals->add(b.orphan(), errorCode); + + return *this; +} + +/* + selector must be non-null +*/ +MessageFormatDataModel::Builder& MessageFormatDataModel::Builder::addSelector(Expression* selector, UErrorCode& errorCode) { + THIS_ON_ERROR(errorCode); + + U_ASSERT(selector != nullptr); + buildSelectorsMessage(errorCode); + + selectors->add(selector, errorCode); + + return *this; +} + +/* + `keys` and `pattern` must be non-null + Adopts `keys` and `pattern` +*/ +MessageFormatDataModel::Builder& MessageFormatDataModel::Builder::addVariant(SelectorKeys* keys, Pattern* pattern, UErrorCode& errorCode) { + THIS_ON_ERROR(errorCode); + + U_ASSERT(keys != nullptr); + U_ASSERT(pattern != nullptr); + + buildSelectorsMessage(errorCode); + + variants->add(keys, pattern, errorCode); + + return *this; +} + +MessageFormatDataModel::Builder* MessageFormatDataModel::builder(UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + LocalPointer result(new Builder(errorCode)); + if (U_FAILURE(errorCode)) { + return nullptr; + } + return result.orphan(); +} + +MessageFormatDataModel::Builder& MessageFormatDataModel::Builder::setPattern(Pattern* pat) { + // Can't set pattern to null + U_ASSERT(pat != nullptr); + pattern.adoptInstead(pat); + // Invalidate selectors and variants + selectors.adoptInstead(nullptr); + variants.adoptInstead(nullptr); + return *this; +} + +MessageFormatDataModel::MessageFormatDataModel(const MessageFormatDataModel::Builder& builder, UErrorCode &errorCode) + : selectors(builder.pattern.isValid() ? 
nullptr : builder.selectors->build(errorCode)), + variants(builder.pattern.isValid() ? nullptr : builder.variants->build(errorCode)), + pattern(builder.pattern.isValid() ? new Pattern(*(builder.pattern)) : nullptr), + bindings(builder.locals->build(errorCode)) +{ + CHECK_ERROR(errorCode); + + if (builder.pattern.isValid()) { + // If `pattern` has been set, then assume this is a Pattern message + U_ASSERT(!builder.selectors.isValid()); + U_ASSERT(!builder.variants.isValid()); + U_ASSERT(!hasSelectors()); + } else { + // Otherwise, this is a Selectors message + U_ASSERT(builder.selectors.isValid()); + U_ASSERT(builder.variants.isValid()); + U_ASSERT(hasSelectors()); + } +} + +MessageFormatDataModel* MessageFormatDataModel::Builder::build(UErrorCode &errorCode) const { + NULL_ON_ERROR(errorCode); + + bool patternValid = pattern.isValid(); + bool selectorsValid = selectors.isValid() && variants.isValid(); + + // Either pattern is valid, or both selectors and variants are valid; but not both + if ((patternValid && selectorsValid) + || (!patternValid && !selectorsValid)) { + errorCode = U_INVALID_STATE_ERROR; + return nullptr; + } + + // Initialize the data model + LocalPointer dataModel(new MessageFormatDataModel(*this, errorCode)); + NULL_ON_ERROR(errorCode); + return dataModel.orphan(); +} + +MessageFormatDataModel::~MessageFormatDataModel() {} +template<> +ImmutableVector::Builder::~Builder() {} +template<> +ImmutableVector::~ImmutableVector() {} +template<> +ImmutableVector::Builder::~Builder() {} +template<> +ImmutableVector::~ImmutableVector() {} +template<> +ImmutableVector::Builder::~Builder() {} +template<> +ImmutableVector::~ImmutableVector() {} +template<> +ImmutableVector::Builder::~Builder() {} +template<> +ImmutableVector::~ImmutableVector() {} +template<> +ImmutableVector::Builder::~Builder() {} +template<> +ImmutableVector::~ImmutableVector() {} +template<> +ImmutableVector::Builder::~Builder() {} +template<> +ImmutableVector::~ImmutableVector() {} 
+template<> +OrderedMap::Builder::~Builder() {} +template<> +OrderedMap::~OrderedMap() {} +template<> +OrderedMap::Builder::~Builder() {} +template<> +OrderedMap::~OrderedMap() {} +} // namespace message2 + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + diff --git a/icu4c/source/i18n/messageformat2_formatting_context.cpp b/icu4c/source/i18n/messageformat2_formatting_context.cpp new file mode 100644 index 000000000000..6d393a81a217 --- /dev/null +++ b/icu4c/source/i18n/messageformat2_formatting_context.cpp @@ -0,0 +1,770 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/messageformat2_formatting_context.h" +#include "unicode/messageformat2_function_registry.h" +#include "unicode/messageformat2_data_model.h" +#include "unicode/messageformat2.h" +#include "uvector.h" // U_ASSERT + +U_NAMESPACE_BEGIN namespace message2 { + +// Context that's specific to formatting a single expression + +// Constructors +// ------------ + +/* static */ ExpressionContext* ExpressionContext::create(MessageContext& globalContext, UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + LocalPointer result(new ExpressionContext(globalContext, errorCode)); + NULL_ON_ERROR(errorCode); + return result.orphan(); +} + +ExpressionContext* ExpressionContext::create(UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + LocalPointer result(new ExpressionContext(context, errorCode)); + NULL_ON_ERROR(errorCode); + return result.orphan(); +} + +ExpressionContext::ExpressionContext(MessageContext& c, UErrorCode& errorCode) : context(c), inState(FALLBACK), outState(NONE) { + CHECK_ERROR(errorCode); + + initFunctionOptions(errorCode); +} + +void ExpressionContext::initFunctionOptions(UErrorCode& errorCode) { + CHECK_ERROR(errorCode); + functionOptions.adoptInstead(new Hashtable(compareVariableName, nullptr, errorCode)); + 
CHECK_ERROR(errorCode); + // `functionOptions` owns its values + functionOptions->setValueDeleter(uprv_deleteUObject); +} + +// State +// --------- + +void ExpressionContext::enterState(InputState s) { + // If we're entering an error state, clear the output + if (s == InputState::FALLBACK) { + enterState(OutputState::NONE); + } + inState = s; + +} + +void ExpressionContext::enterState(OutputState s) { + // Input must exist if output exists + if (s > OutputState::NONE) { + U_ASSERT(hasInput()); + } + outState = s; +} + +bool ExpressionContext::isFallback() const { + return (inState == InputState::FALLBACK); +} + +void ExpressionContext::setFallback() { + enterState(FALLBACK); +} + +// Fallback values are enclosed in curly braces; +// see https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#formatting-fallback-values +static void fallbackToString(const UnicodeString& s, UnicodeString& result) { + result += LEFT_CURLY_BRACE; + result += s; + result += RIGHT_CURLY_BRACE; +} + +void ExpressionContext::setFallbackTo(const FunctionName& f) { + fallback.remove(); + fallbackToString(f.toString(), fallback); +} + +void ExpressionContext::setFallbackTo(const VariableName& v) { + fallback.remove(); + fallbackToString(v.declaration(), fallback); +} + +void ExpressionContext::setFallbackTo(const MessageFormatDataModel::Literal& l) { + fallback.remove(); + fallbackToString(l.quotedString(), fallback); +} + +// Add the fallback string as the input string, and +// unset this as a fallback +void ExpressionContext::promoteFallbackToInput() { + U_ASSERT(isFallback()); + return setInput(fallback); +} + +// Add the fallback string as the output string +void ExpressionContext::promoteFallbackToOutput() { + U_ASSERT(isFallback()); + return setOutput(fallback); +} + +// Used when handling function calls with no argument +void ExpressionContext::setNoOperand() { + U_ASSERT(isFallback()); + enterState(NO_OPERAND); +} + +void ExpressionContext::setInput(const 
UnicodeString& s) { + U_ASSERT(inState <= NO_OPERAND); + enterState(FORMATTABLE_INPUT); + input = Formattable(s); +} + +void ExpressionContext::setInput(const Formattable& s) { + U_ASSERT(isFallback()); + enterState(FORMATTABLE_INPUT); + U_ASSERT(s.getType() != Formattable::Type::kObject); + input = s; +} + +UBool ExpressionContext::hasFormattableInput() const { + return (inState == InputState::FORMATTABLE_INPUT); +} + +UBool ExpressionContext::hasObjectInput() const { + return (inState == InputState::OBJECT_INPUT); +} + +const UObject& ExpressionContext::getObjectInput() const { + U_ASSERT(hasObjectInput()); + return *objectInput; +} + +const UObject* ExpressionContext::getObjectInputPointer() const { + U_ASSERT(hasObjectInput()); + return objectInput; +} + +const Formattable& ExpressionContext::getFormattableInput() const { + U_ASSERT(hasFormattableInput()); + return input; +} + +const number::FormattedNumber& ExpressionContext::getNumberOutput() const { + U_ASSERT(hasNumberOutput()); + return numberOutput; +} + +UBool ExpressionContext::hasStringOutput() const { + return (inState > FALLBACK && outState == OutputState::STRING); +} + +UBool ExpressionContext::hasNumberOutput() const { + return (inState > FALLBACK && outState == OutputState::NUMBER); +} + +const UnicodeString& ExpressionContext::getStringOutput() const { + U_ASSERT(hasStringOutput()); + return stringOutput; +} + +void ExpressionContext::setInput(const UObject* obj) { + U_ASSERT(isFallback()); + U_ASSERT(obj != nullptr); + enterState(OBJECT_INPUT); + objectInput = obj; +} + +void ExpressionContext::setOutput(const UnicodeString& s) { + if (inState == InputState::NO_OPERAND) { + // If setOutput() is called while the + // operand is null, set the input to the + // output string + setInput(s); + } + U_ASSERT(hasInput()); + enterState(OutputState::STRING); + stringOutput = s; +} + +void ExpressionContext::setOutput(number::FormattedNumber&& num) { + U_ASSERT(hasInput()); + 
enterState(OutputState::NUMBER); + numberOutput = std::move(num); +} + +void ExpressionContext::clearOutput() { + stringOutput.remove(); + enterState(OutputState::NONE); +} + +// Called when output is required and no output is present; +// formats the input to a string with defaults, for inputs that can be +// formatted with a default formatter +void ExpressionContext::formatInputWithDefaults(const Locale& locale, UErrorCode& status) { + CHECK_ERROR(status); + + U_ASSERT(hasFormattableInput()); + U_ASSERT(!hasOutput()); + + // Try as decimal number first + if (input.isNumeric()) { + StringPiece asDecimal = input.getDecimalNumber(status); + CHECK_ERROR(status); + if (asDecimal != nullptr) { + setOutput(formatNumberWithDefaults(locale, asDecimal, status)); + return; + } + } + + switch (input.getType()) { + case Formattable::Type::kDate: { + formatDateWithDefaults(locale, input.getDate(), stringOutput, status); + enterState(OutputState::STRING); + break; + } + case Formattable::Type::kDouble: { + setOutput(formatNumberWithDefaults(locale, input.getDouble(), status)); + break; + } + case Formattable::Type::kLong: { + setOutput(formatNumberWithDefaults(locale, input.getLong(), status)); + break; + } + case Formattable::Type::kInt64: { + setOutput(formatNumberWithDefaults(locale, input.getInt64(), status)); + break; + } + case Formattable::Type::kString: { + setOutput(input.getString()); + break; + } + default: { + // No default formatters for other types; use fallback + promoteFallbackToOutput(); + } + } +} + +// Called when string output is required; forces output to be produced +// if none is present (including formatting number output as a string) +void ExpressionContext::formatToString(const Locale& locale, UErrorCode& status) { + CHECK_ERROR(status); + + switch (outState) { + case OutputState::STRING: { + return; // Nothing to do + } + case OutputState::NUMBER: { + setOutput(numberOutput.toString(status)); + return; + } + default: { + break; + } + } + switch 
(inState) { + case InputState::FALLBACK: { + setInput(fallback); + setOutput(fallback); + break; + } + case InputState::NO_OPERAND: + // No operand and a function call hasn't cleared the state -- + // use fallback + case InputState::OBJECT_INPUT: { + setFallback(); + promoteFallbackToOutput(); + break; + } + case InputState::FORMATTABLE_INPUT: { + formatInputWithDefaults(locale, status); + // Force number to string, in case the result was a number + formatToString(locale, status); + break; + } + } + CHECK_ERROR(status); + U_ASSERT(hasStringOutput()); +} + +void ExpressionContext::clearFunctionName() { + U_ASSERT(pendingFunctionName.isValid()); + pendingFunctionName.adoptInstead(nullptr); +} + +const FunctionName& ExpressionContext::getFunctionName() { + U_ASSERT(pendingFunctionName.isValid()); + return *pendingFunctionName; +} + +// Helper functions for function options +// ------------------------------------- + +/* static */ Formattable* ExpressionContext::createFormattable(const UnicodeString& v, UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + Formattable* result = new Formattable(v); + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return result; +} + +/* static */ Formattable* ExpressionContext::createFormattable(double v, UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + Formattable* result = new Formattable(v); + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return result; +} + +/* static */ Formattable* ExpressionContext::createFormattable(int64_t v, UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + Formattable* result = new Formattable(v); + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return result; +} + +/* static */ Formattable* ExpressionContext::createFormattable(const UObject* v, UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + // This object will only be accessed through getObjectOption(), which returns + // a const reference + Formattable* 
result = new Formattable(const_cast<UObject*>(v));
+    if (result == nullptr) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+    }
+    return result;
+}
+
+// Creates a heap-allocated, array-valued Formattable that holds a copy of
+// each of the `count` strings starting at `in`.
+// Returns nullptr (with `errorCode` set) if `errorCode` is already a failure,
+// if the arguments are unusable, or if an allocation fails.
+// The caller owns the result.
+/* static */ Formattable* ExpressionContext::createFormattable(const UnicodeString* in, int32_t count, UErrorCode& errorCode) {
+    NULL_ON_ERROR(errorCode);
+
+    // A negative count must never reach `new Formattable[count]`, and a
+    // null source array can only be valid when there is nothing to copy
+    if (count < 0 || (in == nullptr && count > 0)) {
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return nullptr;
+    }
+
+    LocalArray<Formattable> arr(new Formattable[count]);
+    if (!arr.isValid()) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return nullptr;
+    }
+
+    for (int32_t i = 0; i < count; i++) {
+        arr[i] = Formattable(in[i]);
+    }
+
+    // `arr` stays owned by this function and is released on return
+    Formattable* result(new Formattable(arr.getAlias(), count));
+    if (result == nullptr) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+    }
+    return result;
+}
+
+// Creates a heap-allocated date-valued Formattable for `v`.
+// Returns nullptr if `errorCode` is already a failure or allocation fails.
+// The caller owns the result.
+/* static */ Formattable* ExpressionContext::createFormattableDate(UDate v, UErrorCode& errorCode) {
+    NULL_ON_ERROR(errorCode);
+
+    Formattable* result = new Formattable(v, Formattable::kIsDate);
+    if (result == nullptr) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+    }
+    return result;
+}
+
+// Creates a heap-allocated Formattable from the decimal number string `val`.
+// Returns nullptr (with `errorCode` set) if `errorCode` is already a failure,
+// if the Formattable constructor reports an error for `val`, or if
+// allocation fails.
+// The caller owns the result.
+/* static */ Formattable* ExpressionContext::createFormattableDecimal(StringPiece val, UErrorCode& errorCode) {
+    NULL_ON_ERROR(errorCode);
+
+    // Hold the new object in a LocalPointer so that it is deleted, rather
+    // than leaked, when the constructor reports a failure through `errorCode`
+    LocalPointer<Formattable> result(new Formattable(val, errorCode));
+    if (U_FAILURE(errorCode)) {
+        return nullptr;
+    }
+    if (!result.isValid()) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return nullptr;
+    }
+    return result.orphan();
+}
+
+// Function options iterator
+
+// Returns the position to pass to the first call to nextOption()
+int32_t ExpressionContext::firstOption() const { return UHASH_FIRST; }
+
+// Advances `pos` to the next function option. On success, copies the option
+// name into `key` and returns the option value (still owned by the options
+// table); returns nullptr, leaving `key` untouched, when no options remain.
+const Formattable* ExpressionContext::nextOption(int32_t& pos, UnicodeString& key) const {
+    U_ASSERT(functionOptions.isValid());
+    const UHashElement* next = functionOptions->nextElement(pos);
+    if (next == nullptr) {
+        return nullptr;
+    }
+    key = *static_cast<const UnicodeString*>(next->key.pointer);
+    return static_cast<const Formattable*>(next->value.pointer);
+}
+
+// Returns the number of function options currently set
+int32_t ExpressionContext::optionsCount() const {
+    U_ASSERT(functionOptions.isValid());
+    return functionOptions->count();
+}
+
+
+// Function options
+// ----------------
+
+// Adopts `val`
+void
ExpressionContext::addFunctionOption(const UnicodeString& k, Formattable* val, UErrorCode& errorCode) { + CHECK_ERROR(errorCode); + U_ASSERT(functionOptions.isValid()); + functionOptions->put(k, val, errorCode); +} + +void ExpressionContext::setStringOption(const UnicodeString& key, const UnicodeString& value, UErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + LocalPointer valuePtr(createFormattable(value, errorCode)); + CHECK_ERROR(errorCode); + addFunctionOption(key, valuePtr.orphan(), errorCode); +} + +void ExpressionContext::setDateOption(const UnicodeString& key, UDate date, UErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + LocalPointer valuePtr(createFormattableDate(date, errorCode)); + CHECK_ERROR(errorCode); + addFunctionOption(key, valuePtr.orphan(), errorCode); +} + +void ExpressionContext::setNumericOption(const UnicodeString& key, double value, UErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + LocalPointer valuePtr(createFormattable(value, errorCode)); + CHECK_ERROR(errorCode); + addFunctionOption(key, valuePtr.orphan(), errorCode); +} + +void ExpressionContext::setObjectOption(const UnicodeString& key, const UObject* value, UErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + LocalPointer valuePtr(createFormattable(value, errorCode)); + CHECK_ERROR(errorCode); + addFunctionOption(key, valuePtr.orphan(), errorCode); +} + +Formattable* ExpressionContext::getOption(const UnicodeString& key, Formattable::Type type) const { + U_ASSERT(functionOptions.isValid()); + Formattable* result = (Formattable*) functionOptions->get(key); + if (result == nullptr || result->getType() != type) { + return nullptr; + } + return result; +} + +Formattable* ExpressionContext::getNumericOption(const UnicodeString& key) const { + U_ASSERT(functionOptions.isValid()); + Formattable* result = (Formattable*) functionOptions->get(key); + if (result == nullptr || !result->isNumeric()) { + return nullptr; + } + return result; +} + +UBool 
ExpressionContext::getStringOption(const UnicodeString& key, UnicodeString& value) const {
+    // Copies the string-typed option named `key` into `value` and returns
+    // true; returns false (leaving `value` untouched) if there is no such
+    // option or it is not a string
+    Formattable* result = getOption(key, Formattable::Type::kString);
+    if (result == nullptr) {
+        return false;
+    }
+    value = result->getString();
+    return true;
+}
+
+// Returns the object-typed option named `key`.
+// Precondition (asserted): the option exists, is an object, and is non-null;
+// check with hasObjectOption() first.
+const UObject& ExpressionContext::getObjectOption(const UnicodeString& key) const {
+    Formattable* result = getOption(key, Formattable::Type::kObject);
+    U_ASSERT(result != nullptr);
+    const UObject* value = result->getObject();
+    U_ASSERT(value != nullptr);
+    return *value;
+}
+
+// Returns true iff an object-typed option named `key` is set
+UBool ExpressionContext::hasObjectOption(const UnicodeString& key) const {
+    Formattable* result = getOption(key, Formattable::Type::kObject);
+    return (result != nullptr);
+}
+
+// If `key` names a string option, parses it as a number using the message
+// formatter's locale and stores the result in `value`.
+// Returns false if the option is absent, is not a string, or can't be parsed.
+bool ExpressionContext::tryStringAsNumberOption(const UnicodeString& key, double& value) const {
+    // Check for a string option, try to parse it as a number if present
+    UnicodeString tempValue;
+    if (!getStringOption(key, tempValue)) {
+        return false;
+    }
+    UErrorCode localErrorCode = U_ZERO_ERROR;
+    LocalPointer<NumberFormat> numberFormat(NumberFormat::createInstance(context.messageFormatter().getLocale(), localErrorCode));
+    if (U_FAILURE(localErrorCode)) {
+        return false;
+    }
+    Formattable asNumber;
+    numberFormat->parse(tempValue, asNumber, localErrorCode);
+    if (U_FAILURE(localErrorCode)) {
+        return false;
+    }
+    value = asNumber.getDouble(localErrorCode);
+    if (U_FAILURE(localErrorCode)) {
+        return false;
+    }
+    return true;
+}
+
+// Stores the option named `key` in `value` as an int64_t and returns true.
+// A numeric option is converted directly; a string option is parsed as a
+// number and truncated toward zero.
+// Returns false if the option is absent or can't be represented as int64_t.
+UBool ExpressionContext::getInt64Option(const UnicodeString& key, int64_t& value) const {
+    Formattable* result = getNumericOption(key);
+    if (result == nullptr) {
+        double doubleResult;
+        if (tryStringAsNumberOption(key, doubleResult)) {
+            // Converting a NaN, infinite or out-of-range double to int64_t
+            // is undefined behavior, so reject those values instead.
+            // 2^63 is exactly representable as a double (INT64_MAX is not);
+            // the negated comparison also rejects NaN.
+            constexpr double kInt64Bound = 9223372036854775808.0; // 2^63
+            if (!(doubleResult >= -kInt64Bound && doubleResult < kInt64Bound)) {
+                return false;
+            }
+            value = static_cast<int64_t>(doubleResult);
+            return true;
+        }
+        return false;
+    }
+    UErrorCode localErrorCode = U_ZERO_ERROR;
+    value = result->getInt64(localErrorCode);
+    if (U_SUCCESS(localErrorCode)) {
+        return true;
+    }
+    // Option was numeric but couldn't be converted to int64_t -- could be overflow
+    return
false; +} + +UBool ExpressionContext::getDoubleOption(const UnicodeString& key, double& value) const { + Formattable* result = getNumericOption(key); + if (result == nullptr) { + return tryStringAsNumberOption(key, value); + } + UErrorCode localErrorCode = U_ZERO_ERROR; + value = result->getDouble(localErrorCode); + // The conversion must succeed, since the result is numeric + U_ASSERT(U_SUCCESS(localErrorCode)); + return true; +} + + +// Functions +// ------------- + +void ExpressionContext::setFunctionName(const FunctionName& fn, UErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + U_ASSERT(!hasFunctionName()); + pendingFunctionName.adoptInstead(new FunctionName(fn)); + if (!pendingFunctionName.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } +} + +bool ExpressionContext::hasFunctionName() const { + return pendingFunctionName.isValid(); +} + +void ExpressionContext::returnFromFunction() { + U_ASSERT(hasFunctionName()); + clearFunctionName(); + clearFunctionOptions(); +} + +void ExpressionContext::clearFunctionOptions() { + U_ASSERT(functionOptions.isValid()); + functionOptions->removeAll(); +} + +// Precondition: pending function name is set and selector is defined +// Postcondition: selector != nullptr +Selector* ExpressionContext::getSelector(UErrorCode& status) const { + NULL_ON_ERROR(status); + + U_ASSERT(pendingFunctionName.isValid()); + const FunctionName& functionName = *pendingFunctionName; + const SelectorFactory* selectorFactory = context.lookupSelectorFactory(functionName, status); + NULL_ON_ERROR(status); + // Create a specific instance of the selector + LocalPointer result(selectorFactory->createSelector(context.messageFormatter().getLocale(), status)); + NULL_ON_ERROR(status); + return result.orphan(); +} + +// Precondition: pending function name is set and formatter is defined +// Postcondition: formatter != nullptr +const Formatter* ExpressionContext::getFormatter(UErrorCode& status) { + NULL_ON_ERROR(status); + + 
U_ASSERT(pendingFunctionName.isValid()); + U_ASSERT(hasFormatter()); + return context.maybeCachedFormatter(*pendingFunctionName, status); +} + +bool ExpressionContext::hasFormatter() const { + U_ASSERT(pendingFunctionName.isValid()); + return context.isFormatter(*pendingFunctionName); +} + +bool ExpressionContext::hasSelector() const { + if (!pendingFunctionName.isValid()) { + return false; + } + return context.isSelector(*pendingFunctionName); +} + +// Calls the pending selector +// keys and keysOut are vectors of strings +void ExpressionContext::evalPendingSelectorCall(const UVector& keys, UVector& keysOut, UErrorCode& status) { + CHECK_ERROR(status); + + U_ASSERT(pendingFunctionName.isValid()); + U_ASSERT(hasSelector()); + LocalPointer selectorImpl(getSelector(status)); + CHECK_ERROR(status); + UErrorCode savedStatus = status; + + // Convert the vectors to arrays for the call + int32_t numKeys = keys.size(); + LocalArray keysArray(new UnicodeString*[numKeys]); + LocalArray keysOutArray(new UnicodeString*[numKeys]); + if (!keysArray.isValid() || !keysOutArray.isValid()) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + for (int32_t i = 0; i < keys.size(); i++) { + keysArray[i] = static_cast(keys[i]); + } + + int32_t numberMatching = 0; + selectorImpl->selectKey(*this, keysArray.getAlias(), numKeys, keysOutArray.getAlias(), numberMatching, status); + // Update errors + if (savedStatus != status) { + if (U_FAILURE(status)) { + setFallback(); + status = U_ZERO_ERROR; + setSelectorError(pendingFunctionName->toString(), status); + } else { + // Ignore warnings + status = savedStatus; + } + } + returnFromFunction(); + + // Copy the keys back into the vector + LocalPointer tempKey; + for (int32_t i = 0; i < numberMatching; i++) { + // Because both `keys` and `keysOut` own their elements, + // the string has to be copied here + tempKey.adoptInstead(new UnicodeString(*keysOutArray[i])); + if (!tempKey.isValid()) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } 
+ keysOut.adoptElement(tempKey.orphan(), status); + } + keysOut.setSize(numberMatching, status); +} + +// Calls the pending formatter +void ExpressionContext::evalFormatterCall(const FunctionName& functionName, UErrorCode& status) { + CHECK_ERROR(status); + + LocalPointer savedFunctionName(pendingFunctionName.isValid() ? pendingFunctionName.orphan() : nullptr); + setFunctionName(functionName, status); + CHECK_ERROR(status); + if (hasFormatter()) { + const Formatter* formatterImpl = getFormatter(status); + CHECK_ERROR(status); + UErrorCode savedStatus = status; + formatterImpl->format(*this, status); + // Update errors + if (savedStatus != status) { + if (U_FAILURE(status)) { + // Convey any error generated by the formatter + // as a formatting error + setFallback(); + status = U_ZERO_ERROR; + setFormattingError(functionName.toString(), status); + } else { + // Ignore warnings + status = savedStatus; + } + } + // Ignore the output if any errors occurred + if (context.getErrors().hasFormattingError()) { + clearOutput(); + } + returnFromFunction(); + if (savedFunctionName.isValid()) { + setFunctionName(*savedFunctionName, status); + } + return; + } + // No formatter with this name -- set error + if (context.isSelector(functionName)) { + setFormattingError(functionName.toString(), status); + } else { + context.getErrors().setUnknownFunction(functionName, status); + } + setFallback(); +} + +// Default formatters +// ------------------ + +number::FormattedNumber formatNumberWithDefaults(const Locale& locale, double toFormat, UErrorCode& errorCode) { + return number::NumberFormatter::withLocale(locale).formatDouble(toFormat, errorCode); +} + +number::FormattedNumber formatNumberWithDefaults(const Locale& locale, int32_t toFormat, UErrorCode& errorCode) { + return number::NumberFormatter::withLocale(locale).formatInt(toFormat, errorCode); +} + +number::FormattedNumber formatNumberWithDefaults(const Locale& locale, int64_t toFormat, UErrorCode& errorCode) { + return 
number::NumberFormatter::withLocale(locale).formatInt(toFormat, errorCode); +} + +number::FormattedNumber formatNumberWithDefaults(const Locale& locale, StringPiece toFormat, UErrorCode& errorCode) { + return number::NumberFormatter::withLocale(locale).formatDecimal(toFormat, errorCode); +} + +DateFormat* defaultDateTimeInstance(const Locale& locale, UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + LocalPointer df(DateFormat::createDateTimeInstance(DateFormat::SHORT, DateFormat::SHORT, locale)); + if (!df.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + return df.orphan(); +} + +void formatDateWithDefaults(const Locale& locale, UDate date, UnicodeString& result, UErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + LocalPointer df(defaultDateTimeInstance(locale, errorCode)); + CHECK_ERROR(errorCode); + df->format(date, result, 0, errorCode); +} + +// Errors +// ------- + +void ExpressionContext::setFormattingError(const UnicodeString& formatterName, UErrorCode& status) { + CHECK_ERROR(status); + + context.getErrors().setFormattingError(formatterName, status); +} + +void ExpressionContext::setSelectorError(const UnicodeString& selectorName, UErrorCode& status) { + CHECK_ERROR(status); + + context.getErrors().setSelectorError(selectorName, status); +} + +ExpressionContext::~ExpressionContext() {} +FormattingContext::~FormattingContext() {} + +} // namespace message2 +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/i18n/messageformat2_function_registry.cpp b/icu4c/source/i18n/messageformat2_function_registry.cpp new file mode 100644 index 000000000000..2b6f2b705919 --- /dev/null +++ b/icu4c/source/i18n/messageformat2_function_registry.cpp @@ -0,0 +1,594 @@ +// © 2016 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/dtptngen.h"
+#include "unicode/messageformat2.h"
+#include "unicode/messageformat2_formatting_context.h"
+#include "unicode/numberformatter.h"
+#include "unicode/smpdtfmt.h"
+#include "uvector.h" // U_ASSERT
+
+U_NAMESPACE_BEGIN namespace message2 {
+
+// Function registry implementation
+
+Formatter::~Formatter() {}
+Selector::~Selector() {}
+FormatterFactory::~FormatterFactory() {}
+SelectorFactory::~SelectorFactory() {}
+
+// Builds a FunctionRegistry that takes over the formatter and selector
+// tables accumulated by this builder. Returns nullptr if `errorCode` is
+// already a failure or if allocation fails; the caller owns the result.
+FunctionRegistry* FunctionRegistry::Builder::build(UErrorCode& errorCode) {
+    NULL_ON_ERROR(errorCode);
+    U_ASSERT(formatters.isValid() && selectors.isValid());
+    LocalPointer<FunctionRegistry> result(new FunctionRegistry(formatters.getAlias(), selectors.getAlias()));
+    if (!result.isValid()) {
+        // The builder still owns both tables, so nothing is leaked
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return nullptr;
+    }
+    // Ownership of both tables has passed to the registry
+    (void) formatters.orphan();
+    (void) selectors.orphan();
+    return result.orphan();
+}
+
+// Creates a new, empty builder. Returns nullptr (with `errorCode` set) on
+// failure; the caller owns the result.
+/* static */ FunctionRegistry::Builder* FunctionRegistry::builder(UErrorCode& errorCode) {
+    NULL_ON_ERROR(errorCode);
+    LocalPointer<FunctionRegistry::Builder> result(new FunctionRegistry::Builder(errorCode));
+    // A failed allocation never runs the constructor, so it can't set
+    // `errorCode` itself
+    if (U_SUCCESS(errorCode) && !result.isValid()) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+    }
+    NULL_ON_ERROR(errorCode);
+    return result.orphan();
+}
+
+// Allocates the (initially empty) formatter and selector tables.
+// On failure, `errorCode` is set and both tables are left null.
+FunctionRegistry::Builder::Builder(UErrorCode& errorCode) {
+    CHECK_ERROR(errorCode);
+
+    formatters.adoptInstead(new Hashtable(compareVariableName, nullptr, errorCode));
+    selectors.adoptInstead(new Hashtable(compareVariableName, nullptr, errorCode));
+    // If `new` itself failed, no constructor ran and `errorCode` is still a
+    // success; report it here so the tables are never dereferenced while null
+    if (U_SUCCESS(errorCode) && !(formatters.isValid() && selectors.isValid())) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+    }
+    if (U_FAILURE(errorCode)) {
+        formatters.adoptInstead(nullptr);
+        selectors.adoptInstead(nullptr);
+        return;
+    }
+    // The hashtables own the values, but not the keys
+    formatters->setValueDeleter(uprv_deleteUObject);
+    selectors->setValueDeleter(uprv_deleteUObject);
+}
+
+// Registers `selectorFactory` under `selectorName`.
+// A null factory is reported as U_MEMORY_ALLOCATION_ERROR.
+FunctionRegistry::Builder& FunctionRegistry::Builder::setSelector(const FunctionName& selectorName, SelectorFactory* selectorFactory, UErrorCode& errorCode) {
+    THIS_ON_ERROR(errorCode);
+    if (selectorFactory == nullptr) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return *this;
+ } + selectors->put(selectorName.toString(), selectorFactory, errorCode); + return *this; +} + +FunctionRegistry::Builder& FunctionRegistry::Builder::setFormatter(const FunctionName& formatterName, FormatterFactory* formatterFactory, UErrorCode& errorCode) { + THIS_ON_ERROR(errorCode); + if (formatterFactory == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return *this; + } + formatters->put(formatterName.toString(), formatterFactory, errorCode); + return *this; +} + +FunctionRegistry::Builder::~Builder() {} + +FormatterFactory* FunctionRegistry::getFormatter(const FunctionName& formatterName) const { + // Caller must check for null + return ((FormatterFactory*) formatters->get(formatterName.toString())); +} + +const SelectorFactory* FunctionRegistry::getSelector(const FunctionName& selectorName) const { + // Caller must check for null + return ((SelectorFactory*) selectors->get(selectorName.toString())); +} + +void FunctionRegistry::checkFormatter(const char* s) const { +#ifdef _DEBUG + U_ASSERT(hasFormatter(FunctionName(UnicodeString(s)))); +#else + (void) s; +#endif +} + +void FunctionRegistry::checkSelector(const char* s) const { +#ifdef _DEBUG + U_ASSERT(hasSelector(FunctionName(UnicodeString(s)))); +#else + (void) s; +#endif +} + +// Debugging +void FunctionRegistry::checkStandard() const { + checkFormatter("datetime"); + checkFormatter("number"); + checkFormatter("identity"); + checkSelector("plural"); + checkSelector("selectordinal"); + checkSelector("select"); + checkSelector("gender"); +} + +// Formatter/selector helpers + +// Converts `s` to an int64 value if possible, returning false +// if it can't be parsed +static bool tryStringToNumber(const UnicodeString& s, int64_t& result) { + UErrorCode localErrorCode = U_ZERO_ERROR; + // Try to parse string as int + + LocalPointer numberFormat(NumberFormat::createInstance(localErrorCode)); + if (U_FAILURE(localErrorCode)) { + return false; + } + numberFormat->setParseIntegerOnly(true); + Formattable 
asNumber;
+    numberFormat->parse(s, asNumber, localErrorCode);
+    if (U_SUCCESS(localErrorCode)) {
+        result = asNumber.getInt64(localErrorCode);
+        if (U_SUCCESS(localErrorCode)) {
+            return true;
+        }
+    }
+    return false;
+}
+
+// Converts `s` to a double, indicating failure via `errorCode`.
+// `s` is parsed with a NumberFormat for `loc`; `result` is only written if
+// parsing succeeded.
+static void strToDouble(const UnicodeString& s, const Locale& loc, double& result, UErrorCode& errorCode) {
+    CHECK_ERROR(errorCode);
+
+    LocalPointer<NumberFormat> numberFormat(NumberFormat::createInstance(loc, errorCode));
+    CHECK_ERROR(errorCode);
+    Formattable asNumber;
+    numberFormat->parse(s, asNumber, errorCode);
+    CHECK_ERROR(errorCode);
+    result = asNumber.getDouble(errorCode);
+}
+
+// Converts `optionValue` to an int64 value if possible, returning false
+// if it can't be parsed.
+// Numeric values are converted directly and string values are parsed;
+// any other type (date, array, object) is never a number.
+bool tryFormattableAsNumber(const Formattable& optionValue, int64_t& result) {
+    UErrorCode localErrorCode = U_ZERO_ERROR;
+    if (optionValue.isNumeric()) {
+        result = optionValue.getInt64(localErrorCode);
+        if (U_SUCCESS(localErrorCode)) {
+            return true;
+        }
+    } else if (optionValue.getType() == Formattable::kString) {
+        // getString() is only defined for kString values, so the type has
+        // to be checked before calling it
+        if (tryStringToNumber(optionValue.getString(), result)) {
+            return true;
+        }
+    }
+    return false;
+}
+
+FunctionRegistry::~FunctionRegistry() {}
+
+// Specific formatter implementations
+
+// --------- Number
+
+// Creates a number formatter for `locale`, configured from the function
+// options in `context`: the "skeleton" option if it is set, and otherwise
+// "minimumFractionDigits" (defaulting to 0).
+// Returns nullptr (with `status` set) on failure; the caller owns the result.
+number::LocalizedNumberFormatter* formatterForOptions(Locale locale, const FormattingContext& context, UErrorCode& status) {
+    NULL_ON_ERROR(status);
+
+    number::UnlocalizedNumberFormatter nf;
+    UnicodeString skeleton;
+    if (context.getStringOption(UnicodeString("skeleton"), skeleton)) {
+        nf = number::NumberFormatter::forSkeleton(skeleton, status);
+    } else {
+        int64_t minFractionDigits = 0;
+        context.getInt64Option(UnicodeString("minimumFractionDigits"), minFractionDigits);
+        nf = number::NumberFormatter::with().precision(number::Precision::minFraction(minFractionDigits));
+    }
+    NULL_ON_ERROR(status);
+    LocalPointer<number::LocalizedNumberFormatter> result(new number::LocalizedNumberFormatter(nf.locale(locale)));
+    if (!result.isValid()) {
+        status =
U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + return result.orphan(); +} + +Formatter* StandardFunctions::NumberFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + Formatter* result = new Number(locale); + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + return result; +} + +static void notANumber(FormattingContext& context) { + context.setOutput(UnicodeString("NaN")); +} + +static void stringAsNumber(Locale locale, const number::LocalizedNumberFormatter nf, FormattingContext& context, UnicodeString s, int64_t offset, UErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + double numberValue; + UErrorCode localErrorCode = U_ZERO_ERROR; + strToDouble(s, locale, numberValue, localErrorCode); + if (U_FAILURE(localErrorCode)) { + notANumber(context); + return; + } + UErrorCode savedStatus = errorCode; + number::FormattedNumber result = nf.formatDouble(numberValue - offset, errorCode); + // Ignore U_USING_DEFAULT_WARNING + if (errorCode == U_USING_DEFAULT_WARNING) { + errorCode = savedStatus; + } + context.setOutput(std::move(result)); +} + +void StandardFunctions::Number::format(FormattingContext& context, UErrorCode& errorCode) const { + CHECK_ERROR(errorCode); + + // No argument => return "NaN" + if (!context.hasFormattableInput()) { + return notANumber(context); + } + + int64_t offset = 0; + context.getInt64Option(UnicodeString("offset"), offset); + + LocalPointer realFormatter; + if (context.optionsCount() == 0) { + realFormatter.adoptInstead(new number::LocalizedNumberFormatter(icuFormatter)); + if (!realFormatter.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + } else { + realFormatter.adoptInstead(formatterForOptions(locale, context, errorCode)); + } + CHECK_ERROR(errorCode); + + if (context.hasStringOutput()) { + stringAsNumber(locale, *realFormatter, context, context.getStringOutput(), offset, errorCode); + return; + } else if (context.hasNumberOutput()) 
{ + // Nothing to do + return; + } + + number::FormattedNumber numberResult; + // Already checked that input is present + const Formattable& toFormat = context.getFormattableInput(); + switch (toFormat.getType()) { + case Formattable::Type::kDouble: { + numberResult = realFormatter->formatDouble(toFormat.getDouble() - offset, errorCode); + break; + } + case Formattable::Type::kLong: { + numberResult = realFormatter->formatInt(toFormat.getLong() - offset, errorCode); + break; + } + case Formattable::Type::kInt64: { + numberResult = realFormatter->formatInt(toFormat.getInt64() - offset, errorCode); + break; + } + case Formattable::Type::kString: { + // Try to parse the string as a number + stringAsNumber(locale, *realFormatter, context, toFormat.getString(), offset, errorCode); + return; + } + default: { + // Other types can't be parsed as a number + notANumber(context); + return; + } + } + + CHECK_ERROR(errorCode); + context.setOutput(std::move(numberResult)); +} + +StandardFunctions::Number::~Number() {} +StandardFunctions::NumberFactory::~NumberFactory() {} + +// --------- PluralFactory + +Selector* StandardFunctions::PluralFactory::createSelector(const Locale& locale, UErrorCode& errorCode) const { + NULL_ON_ERROR(errorCode); + + // Look up plural rules by locale + LocalPointer rules(PluralRules::forLocale(locale, type, errorCode)); + NULL_ON_ERROR(errorCode); + Selector* result = new Plural(locale, rules.orphan()); + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + return result; +} + +static void tryAsString(const Locale& locale, const UnicodeString& s, double& valToCheck, bool& noMatch) { + // Try parsing the inputString as a double + UErrorCode localErrorCode = U_ZERO_ERROR; + strToDouble(s, locale, valToCheck, localErrorCode); + // Invalid format error => value is not a number; no match + if (U_FAILURE(localErrorCode)) { + noMatch = true; + return; + } + noMatch = false; +} + +static void tryWithFormattable(const 
Locale& locale, const Formattable& value, double& valToCheck, bool& noMatch) { + switch (value.getType()) { + case Formattable::Type::kDouble: { + valToCheck = value.getDouble(); + break; + } + case Formattable::Type::kLong: { + valToCheck = (double) value.getLong(); + break; + } + case Formattable::Type::kInt64: { + valToCheck = (double) value.getInt64(); + break; + } + case Formattable::Type::kString: { + tryAsString(locale, value.getString(), valToCheck, noMatch); + return; + } + default: { + noMatch = true; + return; + } + } + noMatch = false; +} + +void StandardFunctions::Plural::selectKey(FormattingContext& context, UnicodeString** keys/*[]*/, int32_t numKeys, UnicodeString** prefs/*[]*/, int32_t& numMatching, UErrorCode& errorCode) const { + CHECK_ERROR(errorCode); + + // No argument => return "NaN" + if (!context.hasFormattableInput()) { + context.setSelectorError(UnicodeString("plural"), errorCode); + return; + } + + int64_t offset = 0; + context.getInt64Option(UnicodeString("offset"), offset); + + // Only doubles and integers can match + double valToCheck; + bool noMatch = true; + + bool isFormattedNumber = context.hasNumberOutput(); + bool isFormattedString = context.hasStringOutput(); + + if (isFormattedString) { + // Formatted string: try parsing it as a number + tryAsString(locale, context.getStringOutput(), valToCheck, noMatch); + } else { + // Already checked that input is present + tryWithFormattable(locale, context.getFormattableInput(), valToCheck, noMatch); + } + + if (noMatch) { + // Non-number => selector error + context.setSelectorError(UnicodeString("plural"), errorCode); + numMatching = 0; + return; + } + + // Generate the matches + // ----------------------- + + U_ASSERT(keys != nullptr); + // First, check for an exact match + numMatching = 0; + double keyAsDouble = 0; + for (int32_t i = 0; i < numKeys; i++) { + // Try parsing the key as a double + UErrorCode localErrorCode = U_ZERO_ERROR; + strToDouble(*keys[i], locale, keyAsDouble, 
localErrorCode);
+        if (U_SUCCESS(localErrorCode)) {
+            if (valToCheck == keyAsDouble) {
+                prefs[numMatching++] = keys[i];
+                break;
+            }
+        }
+    }
+    if (numMatching > 0) {
+        return;
+    }
+
+    // If there was no exact match, check for a match based on the plural category
+    UnicodeString match;
+    if (isFormattedNumber) {
+        match = rules->select(context.getNumberOutput(), errorCode);
+    } else {
+        match = rules->select(valToCheck - offset);
+    }
+    CHECK_ERROR(errorCode);
+
+    for (int32_t i = 0; i < numKeys; i ++) {
+        if (match == *keys[i]) {
+            prefs[numMatching++] = keys[i];
+            break;
+        }
+    }
+}
+
+StandardFunctions::Plural::~Plural() {}
+StandardFunctions::PluralFactory::~PluralFactory() {}
+
+// --------- DateTimeFactory
+
+// Maps a style option value ("full", "long", "medium", "short", "default" or
+// the empty string, compared case-insensitively) to a DateFormat style.
+// Any other value sets `errorCode` to U_ILLEGAL_ARGUMENT_ERROR.
+// Returns kNone if `errorCode` is, or becomes, a failure.
+static DateFormat::EStyle stringToStyle(UnicodeString option, UErrorCode& errorCode) {
+    if (U_SUCCESS(errorCode)) {
+        // Upper-case with the root locale: the option names are fixed ASCII
+        // keywords, and default-locale case mapping would turn "medium"
+        // into "MEDİUM" when the default locale is Turkish or Azerbaijani
+        UnicodeString upper = option.toUpper(Locale::getRoot());
+        if (upper == UnicodeString("FULL")) {
+            return DateFormat::EStyle::kFull;
+        }
+        if (upper == UnicodeString("LONG")) {
+            return DateFormat::EStyle::kLong;
+        }
+        if (upper == UnicodeString("MEDIUM")) {
+            return DateFormat::EStyle::kMedium;
+        }
+        if (upper == UnicodeString("SHORT")) {
+            return DateFormat::EStyle::kShort;
+        }
+        if (upper.isEmpty() || upper == UnicodeString("DEFAULT")) {
+            return DateFormat::EStyle::kDefault;
+        }
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+    }
+    return DateFormat::EStyle::kNone;
+}
+
+// Creates a datetime formatter for `locale`. Returns nullptr (with
+// `errorCode` set) on failure; the caller owns the result.
+Formatter* StandardFunctions::DateTimeFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) {
+    NULL_ON_ERROR(errorCode);
+
+    Formatter* result = new DateTime(locale);
+    if (result == nullptr) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return nullptr;
+    }
+    return result;
+}
+
+// Formats the input in `context` as a date/time and stores the result as
+// string output. The date format is chosen from the function options, in
+// order of precedence: "skeleton", then "pattern", then "datestyle" and/or
+// "timestyle", and otherwise the default date/time format.
+void StandardFunctions::DateTime::format(FormattingContext& context, UErrorCode& errorCode) const {
+    CHECK_ERROR(errorCode);
+
+    // Argument must be present;
+    // also, if there is existing string output, that's
+    // like passing in a string, so we return
+    if (!context.hasFormattableInput() || context.hasStringOutput()) {
+        context.setFormattingError(UnicodeString("datetime"), errorCode);
+        return;
+    }
+
+    LocalPointer<DateFormat> df;
+
+    UnicodeString opt;
+    if (context.getStringOption(UnicodeString("skeleton"), opt)) {
+        // Same as getInstanceForSkeleton(), see ICU 9029
+        // Based on test/intltest/dtfmttst.cpp - TestPatterns()
+        LocalPointer<DateTimePatternGenerator> generator(DateTimePatternGenerator::createInstance(locale, errorCode));
+        // No generator is available if creation failed, so it must not
+        // be dereferenced
+        CHECK_ERROR(errorCode);
+        UnicodeString pattern = generator->getBestPattern(opt, errorCode);
+        df.adoptInstead(new SimpleDateFormat(pattern, locale, errorCode));
+    } else {
+        if (context.getStringOption(UnicodeString("pattern"), opt)) {
+            df.adoptInstead(new SimpleDateFormat(opt, locale, errorCode));
+        } else {
+            DateFormat::EStyle dateStyle = DateFormat::NONE;
+            if (context.getStringOption(UnicodeString("datestyle"), opt)) {
+                dateStyle = stringToStyle(opt, errorCode);
+            }
+            DateFormat::EStyle timeStyle = DateFormat::NONE;
+            if (context.getStringOption(UnicodeString("timestyle"), opt)) {
+                timeStyle = stringToStyle(opt, errorCode);
+            }
+            if (dateStyle == DateFormat::NONE && timeStyle == DateFormat::NONE) {
+                df.adoptInstead(defaultDateTimeInstance(locale, errorCode));
+            } else {
+                df.adoptInstead(DateFormat::createDateTimeInstance(dateStyle, timeStyle, locale));
+            }
+        }
+    }
+
+    CHECK_ERROR(errorCode);
+    // Every branch above can come back without a formatter and without an
+    // error (a failed `new`, or a factory that returns null)
+    if (!df.isValid()) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    UnicodeString result;
+    df->format(context.getFormattableInput(), result, 0, errorCode);
+    context.setOutput(result);
+}
+
+StandardFunctions::DateTimeFactory::~DateTimeFactory() {}
+StandardFunctions::DateTime::~DateTime() {}
+
+// --------- TextFactory
+
+// Creates a text selector for `locale`. Returns nullptr (with `errorCode`
+// set) on failure; the caller owns the result.
+Selector* StandardFunctions::TextFactory::createSelector(const Locale& locale, UErrorCode& errorCode) const {
+    // Like the other factories, do nothing if an error is already pending
+    NULL_ON_ERROR(errorCode);
+
+    Selector* result = new TextSelector(locale);
+    if (result == nullptr) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return nullptr;
+    }
+    return result;
+}
+
+// Selects the first key that equals the input formatted as a string.
+// On return, `numMatching` is 0 or 1, and `prefs[0]` is the matching key if
+// there is one. A missing argument is reported as a selector error.
+void StandardFunctions::TextSelector::selectKey(FormattingContext& context, UnicodeString** keys/*[]*/, int32_t numKeys, UnicodeString** prefs/*[]*/, int32_t& numMatching, UErrorCode& errorCode) const {
+    CHECK_ERROR(errorCode);
+
+    // Just compares the key and value as strings
+
+    // Nothing has matched yet; set this before any early return below so
+    // the caller never reads a stale count
+    numMatching = 0;
+
+    // Argument must be present
+    if (!context.hasFormattableInput()) {
+        context.setSelectorError(UnicodeString("select"), errorCode);
+        return;
+    }
+
+    U_ASSERT(prefs != nullptr);
+
+    // Convert to string
+    context.formatToString(locale, errorCode);
+    CHECK_ERROR(errorCode);
+    if (!context.hasStringOutput()) {
+        return;
+    }
+
+    const UnicodeString& formattedValue = context.getStringOutput();
+
+    for (int32_t i = 0; i < numKeys; i++) {
+        if (*keys[i] == formattedValue) {
+            numMatching++;
+            prefs[0] = keys[i];
+            break;
+        }
+    }
+}
+
+StandardFunctions::TextFactory::~TextFactory() {}
+StandardFunctions::TextSelector::~TextSelector() {}
+
+// --------- IdentityFactory
+
+// Creates an identity formatter for `locale`. Returns nullptr (with
+// `errorCode` set) on failure; the caller owns the result.
+Formatter* StandardFunctions::IdentityFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) {
+    // Like the other factories, do nothing if an error is already pending
+    NULL_ON_ERROR(errorCode);
+
+    Formatter* result = new Identity(locale);
+    if (result == nullptr) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return nullptr;
+    }
+    return result;
+
+}
+
+// Formats the input in `context` as a string with the default formatting.
+// A missing argument is reported as a formatting error.
+void StandardFunctions::Identity::format(FormattingContext& context, UErrorCode& errorCode) const {
+    CHECK_ERROR(errorCode);
+
+    // Argument must be present
+    if (!context.hasFormattableInput()) {
+        // NOTE(review): the error is reported under the name "text", while
+        // checkStandard() looks this formatter up as "identity" -- confirm
+        // which name is intended
+        context.setFormattingError(UnicodeString("text"), errorCode);
+        return;
+    }
+
+    // Just returns the input value as a string
+    context.formatToString(locale, errorCode);
+}
+
+StandardFunctions::IdentityFactory::~IdentityFactory() {}
+StandardFunctions::Identity::~Identity() {}
+
+} // namespace message2
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
diff --git a/icu4c/source/i18n/messageformat2_parser.cpp b/icu4c/source/i18n/messageformat2_parser.cpp
new file mode 100644
index 000000000000..3907fb75573b
--- /dev/null
+++ b/icu4c/source/i18n/messageformat2_parser.cpp
@@ -0,0 +1,1647 @@
+// ©
2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/messageformat2.h" +#include "unicode/messageformat2_data_model.h" +#include "uvector.h" // U_ASSERT + +U_NAMESPACE_BEGIN namespace message2 { + +using Binding = MessageFormatDataModel::Binding; +using Bindings = MessageFormatDataModel::Bindings; +using Expression = MessageFormatDataModel::Expression; +using ExpressionList = MessageFormatDataModel::ExpressionList; +using Key = MessageFormatDataModel::Key; +using KeyList = MessageFormatDataModel::KeyList; +using Literal = MessageFormatDataModel::Literal; +using OptionMap = MessageFormatDataModel::OptionMap; +using Operand = MessageFormatDataModel::Operand; +using Operator = MessageFormatDataModel::Operator; +using Pattern = MessageFormatDataModel::Pattern; +using PatternPart = MessageFormatDataModel::PatternPart; +using Reserved = MessageFormatDataModel::Reserved; +using SelectorKeys = MessageFormatDataModel::SelectorKeys; +using VariantMap = MessageFormatDataModel::VariantMap; + +#define PARSER MessageFormatter::Parser + +/* + The `ERROR()` macro sets `errorCode` to `U_SYNTAX_WARNING` + and sets the offset in `parseError` to `index`. It does not alter control flow. + + For now, all parse errors are denoted by U_SYNTAX_WARNING. + common/unicode/utypes.h defines a broader set of formatting errors, + but it doesn't capture all possible MessageFormat2 errors and until the + spec is finalized, we'll just use the same error code for all parse errors. + + This is a warning rather than an error due to the need to continue execution + with a fallback string.
+*/ +#define ERROR(parseError, errorCode, index) \ + if (!errors.hasSyntaxError()) { \ + setParseError(parseError, index); \ + errors.addSyntaxError(errorCode); \ + } + +// Returns true iff `index` is a valid index for the string `source` +static bool inBounds(const UnicodeString &source, uint32_t index) { + return (((int32_t)index) < source.length()); +} + +// Increments the line number and updates the "characters seen before +// current line" count in `parseError`, iff `source[index]` is a newline +void PARSER::maybeAdvanceLine() { + if (source[index] == LF) { + parseError.line++; + // add 1 to index to get the number of characters seen so far + // (including the newline) + parseError.lengthBeforeCurrentLine = index + 1; + } +} + +/* + Returns early if `errorCode` already denotes a failure; otherwise signals an + error and returns if `index` is out of bounds for the string `source` +*/ +#define CHECK_BOUNDS(source, index, parseError, errorCode) \ + if (U_FAILURE(errorCode)) { \ + return; \ + } \ + if (!inBounds(source, index)) { \ + ERROR(parseError, errorCode, index); \ + return; \ + } + +/* + Same as CHECK_BOUNDS but returns null +*/ +#define CHECK_BOUNDS_NULL(source, index, parseError, errorCode) \ + if (U_FAILURE(errorCode)) { \ + return nullptr; \ + } \ + if (!inBounds(source, index)) { \ + ERROR(parseError, errorCode, index); \ + return nullptr; \ + } + +// ------------------------------------- +// Helper functions + +static void copyContext(const UChar in[U_PARSE_CONTEXT_LEN], UChar out[U_PARSE_CONTEXT_LEN]) { + for (int32_t i = 0; i < U_PARSE_CONTEXT_LEN; i++) { + out[i] = in[i]; + if (in[i] == '\0') { + break; + } + } +} + +/* static */ void PARSER::translateParseError(const MessageParseError &messageParseError, UParseError &parseError) { + parseError.line = messageParseError.line; + parseError.offset = messageParseError.offset; + copyContext(messageParseError.preContext, parseError.preContext); + copyContext(messageParseError.postContext, parseError.postContext); +} +
+/* static */ void PARSER::setParseError(MessageParseError &parseError, uint32_t index) { + // Translate absolute to relative offset + parseError.offset = index // Start with total number of characters seen + - parseError.lengthBeforeCurrentLine; // Subtract all characters before the current line + // TODO: Fill this in with actual pre and post-context + parseError.preContext[0] = 0; + parseError.postContext[0] = 0; +} + +// ------------------------------------- +// Predicates + +// Returns true if `c` is in the interval [`first`, `last`] +static bool inRange(UChar32 c, UChar32 first, UChar32 last) { + U_ASSERT(first < last); + return c >= first && c <= last; +} + +/* + The following helper predicates should exactly match nonterminals in the MessageFormat 2 grammar: + + `isTextChar()` : `text-char` + `isReservedStart()` : `reserved-start` + `isReservedChar()` : `reserved-char` + `isAlpha()` : `ALPHA` + `isDigit()` : `DIGIT` + `isNameStart()` : `name-start` + `isNameChar()` : `name-char` + `isUnquotedStart()` : `unquoted-start` + `isQuotedChar()` : `quoted-char` +*/ +static bool isTextChar(UChar32 c) { + return inRange(c, 0x0000, 0x005B) // Omit backslash + || inRange(c, 0x005D, 0x007A) // Omit { + || c == 0x007C // Pipe is allowed; omit } + || inRange(c, 0x007E, 0xD7FF) // Omit surrogates + || inRange(c, 0xE000, 0x10FFFF); +} + +static bool isReservedStart(UChar32 c) { + switch (c) { + case BANG: + case AT: + case NUMBER_SIGN: + case PERCENT: + case CARET: + case AMPERSAND: + case ASTERISK: + case LESS_THAN: + case GREATER_THAN: + case QUESTION: + case TILDE: + return true; + default: + return false; + } +} + +static bool isReservedChar(UChar32 c) { + return inRange(c, 0x0000, 0x0008) // Omit HTAB and LF + || inRange(c, 0x000B, 0x000C) // Omit CR + || inRange(c, 0x000E, 0x0019) // Omit SP (NOTE(review): 0x001A-0x001F are excluded as well -- confirm against the grammar) + || inRange(c, 0x0021, 0x005B) // Omit backslash + || inRange(c, 0x005D, 0x007A) // Omit { | } + || inRange(c, 0x007E, 0xD7FF) // Omit surrogates + || inRange(c, 0xE000, 0x10FFFF); +} + +static bool
isAlpha(UChar32 c) { return inRange(c, 0x0041, 0x005A) || inRange(c, 0x0061, 0x007A); } + +static bool isDigit(UChar32 c) { return inRange(c, 0x0030, 0x0039); } + +static bool isNameStart(UChar32 c) { + return isAlpha(c) || c == UNDERSCORE || inRange(c, 0x00C0, 0x00D6) || inRange(c, 0x00D8, 0x00F6) || + inRange(c, 0x00F8, 0x02FF) || inRange(c, 0x0370, 0x037D) || inRange(c, 0x037F, 0x1FFF) || + inRange(c, 0x200C, 0x200D) || inRange(c, 0x2070, 0x218F) || inRange(c, 0x2C00, 0x2FEF) || + inRange(c, 0x3001, 0xD7FF) || inRange(c, 0xF900, 0xFDCF) || inRange(c, 0xFDF0, 0xFFFD) || + inRange(c, 0x10000, 0xEFFFF); +} + +static bool isNameChar(UChar32 c) { + return isNameStart(c) || isDigit(c) || c == HYPHEN || c == PERIOD || c == COLON || c == 0x00B7 || + inRange(c, 0x0300, 0x036F) || inRange(c, 0x203F, 0x2040); +} + +static bool isUnquotedStart(UChar32 c) { + return isNameStart(c) || isDigit(c) || c == PERIOD || c == 0x00B7 || + inRange(c, 0x0300, 0x036F) || inRange(c, 0x203F, 0x2040); +} + +static bool isQuotedChar(UChar32 c) { + return inRange(c, 0x0000, 0x005B) // Omit backslash + || inRange(c, 0x005D, 0x007B) // Omit pipe + || inRange(c, 0x007D, 0xD7FF) // Omit surrogates + || inRange(c, 0xE000, 0x10FFFF); +} + +// Returns true iff `c` can begin a `function` nonterminal +static bool isFunctionStart(UChar32 c) { + switch (c) { + case COLON: + case PLUS: + case HYPHEN: { + return true; + } + default: { + return false; + } + } +} + +// Returns true iff `c` can begin an `annotation` nonterminal +static bool isAnnotationStart(UChar32 c) { + return isFunctionStart(c) || isReservedStart(c); +} + +// Returns true iff `c` can begin a `reserved-char`, a `reserved-escape`, +// or a `literal` +static bool reservedChunkFollows(UChar32 c) { + switch(c) { + // reserved-escape + case BACKSLASH: + // literal + case PIPE: { + return true; + } + default: { + // reserved-char + return (isReservedChar(c)); + } + } +} + +// ------------------------------------- +// Parsing functions + +/* +
This is a recursive-descent scannerless parser that, + with a few exceptions, uses 1 character of lookahead. + +All the exceptions involve ambiguities about the meaning of whitespace. + +There are four ambiguities in the grammar that can't be resolved with finite +lookahead (since whitespace sequences can be arbitrarily long). They are resolved +with a form of backtracking (early exit). No state needs to be saved/restored +since whitespace doesn't affect the shape of the resulting parse tree, so it's +not true backtracking. + +In addition, the grammar has been refactored +in a semantics-preserving way in some cases to make the code easier to structure. + +First: variant = when 1*(s key) [s] pattern + Example: when k {a} + When reading the first space after 'k', it's ambiguous whether it's the + required space before another key, or the optional space before `pattern`. + (See comments in parseNonEmptyKeys()) + +Second: expression = "{" [s] (((literal / variable) [s annotation]) / annotation) [s] "}" + annotation = (function *(s option)) / reserved + Example: {:f } + When reading the first space after 'f', it's ambiguous whether it's the + required space before an option, or the optional trailing space after an options list + (in this case, the options list is empty). + (See comments in parseOptions() -- handling this case also meant it was easier to base + the code on a slightly refactored grammar, which should be semantically equivalent.) + +Third: expression = "{" [s] (((literal / variable) [s annotation]) / annotation) [s] "}" + annotation = (function *(s option)) / reserved + Example: {@a } + Similar to the previous case; see comments in parseReserved() + +Fourth: expression = "{" [s] (((literal / variable) [s annotation]) / annotation) [s] "}" + Example: {|foo| } + When reading the first space after the '|', it's ambiguous whether it's the required + space before an annotation, or the optional trailing space before the '}'. 
+ (See comments in parseLiteralOrVariableWithAnnotation(); handling this case relies on + the same grammar refactoring as the second exception.) + + Most functions match a non-terminal in the grammar, except as explained + in comments. + +Unless otherwise noted in a comment, all helper functions that take + a `source` string, an `index` unsigned int, and an `errorCode` `UErrorCode` + have the precondition: + `index` < `source.length()` + and the postcondition: + `U_FAILURE(errorCode)` || `index` < `source.length()` +*/ + +/* + No pre, no post. + A message may end with whitespace, so `index` may equal `source.length()` on exit. +*/ +void PARSER::parseWhitespaceMaybeRequired(bool required, UErrorCode &errorCode) { + CHECK_ERROR(errorCode); + + bool sawWhitespace = false; + + // The loop exits either when we consume all the input, + // or when we see a non-whitespace character. + while (true) { + // Check if all input has been consumed + if (!inBounds(source, index)) { + // If whitespace isn't required -- or if we saw it already -- + // then the caller is responsible for checking this case and + // setting an error if necessary. + if (!required || sawWhitespace) { + // Not an error. + return; + } + // Otherwise, whitespace is required; the end of the input has + // been reached without whitespace. This is an error. + ERROR(parseError, errorCode, index); + return; + } + + // Input remains; process the next character if it's whitespace, + // exit the loop otherwise + if (isWhitespace(source[index])) { + sawWhitespace = true; + // Increment line number in parse error if we consume a newline + maybeAdvanceLine(); + index++; + } else { + break; + } + } + + if (!sawWhitespace && required) { + ERROR(parseError, errorCode, index); + } +} + +/* + No pre, no post, for the same reason as `parseWhitespaceMaybeRequired()`.
+*/ +void PARSER::parseRequiredWhitespace(UErrorCode &errorCode) { + parseWhitespaceMaybeRequired(true, errorCode); + normalizedInput += SPACE; +} + +/* + No pre, no post, for the same reason as `parseWhitespaceMaybeRequired()`. +*/ +void PARSER::parseOptionalWhitespace(UErrorCode &errorCode) { + parseWhitespaceMaybeRequired(false, errorCode); +} + +// Consumes a single character, signaling an error if `source[index]` != `c` +void PARSER::parseToken(UChar32 c, UErrorCode &errorCode) { + CHECK_ERROR(errorCode); + CHECK_BOUNDS(source, index, parseError, errorCode); + + if (source[index] == c) { + index++; + // Guarantee postcondition + CHECK_BOUNDS(source, index, parseError, errorCode); + normalizedInput += c; + return; + } + // Next character didn't match -- error out + ERROR(parseError, errorCode, index); +} + +/* + Consumes a fixed-length token, signaling an error if the token isn't a prefix of + the string beginning at `source[index]` +*/ +template +void PARSER::parseToken(const UChar32 (&token)[N], UErrorCode &errorCode) { + CHECK_ERROR(errorCode); + + U_ASSERT(inBounds(source, index)); + + int32_t tokenPos = 0; + while (tokenPos < N - 1) { + if (source[index] != token[tokenPos]) { + ERROR(parseError, errorCode, index); + return; + } + normalizedInput += token[tokenPos]; + index++; + // Guarantee postcondition + CHECK_BOUNDS(source, index, parseError, errorCode); + + tokenPos++; + } +} + +/* + Consumes optional whitespace, possibly advancing `index` to `index'`, + then consumes a fixed-length token (signaling an error if the token isn't a prefix of + the string beginning at `source[index']`), + then consumes optional whitespace again +*/ +template +void PARSER::parseTokenWithWhitespace(const UChar32 (&token)[N], + UErrorCode &errorCode) { + // No need for error check or bounds check before parseOptionalWhitespace + parseOptionalWhitespace(errorCode); + // Establish precondition + CHECK_BOUNDS(source, index, parseError, errorCode); + parseToken(token, errorCode); 
+ parseOptionalWhitespace(errorCode); + // Guarantee postcondition + CHECK_BOUNDS(source, index, parseError, errorCode); +} + +/* + Consumes optional whitespace, possibly advancing `index` to `index'`, + then consumes a single character (signaling an error if it doesn't match + `source[index']`), + then consumes optional whitespace again +*/ +void PARSER::parseTokenWithWhitespace(UChar32 c, + UErrorCode &errorCode) { + // No need for error check or bounds check before parseOptionalWhitespace + parseOptionalWhitespace(errorCode); + // Establish precondition + CHECK_BOUNDS(source, index, parseError, errorCode); + parseToken(c, errorCode); + parseOptionalWhitespace(errorCode); + // Guarantee postcondition + CHECK_BOUNDS(source, index, parseError, errorCode); +} + +/* + Consumes a non-empty sequence of `name-char`s, the first of which is + also a `name-start` + (that is, a character `start` such that `isNameStart(start)`). + + Appends this sequence to `name`. + + (Matches the `name` nonterminal in the grammar.) +*/ +void PARSER::parseName(UErrorCode &errorCode, + UnicodeString &name) { + CHECK_ERROR(errorCode); + + U_ASSERT(inBounds(source, index)); + + if (!isNameStart(source[index])) { + ERROR(parseError, errorCode, index); + return; + } + + while (isNameChar(source[index])) { + name += source[index]; + normalizedInput += source[index]; + index++; + CHECK_BOUNDS(source, index, parseError, errorCode); + } +} + +/* + Consumes a '$' followed by a `name`, initializing `var` to `name`. + + (Matches the `variable` nonterminal in the grammar.) +*/ +void PARSER::parseVariableName(UErrorCode &errorCode, + UnicodeString& var) { + CHECK_ERROR(errorCode); + + U_ASSERT(inBounds(source, index)); + // If the '$' is missing, we don't want a binding + // for this variable to be created.
+ bool valid = source[index] == DOLLAR; + parseToken(DOLLAR, errorCode); + CHECK_BOUNDS(source, index, parseError, errorCode); + parseName(errorCode, var); + // Set the name to "" if the variable wasn't + // declared correctly + if (!valid) { + var.remove(); + } +} + +static FunctionName::Sigil functionSigil(UChar32 c) { + switch (c) { + case PLUS: { return FunctionName::Sigil::OPEN; } + case HYPHEN: { return FunctionName::Sigil::CLOSE; } + default: { + U_ASSERT(c == COLON); + return FunctionName::Sigil::DEFAULT; + } + } +} +/* + Consumes a reference to a function, matching the `function` nonterminal in + the grammar. + + Returns a newly allocated `FunctionName` holding the name and its sigil (nullptr if no function-start character is present or allocation fails). +*/ +FunctionName* PARSER::parseFunction(UErrorCode &errorCode) { + NULL_ON_ERROR(errorCode); + + U_ASSERT(inBounds(source, index)); + if (!isFunctionStart(source[index])) { + ERROR(parseError, errorCode, index); + return nullptr; + } + + FunctionName::Sigil sigil = functionSigil(source[index]); + normalizedInput += source[index]; + index++; // Consume the function start character + CHECK_BOUNDS_NULL(source, index, parseError, errorCode); + UnicodeString name; + parseName(errorCode, name); + FunctionName* result = new FunctionName(name, sigil); + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return result; +} + + +/* + Precondition: source[index] == BACKSLASH + + Consume an escaped character. + + Generalized to handle `reserved-escape`, `text-escape`, + or `literal-escape`, depending on the `kind` argument.
+ + Appends result to `str` +*/ +void PARSER::parseEscapeSequence(EscapeKind kind, + UErrorCode &errorCode, + UnicodeString &str) { + CHECK_ERROR(errorCode); + + U_ASSERT(inBounds(source, index)); + U_ASSERT(source[index] == BACKSLASH); + normalizedInput += BACKSLASH; + index++; // Skip the initial backslash + CHECK_BOUNDS(source, index, parseError, errorCode); + + #define SUCCEED \ + /* Append to the output string */ \ + str += source[index]; \ + /* Update normalizedInput */ \ + normalizedInput += source[index]; \ + /* Consume the character */ \ + index++; \ + /* Guarantee postcondition */ \ + CHECK_BOUNDS(source, index, parseError, errorCode); \ + return; + + // Expect a '{', '}', '|' or backslash, depending on `kind` + switch (source[index]) { + case LEFT_CURLY_BRACE: + case RIGHT_CURLY_BRACE: { + // Allowed in a `text-escape` or `reserved-escape` + switch (kind) { + case TEXT: + case RESERVED: { + SUCCEED; + } + default: { + break; + } + } + break; + } + case PIPE: { + // Allowed in a `literal-escape` or `reserved-escape` + switch (kind) { + case LITERAL: + case RESERVED: { + SUCCEED; + } + default: { + break; + } + } + break; + } + case BACKSLASH: { + // Allowed in any escape sequence + SUCCEED; + } + default: { + // No other characters are allowed here + break; + } + } + // If control reaches here, there was an error + ERROR(parseError, errorCode, index); +} + +/* + Consume an escaped pipe or backslash, matching the `literal-escape` + nonterminal in the grammar +*/ +void PARSER::parseLiteralEscape(UErrorCode &errorCode, + UnicodeString &str) { + parseEscapeSequence(LITERAL, errorCode, str); +} + +/* + Consume a literal, matching the `literal` nonterminal in the grammar.
+ May be quoted or unquoted -- sets `quoted` to true iff quoted, and appends the literal's contents to `contents` +*/ +void PARSER::parseLiteral(UErrorCode &errorCode, bool& quoted, UnicodeString& contents) { + CHECK_ERROR(errorCode); + CHECK_BOUNDS(source, index, parseError, errorCode); + + // Parse the opening '|' if present + if (source[index] == PIPE) { + quoted = true; + parseToken(PIPE, errorCode); + CHECK_BOUNDS(source, index, parseError, errorCode); + } else { + if (!isUnquotedStart(source[index])) { + ERROR(parseError, errorCode, index); + return; + } + quoted = false; + } + + // Parse the contents + bool done = false; + while (!done) { + if (quoted && source[index] == BACKSLASH) { + parseLiteralEscape(errorCode, contents); + } else if ((!quoted && isNameChar(source[index])) + || (quoted && isQuotedChar(source[index]))) { + contents += source[index]; + normalizedInput += source[index]; + index++; // Consume this character + maybeAdvanceLine(); + } else { + // Assume the sequence of literal characters ends here + done = true; + } + CHECK_BOUNDS(source, index, parseError, errorCode); + } + + // Parse the closing '|' if we saw an opening '|' + if (quoted) { + parseToken(PIPE, errorCode); + } + + // Guarantee postcondition + CHECK_BOUNDS(source, index, parseError, errorCode); +} + +/* + Consume a name-value pair, matching the `option` nonterminal in the grammar.
+ + Adds the option to `builder` +*/ +void PARSER::parseOption(UErrorCode &errorCode, + Operator::Builder &builder) { + CHECK_ERROR(errorCode); + + U_ASSERT(inBounds(source, index)); + + // Parse LHS + UnicodeString lhs; + parseName(errorCode, lhs); + + // Parse '=' + parseTokenWithWhitespace(EQUALS, errorCode); + + UnicodeString rhsStr; + LocalPointer rand; + // Parse RHS, which is either a literal or variable + switch (source[index]) { + case DOLLAR: { + parseVariableName(errorCode, rhsStr); + rand.adoptInstead(Operand::create(VariableName(rhsStr), errorCode)); + break; + } + default: { + // Must be a literal + bool isQuoted; + parseLiteral(errorCode, isQuoted, rhsStr); + Literal lit(isQuoted, rhsStr); + rand.adoptInstead(Operand::create(lit, errorCode)); + break; + } + } + // Finally, add the key=value mapping + CHECK_ERROR(errorCode); + builder.addOption(lhs, rand.orphan(), errorCode); +} + +/* + Consume optional whitespace followed by a sequence of options + (possibly empty), separated by whitespace +*/ +void PARSER::parseOptions(UErrorCode &errorCode, Operator::Builder& builder) { + CHECK_ERROR(errorCode); + + // Early exit if out of bounds -- no more work is possible + CHECK_BOUNDS(source, index, parseError, errorCode); + +/* +Arbitrary lookahead is required to parse option lists. To see why, consider +these rules from the grammar: + +expression = "{" [s] (((literal / variable) [s annotation]) / annotation) [s] "}" +annotation = (function *(s option)) / reserved + +And this example: +{:foo } + +Derivation: +expression -> "{" [s] (((literal / variable) [s annotation]) / annotation) [s] "}" + -> "{" [s] annotation [s] "}" + -> "{" [s] ((function *(s option)) / reserved) [s] "}" + -> "{" [s] function *(s option) [s] "}" + +In this example, knowing whether to expect a '}' or the start of another option +after the whitespace would require arbitrary lookahead -- in other words, which +rule should we apply?
+ *(s option) -> s option *(s option) + or + *(s option) -> + +The same would apply to the example {:foo k=v } (note the trailing space after "v"). + +This is addressed using a form of backtracking and (to make the backtracking easier +to apply) a slight refactoring to the grammar. + +This code is written as if the grammar is: + expression = "{" [s] (((literal / variable) ([s] / [s annotation])) / annotation) "}" + annotation = (function *(s option) [s]) / (reserved [s]) + +Parsing the `*(s option) [s]` sequence can be done within `parseOptions()`, meaning +that `parseExpression()` can safely require a '}' after `parseOptions()` finishes. + +Note that "backtracking" here really just means early exit, since only whitespace +is involved and there's no state to save. +*/ + + while(true) { + // If the next character is not whitespace, that means we've already + // parsed the entire options list (which may have been empty) and there's + // no trailing whitespace. In that case, exit. + if (!isWhitespace(source[index])) { + break; + } + + // In any case other than an empty options list, there must be at least + // one whitespace character. + parseRequiredWhitespace(errorCode); + // Restore precondition + CHECK_BOUNDS(source, index, parseError, errorCode); + + // If a name character follows, then at least one more option remains + // in the list. + // Otherwise, we've consumed all the options and any trailing whitespace, + // and can exit. + // Note that exiting is sort of like backtracking: "(s option)" doesn't apply, + // so we back out to [s]. + if (!isNameStart(source[index])) { + // We've consumed all the options (meaning that either we consumed non-empty + // whitespace, or consumed at least one option.) + // Done.
+ // Remove the whitespace from normalizedInput + U_ASSERT(normalizedInput.truncate(normalizedInput.length() - 1)); + break; + } + parseOption(errorCode, builder); + } +} + +void PARSER::parseReservedEscape(UErrorCode &errorCode, + UnicodeString &str) { + parseEscapeSequence(RESERVED, errorCode, str); +} + +/* + Consumes a non-empty sequence of reserved-chars, reserved-escapes, and + literals (as in 1*(reserved-char / reserved-escape / literal) in the `reserved-body` rule) + + Appends it to `str` +*/ +void PARSER::parseReservedChunk(UErrorCode &errorCode, + Reserved::Builder& result) { + CHECK_ERROR(errorCode); + + bool empty = true; + UnicodeString chunk; + while(reservedChunkFollows(source[index])) { + empty = false; + // reserved-char + if (isReservedChar(source[index])) { + chunk += source[index]; + normalizedInput += source[index]; + // consume the char + index++; + // Restore precondition + CHECK_BOUNDS(source, index, parseError, errorCode); + continue; + } + + if (chunk.length() > 0) { + Literal lit(false, chunk); + result.add(lit, errorCode); + CHECK_ERROR(errorCode); + chunk.setTo(u"", 0); + } + + if (source[index] == BACKSLASH) { + // reserved-escape + parseReservedEscape(errorCode, chunk); + Literal lit(false, chunk); + result.add(lit, errorCode); + CHECK_ERROR(errorCode); + chunk.setTo(u"", 0); + } else if (source[index] == PIPE || isUnquotedStart(source[index])) { + UnicodeString s; + bool isQuoted; + parseLiteral(errorCode, isQuoted, s); + Literal lit(isQuoted, s); + result.add(lit, errorCode); + CHECK_ERROR(errorCode); + } else { + // The reserved chunk ends here + break; + } + } + + // Add the last chunk if necessary + if (chunk.length() > 0) { + Literal lit(false, chunk); + result.add(lit, errorCode); + } + + if (empty) { + ERROR(parseError, errorCode, index); + } +} + +/* + Consume a `reserved-start` character followed by a possibly-empty sequence + of non-empty sequences of reserved characters, separated by whitespace. 
+ Matches the `reserved` nonterminal in the grammar + +*/ +Reserved* PARSER::parseReserved(UErrorCode &errorCode) { + NULL_ON_ERROR(errorCode); + + U_ASSERT(inBounds(source, index)); + + LocalPointer builder(Reserved::builder(errorCode)); + NULL_ON_ERROR(errorCode); + + // Require a `reservedStart` character + if (!isReservedStart(source[index])) { + ERROR(parseError, errorCode, index); + return nullptr; + } + + // Add the start char as a separate text chunk + UnicodeString firstCharString(source[index]); + Literal firstChunk(false, firstCharString); + builder->add(firstChunk, errorCode); + NULL_ON_ERROR(errorCode); + // Consume reservedStart + normalizedInput += source[index]; + index++; + +/* + Arbitrary lookahead is required to parse a `reserved`, for similar reasons + to why it's required for parsing function annotations. + + In the grammar: + + annotation = (function *(s option)) / reserved + expression = "{" [s] (((literal / variable) [s annotation]) / annotation) [s] "}" + reserved = reserved-start reserved-body + reserved-body = *( [s] 1*(reserved-char / reserved-escape / literal)) + + When reading a whitespace character, it's ambiguous whether it's the optional + whitespace in this rule, or the optional whitespace that precedes a '}' in an + expression. + + The ambiguity is resolved using the same grammar refactoring as shown in + the comment in `parseOptions()`. +*/ + // Consume reserved characters / literals / reserved escapes + // until a character that can't be in a `reserved-body` is seen + while (true) { + /* + First, if there is whitespace, it means either a chunk follows it, + or this is the trailing whitespace before the '}' that terminates an + expression. + + Next, if the next character can start a reserved-char, reserved-escape, + or literal, then parse a "chunk" of reserved things. 
+ In any other case, we exit successfully, since per the refactored + grammar rule: + annotation = (function *(s option) [s]) / (reserved [s]) + it's valid to consume whitespace after a `reserved`. + (`parseExpression()` is responsible for checking that the next + character is in fact a '}'.) + */ + if (!inBounds(source, index)) { + break; + } + bool sawWhitespace = false; + if (isWhitespace(source[index])) { + sawWhitespace = true; + parseOptionalWhitespace(errorCode); + // Restore precondition + if (!inBounds(source, index)) { + break; + } + } + + if (reservedChunkFollows(source[index])) { + parseReservedChunk(errorCode, *builder); + + // Avoid looping infinitely + if (!inBounds(source, index)) { + break; + } + } else { + if (sawWhitespace) { + if (source[index] == RIGHT_CURLY_BRACE) { + // Not an error: just means there's no trailing whitespace + // after this `reserved` + break; + } + // Error: if there's whitespace, it must either be followed + // by a non-empty sequence or by '}' + ERROR(parseError, errorCode, index); + break; + } + // If there was no whitespace, it's not an error, + // just the end of the reserved string + break; + } + } + + LocalPointer r(builder->build(errorCode)); + NULL_ON_ERROR(errorCode); + return r.orphan(); +} + + +/* + Consume a function call or reserved string, matching the `annotation` + nonterminal in the grammar + + Returns an `Operator` representing this (a reserved is not a parse error, but might be a formatting error) +*/ +Operator* PARSER::parseAnnotation(UErrorCode &errorCode) { + NULL_ON_ERROR(errorCode); + + U_ASSERT(inBounds(source, index)); + LocalPointer ratorBuilder(Operator::builder(errorCode)); + NULL_ON_ERROR(errorCode); + if (isFunctionStart(source[index])) { + // Consume the function name + LocalPointer func(parseFunction(errorCode)); + NULL_ON_ERROR(errorCode); + ratorBuilder->setFunctionName(*func, errorCode); + // Consume the options (which may be empty) + parseOptions(errorCode, *ratorBuilder); + } else { + // Must be reserved + // A reserved
sequence is not a parse error, but might be a formatting error + LocalPointer rator(parseReserved(errorCode)); + NULL_ON_ERROR(errorCode); + ratorBuilder->setReserved(rator.orphan()); + } + return ratorBuilder->build(errorCode); +} + +/* + Consume a literal or variable (depending on `isVariable`), + followed by either required whitespace followed by an annotation, + or optional whitespace. +*/ +void PARSER::parseLiteralOrVariableWithAnnotation(bool isVariable, + UErrorCode &errorCode, + Expression::Builder& builder) { + CHECK_ERROR(errorCode); + + U_ASSERT(inBounds(source, index)); + + LocalPointer adoptedRand; + if (isVariable) { + UnicodeString var; + parseVariableName(errorCode, var); + adoptedRand.adoptInstead(Operand::create(VariableName(var), errorCode)); + } else { + UnicodeString s; + bool isQuoted; + parseLiteral(errorCode, isQuoted, s); + Literal lit(isQuoted, s); + adoptedRand.adoptInstead(Operand::create(lit, errorCode)); + } + + // Set the operand (if allocation succeeded) + CHECK_ERROR(errorCode); + builder.setOperand(adoptedRand.orphan()); + +/* +Parsing a literal or variable with an optional annotation requires arbitrary lookahead. +To see why, consider this rule from the grammar: + +expression = "{" [s] (((literal / variable) [s annotation]) / annotation) [s] "}" + +And this example: + +{|foo| } + +Derivation: +expression -> "{" [s] (((literal / variable) [s annotation]) / annotation) [s] "}" + -> "{" [s] ((literal / variable) [s annotation]) [s] "}" + -> "{" [s] (literal [s annotation]) [s] "}" + +When reading the ' ' after the second '|', it's ambiguous whether that's the required +space before an annotation, or the optional space before the '}'. + +To make this ambiguity easier to handle, this code is based on the same grammar +refactoring for the `expression` nonterminal that `parseOptions()` relies on. See +the comment in `parseOptions()` for details. 
+*/ + + LocalPointer result; + if (isWhitespace(source[index])) { + // If the next character is whitespace, either [s annotation] or [s] applies + // (the character is either the required space before an annotation, or optional + // trailing space after the literal or variable). It's still ambiguous which + // one does apply. + parseOptionalWhitespace(errorCode); + // Restore precondition + CHECK_BOUNDS(source, index, parseError, errorCode); + + // This next check resolves the ambiguity between [s annotation] and [s] + if (isAnnotationStart(source[index])) { + normalizedInput += SPACE; + // The previously consumed whitespace precedes an annotation + LocalPointer adoptedRator(parseAnnotation(errorCode)); + CHECK_ERROR(errorCode); + builder.setOperator(adoptedRator.orphan()); + } + } else { + // Either there was never whitespace, or + // the previously consumed whitespace is the optional trailing whitespace; + // either the next character is '}' or the error will be handled by parseExpression. 
+ // Do nothing, since the operand was already set + } +} + +/* + Consume an expression, matching the `expression` nonterminal in the grammar +*/ + +static void exprFallback(Expression::Builder& exprBuilder, UErrorCode &errorCode) { + CHECK_ERROR(errorCode); + + // Construct a literal consisting just of The U+FFFD REPLACEMENT CHARACTER + // per https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#fallback-resolution + LocalPointer fallbackOperand(Operand::create(Literal(false, UnicodeString(REPLACEMENT)), errorCode)); + CHECK_ERROR(errorCode); + + exprBuilder.setOperand(fallbackOperand.orphan()); +} + +static Expression* exprFallback(UErrorCode &errorCode) { + NULL_ON_ERROR(errorCode); + + LocalPointer exprBuilder(Expression::builder(errorCode)); + NULL_ON_ERROR(errorCode); + + // Construct a literal consisting just of The U+FFFD REPLACEMENT CHARACTER + // per https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#fallback-resolution + LocalPointer fallbackOperand(Operand::create(Literal(false, UnicodeString(REPLACEMENT)), errorCode)); + NULL_ON_ERROR(errorCode); + + exprBuilder->setOperand(fallbackOperand.orphan()); + return exprBuilder->build(errorCode); +} + +// Sets `parseError` to true if there was an error parsing this expression +// Uses a flag rather than just returning a fallback expression because which +// fallback to use depends on context +Expression* PARSER::parseExpression(bool& err, UErrorCode &errorCode) { + NULL_ON_ERROR(errorCode); + err = false; + + // Early return if out of input -- no more work is possible + // (and parseExpression shouldn't be able to return null if U_SUCCESS(errorCode)) + U_ASSERT(inBounds(source, index)); + + // Parse opening brace + parseToken(LEFT_CURLY_BRACE, errorCode); + // Optional whitespace after opening brace + parseOptionalWhitespace(errorCode); + + LocalPointer exprBuilder(Expression::builder(errorCode)); + NULL_ON_ERROR(errorCode); + // Restore precondition + if 
(!inBounds(source, index)) { + exprFallback(*exprBuilder, errorCode); + } else { + // literal '|', variable '$' or annotation + switch (source[index]) { + case PIPE: { + // Quoted literal + parseLiteralOrVariableWithAnnotation(false, errorCode, *exprBuilder); + break; + } + case DOLLAR: { + // Variable + parseLiteralOrVariableWithAnnotation(true, errorCode, *exprBuilder); + break; + } + default: { + if (isAnnotationStart(source[index])) { + LocalPointer rator(parseAnnotation(errorCode)); + NULL_ON_ERROR(errorCode); + exprBuilder->setOperator(rator.orphan()); + } else if (isUnquotedStart(source[index])) { + // Unquoted literal + parseLiteralOrVariableWithAnnotation(false, errorCode, *exprBuilder); + } else { + // Not a literal, variable or annotation -- error out + ERROR(parseError, errorCode, index); + // Set the operand in order to avoid an invalid state error -- + // however, the caller will ignore the result + exprFallback(*exprBuilder, errorCode); + err = true; + break; + } + break; + } + } + } + // For why we don't parse optional whitespace here, even though the grammar + // allows it, see comments in parseLiteralWithAnnotation() and parseOptions() + + // Parse closing brace + parseToken(RIGHT_CURLY_BRACE, errorCode); + + return exprBuilder->build(errorCode); +} + +/* + Consume a possibly-empty sequence of declarations separated by whitespace; + each declaration matches the `declaration` nonterminal in the grammar + + Builds up an environment representing those declarations +*/ +void PARSER::parseDeclarations(UErrorCode &errorCode) { + CHECK_ERROR(errorCode); + + // End-of-input here would be an error; even empty + // declarations must be followed by a body + CHECK_BOUNDS(source, index, parseError, errorCode); + + while (source[index] == ID_LET[0]) { + parseToken(ID_LET, errorCode); + parseRequiredWhitespace(errorCode); + // Restore precondition + CHECK_BOUNDS(source, index, parseError, errorCode); + UnicodeString lhs; + parseVariableName(errorCode, lhs); + 
parseTokenWithWhitespace(EQUALS, errorCode); + + // Restore precondition before calling parseExpression() + // (which must return a non-null value) + CHECK_BOUNDS(source, index, parseError, errorCode); + + bool rhsError = false; + LocalPointer rhs(parseExpression(rhsError, errorCode)); + if (rhsError) { + rhs.adoptInstead(exprFallback(errorCode)); + } + parseOptionalWhitespace(errorCode); + // Restore precondition + CHECK_BOUNDS(source, index, parseError, errorCode); + + if (U_FAILURE(errorCode)) { + return; + } + // Add binding from lhs to rhs, unless there was an error + if (lhs.length() > 0) { + dataModel.addLocalVariable(lhs, rhs.orphan(), errorCode); + } + } +} + +/* + Consume an escaped curly brace, or backslash, matching the `text-escape` + nonterminal in the grammar +*/ +void PARSER::parseTextEscape(UErrorCode &errorCode, UnicodeString &str) { + parseEscapeSequence(TEXT, errorCode, str); +} + +/* + Consume a non-empty sequence of text characters and escaped text characters, + matching the `text` nonterminal in the grammar +*/ +void PARSER::parseText(UErrorCode &errorCode, UnicodeString &str) { + CHECK_ERROR(errorCode); + CHECK_BOUNDS(source, index, parseError, errorCode); + + bool empty = true; + + while (true) { + if (source[index] == BACKSLASH) { + parseTextEscape(errorCode, str); + } else if (isTextChar(source[index])) { + normalizedInput += source[index]; + str += source[index]; + index++; + maybeAdvanceLine(); + } else { + break; + } + // Restore precondition + CHECK_BOUNDS(source, index, parseError, errorCode); + empty = false; + } + + if (empty) { + // text must be non-empty + ERROR(parseError, errorCode, index); + } +} + +/* + Consume an `nmtoken`, `literal`, or the string "*", matching + the `key` nonterminal in the grammar +*/ +Key* PARSER::parseKey(UErrorCode &errorCode) { + NULL_ON_ERROR(errorCode); + U_ASSERT(inBounds(source, index)); + + LocalPointer k; + // Literal | '*' + switch (source[index]) { + case ASTERISK: { + index++; + // Guarantee 
postcondition + CHECK_BOUNDS_NULL(source, index, parseError, errorCode); + k.adoptInstead(Key::create(errorCode)); + normalizedInput += ASTERISK; + break; + } + default: { + // Literal + UnicodeString s; + bool isQuoted; + parseLiteral(errorCode, isQuoted, s); + Literal lit(isQuoted, s); + k.adoptInstead(Key::create(lit, errorCode)); + break; + } + } + + NULL_ON_ERROR(errorCode); + return k.orphan(); +} + +MessageFormatDataModel::SelectorKeys::Builder* MessageFormatDataModel::SelectorKeys::builder(UErrorCode &errorCode) { + NULL_ON_ERROR(errorCode); + LocalPointer result(new MessageFormatDataModel::SelectorKeys::Builder(errorCode)); + if (U_FAILURE(errorCode)) { + return nullptr; + } + return result.orphan(); +} + +MessageFormatDataModel::SelectorKeys* MessageFormatDataModel::SelectorKeys::Builder::build(UErrorCode &errorCode) const { + NULL_ON_ERROR(errorCode); + + LocalPointer ks(keys->build(errorCode)); + NULL_ON_ERROR(errorCode); + // Key list must be non-empty (this should be checked earlier on) + U_ASSERT(ks->length() >= 1); + SelectorKeys* result = new SelectorKeys(ks.orphan()); + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return result; +} + +/* + Consume a non-empty sequence of `key`s separated by whitespace + + Takes ownership of `keys` +*/ +MessageFormatDataModel::SelectorKeys* PARSER::parseNonEmptyKeys(UErrorCode &errorCode) { + NULL_ON_ERROR(errorCode); + U_ASSERT(inBounds(source, index)); + +/* +Arbitrary lookahead is required to parse key lists. To see why, consider +this rule from the grammar: + +variant = when 1*(s key) [s] pattern + +And this example: +when k1 {a} + +Derivation: + variant -> when 1*(s key) [s] pattern + -> when s key *(s key) [s] pattern + +After matching ' ' to `s` and 'k1' to `key`, it would require arbitrary lookahead +to know whether to expect the start of a pattern or the start of another key. 
+In other words: is the second whitespace sequence the required space in 1*(s key), +or the optional space in [s] pattern? + +This is addressed using "backtracking" (similarly to `parseOptions()`). +*/ + + LocalPointer keysBuilder(MessageFormatDataModel::SelectorKeys::builder(errorCode)); + NULL_ON_ERROR(errorCode); + + // Since the first key is required, it's simplest to parse the required + // whitespace and then the first key separately. + parseRequiredWhitespace(errorCode); + // Restore precondition + CHECK_BOUNDS_NULL(source, index, parseError, errorCode); + LocalPointer k(parseKey(errorCode)); + if (U_SUCCESS(errorCode)) { + keysBuilder->add(k.orphan(), errorCode); + } + +/* + // Try to recover from errors, e.g. when*{foo} + if (!isWhitespace(source[index])) { + while (inBounds(source, index) && source[index] != LEFT_CURLY_BRACE) { + index++; + } + } +*/ + + // We've seen at least one whitespace-key pair, so now we can parse + // *(s key) [s] + while (source[index] != LEFT_CURLY_BRACE) { // Try to recover from errors + bool wasWhitespace = isWhitespace(source[index]); + parseRequiredWhitespace(errorCode); + if (!wasWhitespace) { + // Avoid infinite loop when parsing something like: + // when * @{!... + index++; + } + + // Restore precondition + CHECK_BOUNDS_NULL(source, index, parseError, errorCode); + + // At this point, it's ambiguous whether we are inside (s key) or [s]. + // This check resolves that ambiguity. + if (source[index] == LEFT_CURLY_BRACE) { + // A pattern follows, so what we just parsed was the optional + // trailing whitespace. All the keys have been parsed. 
+ + // Unpush the whitespace from `normalizedInput` + U_ASSERT(normalizedInput.truncate(normalizedInput.length() - 1)); + break; + } + k.adoptInstead(parseKey(errorCode)); + if (U_SUCCESS(errorCode)) { + keysBuilder->add(k.orphan(), errorCode); + } + } + + // Check error code so we won't overwrite the error + if (U_FAILURE(errorCode)) { + return nullptr; + } + + return keysBuilder->build(errorCode); +} + +/* + Consume a `pattern`, matching the nonterminal in the grammar + No postcondition (on return, `index` might equal `source.length()` with U_SUCCESS(errorCode)), + because a message can end with a pattern +*/ +Pattern* PARSER::parsePattern(UErrorCode &errorCode) { + NULL_ON_ERROR(errorCode); + U_ASSERT(inBounds(source, index)); + + LocalPointer result(Pattern::builder(errorCode)); + // Fail immediately if the pattern builder can't be constructed + NULL_ON_ERROR(errorCode); + + parseToken(LEFT_CURLY_BRACE, errorCode); + + LocalPointer expression; + LocalPointer part; + while (source[index] != RIGHT_CURLY_BRACE) { + switch (source[index]) { + case LEFT_CURLY_BRACE: { + // Must be expression + bool rhsError = false; + expression.adoptInstead(parseExpression(rhsError, errorCode)); + NULL_ON_ERROR(errorCode); + part.adoptInstead(PatternPart::create(expression.orphan(), errorCode)); + NULL_ON_ERROR(errorCode); + result->add(part.orphan(), errorCode); + break; + } + default: { + // Must be text + UnicodeString s; + parseText(errorCode, s); + part.adoptInstead(PatternPart::create(s, errorCode)); + NULL_ON_ERROR(errorCode); + result->add(part.orphan(), errorCode); + break; + } + } + // Need an explicit error check here so we don't loop infinitely + NULL_ON_ERROR(errorCode); + if (!inBounds(source, index)) { + // Avoid returning null + return result->build(errorCode); + } + } + // Consume the closing brace + index++; + normalizedInput += RIGHT_CURLY_BRACE; + + return result->build(errorCode); +} + + +/* + Consume a `selectors` (matching the nonterminal in the grammar), + 
followed by a non-empty sequence of `variant`s (matching the nonterminal + in the grammar) preceded by whitespace + No postcondition (on return, `index` might equal `source.length()` with U_SUCCESS(errorCode)), + because a message can end with a variant +*/ +void PARSER::parseSelectors(UErrorCode &errorCode) { + CHECK_ERROR(errorCode); + U_ASSERT(inBounds(source, index)); + + parseToken(ID_MATCH, errorCode); + + LocalPointer expression; + bool empty = true; + // Parse selectors + while (isWhitespace(source[index]) || source[index] == LEFT_CURLY_BRACE) { + parseOptionalWhitespace(errorCode); + // Restore precondition + CHECK_BOUNDS(source, index, parseError, errorCode); + if (source[index] != LEFT_CURLY_BRACE) { + // This is not necessarily an error, but rather, + // means the whitespace we parsed was the optional + // whitespace preceding the first variant, not the + // optional whitespace preceding a subsequent expression. + break; + } + + bool selectorError = false; + expression.adoptInstead(parseExpression(selectorError, errorCode)); + if (selectorError) { + // What happens if one of the variant keys is the + // fallback string? this should be a `nomatch` according + // to the spec, but there's no way to pass that through + expression.adoptInstead(exprFallback(errorCode)); + } + empty = false; + + if (U_FAILURE(errorCode)) { + break; + } + dataModel.addSelector(expression.orphan(), errorCode); + } + + // At least one selector is required + if (empty) { + if (U_SUCCESS(errorCode)) { + ERROR(parseError, errorCode, index); + } + return; + } + + #define CHECK_END_OF_INPUT \ + if (((int32_t)index) >= source.length()) { \ + break; \ + } \ + + // Parse variants + while (isWhitespace(source[index]) || source[index] == ID_WHEN[0]) { + parseOptionalWhitespace(errorCode); + // Restore the precondition, *without* erroring out if we've + // reached the end of input. That's because it's valid for the + // message to end with trailing whitespace that follows a variant. 
+ CHECK_END_OF_INPUT + + // Consume the "when" + parseToken(ID_WHEN, errorCode); + + // At least one key is required + LocalPointer keyList(parseNonEmptyKeys(errorCode)); + CHECK_ERROR(errorCode); + + // parseNonEmptyKeys() consumes any trailing whitespace, + // so the pattern can be consumed next. + + // Restore precondition before calling parsePattern() + // (which must return a non-null value) + CHECK_BOUNDS(source, index, parseError, errorCode); + LocalPointer rhs(parsePattern(errorCode)); + if (U_FAILURE(errorCode)) { + break; + } + + dataModel.addVariant(keyList.orphan(), rhs.orphan(), errorCode); + if (U_FAILURE(errorCode)) { + break; + } + + // Restore the precondition, *without* erroring out if we've + // reached the end of input. That's because it's valid for the + // message to end with a variant that has no trailing whitespace. + // Why do we need to check this condition twice inside the loop? + // Because if we don't check it here, the `isWhitespace()` call in + // the loop head will read off the end of the input string. 
+ CHECK_END_OF_INPUT + } +} + +/* + Consume a `body` (matching the nonterminal in the grammar), + No postcondition (on return, `index` might equal `source.length()` with U_SUCCESS(errorCode)), + because a message can end with a body (trailing whitespace is optional) +*/ + +void PARSER::errorPattern(UErrorCode &errorCode) { + CHECK_ERROR(errorCode); + errors.addSyntaxError(errorCode); + // Set to empty pattern + LocalPointer result(Pattern::builder(errorCode)); + CHECK_ERROR(errorCode); + // If still in bounds, then add the remaining input as a single text part + // to the pattern + /* + TODO: this behavior isn't documented in the spec, but it comes from + https://github.com/messageformat/messageformat/blob/e0087bff312d759b67a9129eac135d318a1f0ce7/packages/mf2-messageformat/src/__fixtures/test-messages.json#L236 + and a pending pull request https://github.com/unicode-org/message-format-wg/pull/462 will clarify + whether this is the intent behind the spec + */ + UnicodeString partStr(LEFT_CURLY_BRACE); + while (inBounds(source, index)) { + partStr += source[index++]; + } + // Add curly braces around the entire output (same comment as above) + partStr += RIGHT_CURLY_BRACE; + LocalPointer part(PatternPart::create(partStr, errorCode)); + if (U_SUCCESS(errorCode)) { + result->add(part.orphan(), errorCode); + } + dataModel.setPattern(result->build(errorCode)); +} + +void PARSER::parseBody(UErrorCode &errorCode) { + CHECK_ERROR(errorCode); + // Out-of-input is a syntax warning + if (!inBounds(source, index)) { + errorPattern(errorCode); + return; + } + + // Body must be either a pattern or selectors + switch (source[index]) { + case LEFT_CURLY_BRACE: { + // Pattern + LocalPointer pattern(parsePattern(errorCode)); + CHECK_ERROR(errorCode); + dataModel.setPattern(pattern.orphan()); + break; + } + case ID_MATCH[0]: { + // Selectors + parseSelectors(errorCode); + return; + } + default: { + ERROR(parseError, errorCode, index); + errorPattern(errorCode); + return; + } + } +} + 
+// ------------------------------------- +// Parses the source pattern. + +void PARSER::parse(UParseError &parseErrorResult, + UErrorCode &errorCode) { + // Return immediately in the case of a previous error + CHECK_ERROR(errorCode); + + // parseOptionalWhitespace() succeeds on an empty string, so don't check bounds yet + parseOptionalWhitespace(errorCode); + // parseDeclarations() requires there to be input left, so check to see if + // parseOptionalWhitespace() consumed it all + + // Skip the check if errorCode is already set, so as to avoid overwriting a + // previous error offset + if (U_SUCCESS(errorCode) && !inBounds(source, index)) { + ERROR(parseError, errorCode, index); + } + + parseDeclarations(errorCode); + parseBody(errorCode); + parseOptionalWhitespace(errorCode); + + // There are no errors; finally, check that the entire input was consumed + // Skip the check if errorCode is already set, so as to avoid overwriting a + // previous error offset + if (U_SUCCESS(errorCode)) { + if (((int32_t)index) != source.length()) { + ERROR(parseError, errorCode, index); + } + } + + // Finally, copy the relevant fields of the internal `MessageParseError` + // into the `UParseError` argument + translateParseError(parseError, parseErrorResult); +} + +PARSER::~Parser() {} + + +} // namespace message2 +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + diff --git a/icu4c/source/i18n/messageformat2_serializer.cpp b/icu4c/source/i18n/messageformat2_serializer.cpp new file mode 100644 index 000000000000..fdd264ae6c22 --- /dev/null +++ b/icu4c/source/i18n/messageformat2_serializer.cpp @@ -0,0 +1,284 @@ +// © 2016 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/messageformat2.h" +#include "unicode/messageformat2_data_model.h" +#include "uvector.h" // U_ASSERT + +U_NAMESPACE_BEGIN namespace message2 { + +// Generates a string representation of a data model +// ------------------------------------------------ + +using Binding = MessageFormatDataModel::Binding; +using Bindings = MessageFormatDataModel::Bindings; +using Key = MessageFormatDataModel::Key; +using SelectorKeys = MessageFormatDataModel::SelectorKeys; +using KeyList = MessageFormatDataModel::KeyList; +using Literal = MessageFormatDataModel::Literal; +using OptionMap = MessageFormatDataModel::OptionMap; +using Expression = MessageFormatDataModel::Expression; +using ExpressionList = MessageFormatDataModel::ExpressionList; +using Operand = MessageFormatDataModel::Operand; +using Operator = MessageFormatDataModel::Operator; +using Pattern = MessageFormatDataModel::Pattern; +using PatternPart = MessageFormatDataModel::PatternPart; +using Reserved = MessageFormatDataModel::Reserved; +using VariantMap = MessageFormatDataModel::VariantMap; + +#define SERIALIZER MessageFormatter::Serializer + +// Private helper methods + +void SERIALIZER::whitespace() { + result += SPACE; +} + +void SERIALIZER::emit(UChar32 c) { + result += c; +} + +void SERIALIZER::emit(const UnicodeString& s) { + result += s; +} + +template +void SERIALIZER::emit(const UChar32 (&token)[N]) { + // Don't emit the terminator + for (int32_t i = 0; i < N - 1; i++) { + emit(token[i]); + } +} + +void SERIALIZER::emit(const FunctionName& f) { + emit(f.toString()); +} + +void SERIALIZER::emit(const VariableName& v) { + emit(v.declaration()); +} + +void SERIALIZER::emit(const Literal& l) { + if (l.quoted()) { + emit(PIPE); + const UnicodeString& contents = l.stringContents(); + for (int32_t i = 0; ((int32_t) i) < contents.length(); i++) { + // Re-escape any PIPE or 
BACKSLASH characters + switch(contents[i]) { + case BACKSLASH: + case PIPE: { + emit(BACKSLASH); + break; + } + default: { + break; + } + } + emit(contents[i]); + } + emit(PIPE); + } else { + emit(l.stringContents()); + } +} + +void SERIALIZER::emit(const Key& k) { + if (k.isWildcard()) { + emit(ASTERISK); + return; + } + emit(k.asLiteral()); +} + +void SERIALIZER::emit(const SelectorKeys& k) { + const KeyList& ks = k.getKeys(); + int32_t len = ks.length(); + // It would be an error for `keys` to be empty; + // that would mean this is the single `pattern` + // variant, and in that case, this method shouldn't be called + U_ASSERT(len > 0); + for (int32_t i = 0; i < len; i++) { + if (i != 0) { + whitespace(); + } + emit(*ks.get(i)); + } +} + +void SERIALIZER::emit(const Operand& rand) { + U_ASSERT(!rand.isNull()); + + if (rand.isVariable()) { + emit(rand.asVariable()); + } else { + // Literal: quoted or unquoted + emit(rand.asLiteral()); + } +} + +void SERIALIZER::emit(const OptionMap& options) { + int32_t pos = OptionMap::FIRST; + UnicodeString k; + const Operand* v; + while (options.next(pos, k, v)) { + whitespace(); + emit(k); + emit(EQUALS); + emit(*v); + } +} + +void SERIALIZER::emit(const Expression& expr) { + emit(LEFT_CURLY_BRACE); + + if (!expr.isReserved() && !expr.isFunctionCall()) { + // Literal or variable, no annotation + emit(expr.getOperand()); + } else { + // Function call or reserved + if (!expr.isStandaloneAnnotation()) { + // Must be a function call that has an operand + emit(expr.getOperand()); + whitespace(); + } + const Operator& rator = expr.getOperator(); + if (rator.isReserved()) { + const Reserved& reserved = rator.asReserved(); + // Re-escape '\' / '{' / '|' / '}' + for (int32_t i = 0; i < reserved.numParts(); i++) { + const Literal& l = *reserved.getPart(i); + if (l.quoted()) { + emit(l); + } else { + const UnicodeString& s = l.stringContents(); + for (int32_t j = 0; ((int32_t) j) < s.length(); j++) { + switch(s[j]) { + case 
LEFT_CURLY_BRACE: + case PIPE: + case RIGHT_CURLY_BRACE: + case BACKSLASH: { + emit(BACKSLASH); + break; + } + default: + break; + } + emit(s[j]); + } + } + } + } else { + emit(rator.getFunctionName()); + // No whitespace after function name, in case it has + // no options. (when there are options, emit(OptionMap) will + // emit the leading whitespace) + emit(rator.getOptions()); + } + } + + emit(RIGHT_CURLY_BRACE); +} + +void SERIALIZER::emit(const PatternPart& part) { + if (part.isText()) { + // Raw text + const UnicodeString& text = part.asText(); + // Re-escape '{'/'}'/'\' + for (int32_t i = 0; ((int32_t) i) < text.length(); i++) { + switch(text[i]) { + case BACKSLASH: + case LEFT_CURLY_BRACE: + case RIGHT_CURLY_BRACE: { + emit(BACKSLASH); + break; + } + default: + break; + } + emit(text[i]); + } + return; + } + // Expression + emit(part.contents()); +} + +void SERIALIZER::emit(const Pattern& pat) { + int32_t len = pat.numParts(); + emit(LEFT_CURLY_BRACE); + for (int32_t i = 0; i < len; i++) { + // No whitespace is needed here -- see the `pattern` nonterminal in the grammar + emit(*pat.getPart(i)); + } + emit(RIGHT_CURLY_BRACE); +} + +void SERIALIZER::serializeDeclarations() { + const Bindings& locals = dataModel.getLocalVariables(); + + for (int32_t i = 0; i < locals.length(); i++) { + const Binding& b = *locals.get(i); + // No whitespace needed here -- see `message` in the grammar + emit(ID_LET); + whitespace(); + emit(b.getVariable()); + // No whitespace needed here -- see `declaration` in the grammar + emit(EQUALS); + // No whitespace needed here -- see `declaration` in the grammar + emit(b.getValue()); + } +} + +void SERIALIZER::serializeSelectors() { + U_ASSERT(dataModel.hasSelectors()); + const ExpressionList& selectors = dataModel.getSelectors(); + int32_t len = selectors.length(); + U_ASSERT(len > 0); + + emit(ID_MATCH); + for (int32_t i = 0; i < len; i++) { + // No whitespace needed here -- see `selectors` in the grammar + emit(*selectors.get(i)); + } 
+} + +void SERIALIZER::serializeVariants() { + U_ASSERT(dataModel.hasSelectors()); + const VariantMap& variants = dataModel.getVariants(); + int32_t pos = VariantMap::FIRST; + + const SelectorKeys* selectorKeys; + const Pattern* pattern; + + while (variants.next(pos, selectorKeys, pattern)) { + emit(ID_WHEN); + whitespace(); + emit(*selectorKeys); + // No whitespace needed here -- see `variant` in the grammar + emit(*pattern); + } +} + + +// Main (public) serializer method +void SERIALIZER::serialize() { + serializeDeclarations(); + // Pattern message + if (!dataModel.hasSelectors()) { + emit(dataModel.getPattern()); + } else { + // Selectors message + serializeSelectors(); + serializeVariants(); + } +} + +} // namespace message2 +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + diff --git a/icu4c/source/i18n/messageformat2_utils_impl.h b/icu4c/source/i18n/messageformat2_utils_impl.h new file mode 100644 index 000000000000..b8cdd2a3671a --- /dev/null +++ b/icu4c/source/i18n/messageformat2_utils_impl.h @@ -0,0 +1,306 @@ +// © 2016 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html + +#ifndef U_HIDE_DEPRECATED_API + +#ifndef MESSAGEFORMAT_UTILS_IMPL_H +#define MESSAGEFORMAT_UTILS_IMPL_H + +#if U_SHOW_CPLUSPLUS_API + +#if !UCONFIG_NO_FORMATTING + +template +int32_t ImmutableVector::length() const { + U_ASSERT(!isBogus()); + return contents->size(); +} + +template +const T* ImmutableVector::get(int32_t i) const { + // Because UVector::element() returns a void*, + // to avoid either copying the result or returning a reference + // to a temporary value, get() returns a T* + U_ASSERT(!isBogus()); + U_ASSERT(i < length()); + return static_cast(contents->elementAt(i)); +} + +// Returns true iff this contains `element` +template +UBool ImmutableVector::contains(const T& element) const { + U_ASSERT(!isBogus()); + + int32_t index; + return find(element, index); +} + +// Returns true iff this contains `element` and returns +// its first index in `index`. Returns false otherwise +template +UBool ImmutableVector::find(const T& element, int32_t& index) const { + U_ASSERT(!isBogus()); + + for (int32_t i = 0; i < length(); i++) { + if (*(get(i)) == element) { + index = i; + return true; + } + } + return false; +} + +// Copy constructor +template +ImmutableVector::ImmutableVector(const ImmutableVector& other) { + UErrorCode errorCode = U_ZERO_ERROR; + U_ASSERT(!other.isBogus()); + contents.adoptInstead(new UVector(other.length(), errorCode)); + if (U_FAILURE(errorCode)) { + contents.adoptInstead(nullptr); + return; + } + contents->setDeleter(uprv_deleteUObject); + contents->assign(*other.contents, ©Elements, errorCode); + if (U_FAILURE(errorCode)) { + contents.adoptInstead(nullptr); + } +} + +// Adopts its argument +template +typename ImmutableVector::Builder& ImmutableVector::Builder::add(T *element, UErrorCode &errorCode) { + THIS_ON_ERROR(errorCode); + U_ASSERT(contents != nullptr); + contents->adoptElement(element, errorCode); + return *this; +} + +// Postcondition: U_FAILURE(errorCode) or 
returns a list such that isBogus() = false +template +ImmutableVector* ImmutableVector::Builder::build(UErrorCode &errorCode) const { + NULL_ON_ERROR(errorCode); + LocalPointer> adopted(buildList(*this, errorCode)); + if (!adopted.isValid() || adopted->isBogus()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + return adopted.orphan(); +} + +template +ImmutableVector::Builder::Builder(UErrorCode& errorCode) { + CHECK_ERROR(errorCode); + contents.adoptInstead(new UVector(errorCode)); + CHECK_ERROR(errorCode); + contents->setDeleter(uprv_deleteUObject); +} + +template +/* static */ typename ImmutableVector::Builder* ImmutableVector::builder(UErrorCode &errorCode) { + NULL_ON_ERROR(errorCode); + LocalPointer result(new Builder(errorCode)); + NULL_ON_ERROR(errorCode); + return result.orphan(); +} + +// Helper functions for vector copying +// T1 must have a copy constructor +// This may leave dst->pointer == nullptr, which is handled by the UVector assign() method +template +template +/* static */ void ImmutableVector::copyElements(UElement *dst, UElement *src) { + dst->pointer = new T1(*(static_cast(src->pointer))); +} + + // Copies the contents of `builder` +// This won't compile unless T is a type that has a copy assignment operator +template +/* static */ ImmutableVector* ImmutableVector::buildList(const Builder &builder, UErrorCode &errorCode) { + NULL_ON_ERROR(errorCode); + ImmutableVector* result; + U_ASSERT(builder.contents != nullptr); + + LocalPointer adoptedContents(new UVector(builder.contents->size(), errorCode)); + NULL_ON_ERROR(errorCode); + adoptedContents->setDeleter(uprv_deleteUObject); + adoptedContents->assign(*builder.contents, ©Elements, errorCode); + NULL_ON_ERROR(errorCode); + result = new ImmutableVector(adoptedContents.orphan()); + + // Finally, check for null + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return result; +} + +// Iterates over keys in the order in which they were added. 
+// Returns true iff `pos` indicates that there are elements + // remaining +template +UBool OrderedMap::next(int32_t &pos, UnicodeString& k, const V*& v) const { + U_ASSERT(!isBogus()); + U_ASSERT(pos >= FIRST); + if (pos >= size()) { + return false; + } + k = *((UnicodeString*)keys->elementAt(pos)); + v = (V*) contents->get(k); + pos = pos + 1; + return true; +} + +template +int32_t OrderedMap::size() const { + U_ASSERT(!isBogus()); + return keys->size(); +} + +// Copy constructor +template +OrderedMap::OrderedMap(const OrderedMap& other) : contents(copyHashtable(*other.contents)), keys(copyStringVector(*other.keys)) { + U_ASSERT(!other.isBogus()); +} + +// Adopts `value` +// Precondition: `key` is not already in the map. (The caller must +// check this) +template +typename OrderedMap::Builder& OrderedMap::Builder::add(const UnicodeString& key, V* value, UErrorCode& errorCode) { + THIS_ON_ERROR(errorCode); + // Check that the key is not already in the map. + // (If not for this check, the invariant that keys->size() + // == contents->count() could be violated.) 
+ U_ASSERT(!contents->containsKey(key)); + // Copy `key` so it can be stored in the vector + LocalPointer adoptedKey(new UnicodeString(key)); + if (!adoptedKey.isValid()) { + return *this; + } + UnicodeString* k = adoptedKey.orphan(); + keys->adoptElement(k, errorCode); + contents->put(key, value, errorCode); + return *this; +} + +// This is provided so that builders can check for duplicate keys +// (for example, adding duplicate options is an error) +template +UBool OrderedMap::Builder::has(const UnicodeString& key) const { + return contents->containsKey(key); +} + +// Copying `build()` (leaves `this` valid) +template +OrderedMap* OrderedMap::Builder::build(UErrorCode& errorCode) const { + NULL_ON_ERROR(errorCode); + + LocalPointer adoptedContents(copyHashtable(*contents)); + LocalPointer adoptedKeys(copyStringVector(*keys)); + + if (!adoptedContents.isValid() || !adoptedKeys.isValid()) { + return nullptr; + } + LocalPointer> result( + OrderedMap::create(adoptedContents.orphan(), + adoptedKeys.orphan(), + errorCode)); + NULL_ON_ERROR(errorCode); + return result.orphan(); +} + +// Only called by builder() +template +OrderedMap::Builder::Builder(UErrorCode& errorCode) { + CHECK_ERROR(errorCode); + // initialize `keys` + keys.adoptInstead(new UVector(errorCode)); + CHECK_ERROR(errorCode); + keys->setDeleter(uprv_deleteUObject); + + // initialize `contents` + // No value comparator needed + contents.adoptInstead(new Hashtable(compareVariableName, nullptr, errorCode)); + CHECK_ERROR(errorCode); + // The `contents` hashtable owns the values, but does not own the keys + contents->setValueDeleter(uprv_deleteUObject); +} + +template +/* static */ typename OrderedMap::Builder* OrderedMap::builder(UErrorCode &errorCode) { + NULL_ON_ERROR(errorCode); + LocalPointer result(new Builder(errorCode)); + NULL_ON_ERROR(errorCode); + return result.orphan(); +} + +// Helper methods for copy constructor +template +/* static */ void OrderedMap::copyStrings(UElement *dst, UElement *src) 
{ + dst->pointer = new UnicodeString(*(static_cast(src->pointer))); +} + +template +/* static */ UVector* OrderedMap::copyStringVector(const UVector& other) { + UErrorCode errorCode = U_ZERO_ERROR; + LocalPointer adoptedKeys(new UVector(other.size(), errorCode)); + NULL_ON_ERROR(errorCode); + adoptedKeys->setDeleter(uprv_deleteUObject); + adoptedKeys->assign(other, ©Strings, errorCode); + NULL_ON_ERROR(errorCode); + return adoptedKeys.orphan(); +} + +// Postcondition: U_FAILURE(errorCode) || !((return value).isBogus()) +template +/* static */ OrderedMap* OrderedMap::create(Hashtable* c, UVector* k, UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + LocalPointer> result(new OrderedMap(c, k)); + if (result == nullptr || result->isBogus()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + return result.orphan(); +} + +template +/* static */ Hashtable* OrderedMap::copyHashtable(const Hashtable& other) { + UErrorCode errorCode = U_ZERO_ERROR; + // No value comparator needed + LocalPointer adoptedContents(new Hashtable(compareVariableName, nullptr, errorCode)); + NULL_ON_ERROR(errorCode); + // The hashtable owns the values + adoptedContents->setValueDeleter(uprv_deleteUObject); + + // Copy all the key/value bindings over + const UHashElement *e; + int32_t pos = UHASH_FIRST; + V *val; + while ((e = other.nextElement(pos)) != nullptr) { + val = new V(*(static_cast(e->value.pointer))); + if (val == nullptr) { + return nullptr; + } + UnicodeString *s = static_cast(e->key.pointer); + adoptedContents->put(*s, val, errorCode); + } + NULL_ON_ERROR(errorCode); + return adoptedContents.orphan(); +} + +template +OrderedMap::OrderedMap(Hashtable* c, UVector* k) : contents(c), keys(k) { + // It would be an error if `c` and `k` had different sizes + U_ASSERT(c->count() == k->size()); +} + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // MESSAGEFORMAT_UTILS_IMPL_H + +#endif // U_HIDE_DEPRECATED_API +// eof + diff --git 
a/icu4c/source/i18n/sources.txt b/icu4c/source/i18n/sources.txt index a1af43b93e84..a01d8dfc1ea0 100644 --- a/icu4c/source/i18n/sources.txt +++ b/icu4c/source/i18n/sources.txt @@ -94,6 +94,15 @@ measunit.cpp measunit_extra.cpp measure.cpp msgfmt.cpp +messageformat2.cpp +messageformat2_builder.cpp +messageformat2_checker.cpp +messageformat2_context.cpp +messageformat2_data_model.cpp +messageformat2_formatting_context.cpp +messageformat2_function_registry.cpp +messageformat2_parser.cpp +messageformat2_serializer.cpp name2uni.cpp nfrs.cpp nfrule.cpp diff --git a/icu4c/source/i18n/unicode/messageformat2.h b/icu4c/source/i18n/unicode/messageformat2.h new file mode 100644 index 000000000000..4ca38995de8d --- /dev/null +++ b/icu4c/source/i18n/unicode/messageformat2.h @@ -0,0 +1,482 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#ifndef U_HIDE_DEPRECATED_API + +#ifndef MESSAGEFORMAT2_H +#define MESSAGEFORMAT2_H + +#if U_SHOW_CPLUSPLUS_API + +/** + * \file + * \brief C++ API: Formats messages using the draft MessageFormat 2.0. + */ + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/format.h" +#include "unicode/messageformat2_checker.h" +#include "unicode/messageformat2_context.h" +#include "unicode/messageformat2_data_model.h" +#include "unicode/messageformat2_formatting_context.h" +#include "unicode/messageformat2_function_registry.h" +#include "unicode/messageformat2_macros.h" +#include "unicode/unistr.h" +#include "unicode/utypes.h" + +U_NAMESPACE_BEGIN namespace message2 { + +/** + *

<p>MessageFormatter is a Technical Preview API implementing MessageFormat 2.0. + * Since it is not final, documentation has not yet been added everywhere. + * + *

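<p>See the <a target="github" href="https://github.com/unicode-org/message-format-wg/blob/main/spec/syntax.md"> + * description of the syntax with examples and use cases</a> and the corresponding + * <a target="github" href="https://github.com/unicode-org/message-format-wg/blob/main/spec/message.abnf">ABNF</a> grammar.</p>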

+ * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + +// Note: This class does not currently inherit from the existing +// `Format` class. +class U_I18N_API MessageFormatter : public UObject { +public: + /** + * Destructor. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + virtual ~MessageFormatter(); + + /** + * Formats the message to a string, using the data model that was previously set or parsed, + * and the given `arguments` object. + * + * @param arguments Reference to message arguments + * @param status Input/output error code used to indicate syntax errors, data model + * errors, resolution errors, formatting errors, selection errors, as well + * as other errors (such as memory allocation failures). Partial output + * is still provided in the presence of most error types. + * @param result Mutable reference to a string that the output will be appended to. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + void formatToString(const MessageArguments& arguments, UErrorCode &status, UnicodeString &result) const; + + /** + * Accesses the locale that this `MessageFormatter` object was created with. + * + * @return A reference to the locale. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + const Locale& getLocale() const { return locale; } + + /** + * Serializes the data model as a string in MessageFormat 2.0 syntax. + * + * @param result Mutable reference to a string that the output will be appended to. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. 
+ */ + void getPattern(UnicodeString& result) const { + // Converts the current data model back to a string + U_ASSERT(dataModelOK()); + Serializer serializer(getDataModel(), result); + serializer.serialize(); + } + + /** + * Accesses the data model referred to by this + * `MessageFormatter` object. + * + * @return A reference to the data model. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + // Give public access to the data model + const MessageFormatDataModel& getDataModel() const; + + /** + * The mutable Builder class allows each part of the MessageFormatter to be initialized + * separately; calling its `build()` method yields an immutable MessageFormatter. + */ + class U_I18N_API Builder { + private: + friend class MessageFormatter; + + Builder() : locale(Locale::getDefault()), customFunctionRegistry(nullptr) {} + + // The pattern to be parsed to generate the formatted message + UnicodeString pattern; + bool hasPattern; + // The data model to be used to generate the formatted message + // Invariant: !(hasPattern && dataModel != nullptr) + const MessageFormatDataModel* dataModel; + Locale locale; + LocalPointer standardFunctionRegistry; + // Not owned + const FunctionRegistry* customFunctionRegistry; + + public: + /** + * Sets the locale to use for formatting. + * + * @param locale The desired locale. + * @return A reference to the builder. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + Builder& setLocale(const Locale& locale); + /** + * Sets the pattern to be parsed into a data model. (Parsing is + * delayed until `build()` is called.) If a data model was + * previously set, the reference to it held by this builder + * is removed. + * + * @param pattern A string in MessageFormat 2.0 syntax. + * @return A reference to the builder. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. 
+ */ + Builder& setPattern(const UnicodeString& pattern); + /** + * Sets a custom function registry. + * + * @param functionRegistry Function registry to use; this argument is + * not adopted, and the caller must ensure its lifetime contains + * the lifetime of the `MessageFormatter` object built by this + * builder. + * @return A reference to the builder. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + Builder& setFunctionRegistry(const FunctionRegistry* functionRegistry); + /** + * Sets a data model. If a pattern was previously set, it is removed. + * + * @param dataModel Data model to format; this argument is + * not adopted, and the caller must ensure its lifetime contains + * the lifetime of the `MessageFormatter` object built by this + * builder. + * @return A reference to the builder. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + Builder& setDataModel(const MessageFormatDataModel* dataModel); + /** + * Constructs a new immutable MessageFormatter using the pattern or data model + * that was previously set, and the locale (if it was previously set) + * or default locale (otherwise). + * + * The builder object (`this`) can still be used after calling `build()`. + * + * @param parseError Struct to receive information on the position + * of an error within the pattern (not used if + * the data model is set). + * @param status Input/output error code. If the + * pattern cannot be parsed, or if neither the pattern + * nor the data model is set, set to failure code. + * @return The new MessageFormatter object, which is non-null if + * U_SUCCESS(status). + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + MessageFormatter* build(UParseError& parseError, UErrorCode& status) const; + }; // class MessageFormatter::Builder + + /** + * Returns a new `MessageFormatter::Builder` object. 
+ * + * @param status Input/output error code. + * @return The new Builder, which is non-null if U_SUCCESS(status). + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + static Builder* builder(UErrorCode& status); + + // TODO: Shouldn't be public; only used for testing + const UnicodeString& getNormalizedPattern() const { return normalizedInput; } + + private: + friend class Builder; + friend class MessageContext; + + MessageFormatter(const MessageFormatter::Builder& builder, UParseError &parseError, UErrorCode &status); + + MessageFormatter() = delete; // default constructor not implemented + + // Do not define default assignment operator + const MessageFormatter &operator=(const MessageFormatter &) = delete; + + // Parser class (private) + class Parser : public UMemory { + public: + virtual ~Parser(); + static Parser* create(const UnicodeString &input, MessageFormatDataModel::Builder& dataModelBuilder, UnicodeString& normalizedInput, Errors& errors, UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return nullptr; + } + Parser* p = new Parser(input, dataModelBuilder, errors, normalizedInput); + if (p == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return p; + } + // The parser validates the message and builds the data model + // from it. + void parse(UParseError &, UErrorCode &); + private: + friend class MessageFormatDataModel::Builder; + + /* + Use an internal "parse error" structure to make it easier to translate + absolute offsets to line offsets. + This is translated back to a `UParseError` at the end of parsing. + */ + typedef struct MessageParseError { + // The line on which the error occurred + uint32_t line; + // The offset, relative to the erroneous line, on which the error occurred + uint32_t offset; + // The total number of characters seen before advancing to the current line. It has a value of 0 if line == 0. + // It includes newline characters, because the index does too. 
+ uint32_t lengthBeforeCurrentLine; + + // This parser doesn't yet use the last two fields. + UChar preContext[U_PARSE_CONTEXT_LEN]; + UChar postContext[U_PARSE_CONTEXT_LEN]; + } MessageParseError; + + Parser(const UnicodeString &input, MessageFormatDataModel::Builder& dataModelBuilder, Errors& e, UnicodeString& normalizedInputRef) + : source(input), index(0), errors(e), normalizedInput(normalizedInputRef), dataModel(dataModelBuilder) { + parseError.line = 0; + parseError.offset = 0; + parseError.lengthBeforeCurrentLine = 0; + parseError.preContext[0] = '\0'; + parseError.postContext[0] = '\0'; + } + + // Used so `parseEscapeSequence()` can handle all types of escape sequences + // (literal, text, and reserved) + typedef enum { LITERAL, TEXT, RESERVED } EscapeKind; + + static void translateParseError(const MessageParseError&, UParseError&); + static void setParseError(MessageParseError&, uint32_t); + void maybeAdvanceLine(); + void parseBody(UErrorCode &); + void parseDeclarations(UErrorCode &); + void parseSelectors(UErrorCode &); + + void parseWhitespaceMaybeRequired(bool, UErrorCode &); + void parseRequiredWhitespace(UErrorCode &); + void parseOptionalWhitespace(UErrorCode &); + void parseToken(UChar32, UErrorCode &); + void parseTokenWithWhitespace(UChar32, UErrorCode &); + template + void parseToken(const UChar32 (&)[N], UErrorCode &); + template + void parseTokenWithWhitespace(const UChar32 (&)[N], UErrorCode &); + void parseName(UErrorCode&, UnicodeString&); + void parseVariableName(UErrorCode&, UnicodeString&); + FunctionName* parseFunction(UErrorCode&); + void parseEscapeSequence(EscapeKind, UErrorCode &, UnicodeString&); + void parseLiteralEscape(UErrorCode &, UnicodeString&); + void parseLiteral(UErrorCode &, bool&, UnicodeString&); + void parseOption(UErrorCode&, MessageFormatDataModel::Operator::Builder&); + void parseOptions(UErrorCode &, MessageFormatDataModel::Operator::Builder&); + void parseReservedEscape(UErrorCode&, UnicodeString&); + void 
parseReservedChunk(UErrorCode &, MessageFormatDataModel::Reserved::Builder&); + MessageFormatDataModel::Reserved* parseReserved(UErrorCode &); + MessageFormatDataModel::Operator* parseAnnotation(UErrorCode &); + void parseLiteralOrVariableWithAnnotation(bool, UErrorCode &, MessageFormatDataModel::Expression::Builder&); + MessageFormatDataModel::Expression* parseExpression(bool&, UErrorCode &); + void parseTextEscape(UErrorCode&, UnicodeString&); + void parseText(UErrorCode&, UnicodeString&); + MessageFormatDataModel::Key* parseKey(UErrorCode&); + MessageFormatDataModel::SelectorKeys* parseNonEmptyKeys(UErrorCode&); + void errorPattern(UErrorCode&); + MessageFormatDataModel::Pattern* parsePattern(UErrorCode&); + + // The input string + const UnicodeString &source; + // The current position within the input string + uint32_t index; + // Represents the current line (and when an error is indicated), + // character offset within the line of the parse error + MessageParseError parseError; + + // The structure to use for recording errors + Errors& errors; + + // Normalized version of the input string (optional whitespace removed) + UnicodeString& normalizedInput; + + // The parent builder + MessageFormatDataModel::Builder &dataModel; + }; // class Parser + + // Serializer class (private) + // Converts a data model back to a string + class Serializer : public UMemory { + public: + Serializer(const MessageFormatDataModel& m, UnicodeString& s) : dataModel(m), result(s) {} + void serialize(); + + const MessageFormatDataModel& dataModel; + UnicodeString& result; + + private: + void whitespace(); + void emit(UChar32); + template + void emit(const UChar32 (&)[N]); + void emit(const UnicodeString&); + void emit(const FunctionName&); + void emit(const VariableName&); + void emit(const MessageFormatDataModel::Literal&); + void emit(const MessageFormatDataModel::Key&); + void emit(const MessageFormatDataModel::SelectorKeys&); + void emit(const MessageFormatDataModel::Operand&); + 
void emit(const MessageFormatDataModel::Expression&); + void emit(const MessageFormatDataModel::PatternPart&); + void emit(const MessageFormatDataModel::Pattern&); + void emit(const MessageFormatDataModel::VariantMap&); + void emit(const MessageFormatDataModel::OptionMap&); + void serializeDeclarations(); + void serializeSelectors(); + void serializeVariants(); + }; // class Serializer + + // Checks a data model for semantic errors + // (Errors are defined in https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md ) + class Checker { + public: + void check(UErrorCode& error); + Checker(const MessageFormatDataModel& m, Errors& e) : dataModel(m), errors(e) {} + private: + void requireAnnotated(const TypeEnvironment&, const MessageFormatDataModel::Expression&, UErrorCode&); + void checkDeclarations(TypeEnvironment&, UErrorCode&); + void checkSelectors(const TypeEnvironment&, UErrorCode&); + void checkVariants(UErrorCode&); + void check(const MessageFormatDataModel::OptionMap&, UErrorCode&); + void check(const MessageFormatDataModel::Operand&, UErrorCode&); + void check(const MessageFormatDataModel::Expression&, UErrorCode&); + void check(const MessageFormatDataModel::Pattern&, UErrorCode&); + const MessageFormatDataModel& dataModel; + Errors& errors; + }; + + void resolveVariables(const Environment& env, const MessageFormatDataModel::Operand&, ExpressionContext&, UErrorCode &) const; + void resolveVariables(const Environment& env, const MessageFormatDataModel::Expression&, ExpressionContext&, UErrorCode &) const; + + // Selection methods + void resolveSelectors(MessageContext&, const Environment& env, const MessageFormatDataModel::ExpressionList&, UErrorCode&, UVector&) const; + void matchSelectorKeys(const UVector&, ExpressionContext&, UErrorCode&, UVector&) const; + void resolvePreferences(const UVector&, const MessageFormatDataModel::VariantMap&, UErrorCode&, UVector&) const; + + // Formatting methods + void formatLiteral(const 
MessageFormatDataModel::Literal&, ExpressionContext&) const; + void formatPattern(MessageContext&, const Environment&, const MessageFormatDataModel::Pattern&, UErrorCode&, UnicodeString&) const; + // Formats an expression that appears as a selector + void formatSelectorExpression(const Environment& env, const MessageFormatDataModel::Expression&, ExpressionContext&, UErrorCode&) const; + // Formats an expression that appears in a pattern or as the definition of a local variable + void formatExpression(const Environment&, const MessageFormatDataModel::Expression&, ExpressionContext&, UErrorCode&) const; + void resolveOptions(const Environment& env, const MessageFormatDataModel::OptionMap&, ExpressionContext&, UErrorCode&) const; + void formatOperand(const Environment&, const MessageFormatDataModel::Operand&, ExpressionContext&, UErrorCode&) const; + void evalArgument(const VariableName&, ExpressionContext&) const; + void formatSelectors(MessageContext& context, const Environment& env, const MessageFormatDataModel::ExpressionList& selectors, const MessageFormatDataModel::VariantMap& variants, UErrorCode &status, UnicodeString& result) const; + + // Function registry methods + const Formatter* maybeCachedFormatter(MessageContext&, const FunctionName&, UErrorCode& errorCode) const; + + bool hasCustomFunctionRegistry() const { + return (customFunctionRegistry != nullptr); + } + + // Precondition: custom function registry exists + const FunctionRegistry& getCustomFunctionRegistry() const { + U_ASSERT(hasCustomFunctionRegistry()); + return *customFunctionRegistry; + } + + // Checking for resolution errors + void checkDeclarations(MessageContext&, Environment*&, UErrorCode&) const; + void check(MessageContext&, const Environment&, const MessageFormatDataModel::Expression&, UErrorCode&) const; + void check(MessageContext&, const Environment&, const MessageFormatDataModel::Operand&, UErrorCode&) const; + void check(MessageContext&, const Environment&, const 
MessageFormatDataModel::OptionMap&, UErrorCode&) const; + + void initErrors(UErrorCode&); + void clearErrors() const; + + // The locale this MessageFormatter was created with + const Locale locale; + + // Registry for built-in functions + LocalPointer standardFunctionRegistry; + // Registry for custom functions; may be null if no custom registry supplied + // Note: this is *not* owned by the MessageFormatter object + const FunctionRegistry* customFunctionRegistry; + + // Data model, representing the parsed message + // May be either owned (if created by parsing a pattern), or + // borrowed (if supplied by the builder's setDataModel() method) -- + // the ownedDataModel flag determines which one + LocalPointer dataModel; + const MessageFormatDataModel* borrowedDataModel; + bool ownedDataModel; + + // Upholds the invariant that either the data model or borrowed data model is valid, + // but not both + bool dataModelOK() const; + + // Normalized version of the input string (optional whitespace removed) + UnicodeString normalizedInput; + + // Formatter cache + LocalPointer cachedFormatters; + + // Errors -- only used while parsing and checking for data model errors; then + // the MessageContext keeps track of errors + LocalPointer errors; +}; // class MessageFormatter + +// For how this class is used, see the references to (integer, variant) tuples +// in https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#pattern-selection +// Ideally this would have been a private class nested in MessageFormatter, +// but sorting comparators need to reference it +class PrioritizedVariant : public UObject { +public: + int32_t priority; + const MessageFormatDataModel::SelectorKeys& keys; + const MessageFormatDataModel::Pattern& pat; + PrioritizedVariant(uint32_t p, + const MessageFormatDataModel::SelectorKeys& k, + const MessageFormatDataModel::Pattern& pattern) : priority(p), keys(k), pat(pattern) {} + virtual ~PrioritizedVariant(); +}; // class 
PrioritizedVariant + +} // namespace message2 +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // MESSAGEFORMAT2_H + +#endif // U_HIDE_DEPRECATED_API +// eof diff --git a/icu4c/source/i18n/unicode/messageformat2_checker.h b/icu4c/source/i18n/unicode/messageformat2_checker.h new file mode 100644 index 000000000000..abba9c9ff058 --- /dev/null +++ b/icu4c/source/i18n/unicode/messageformat2_checker.h @@ -0,0 +1,55 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#ifndef U_HIDE_DEPRECATED_API + +#ifndef MESSAGEFORMAT_CHECKER_H +#define MESSAGEFORMAT_CHECKER_H + +#if U_SHOW_CPLUSPLUS_API + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/messageformat2_data_model.h" +#include "unicode/unistr.h" +#include "unicode/utypes.h" +#include "hash.h" +#include "uvector.h" + +U_NAMESPACE_BEGIN namespace message2 { + +using VariableName = MessageFormatDataModel::VariableName; + +// Used for checking missing selector annotation errors +class TypeEnvironment : public UMemory { + public: + // MessageFormat has a simple type system; + // variables are either annotated or unannotated + enum Type { + Annotated, + Unannotated + }; + void extend(const VariableName&, Type, UErrorCode&); + Type get(const VariableName&) const; + TypeEnvironment(UErrorCode&); + + virtual ~TypeEnvironment(); + + private: + // Stores variables known to be annotated. 
+ // All others are assumed to be unannotated + LocalPointer annotated; +}; // class TypeEnvironment + +} // namespace message2 +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // MESSAGEFORMAT_CHECKER_H + +#endif // U_HIDE_DEPRECATED_API +// eof + diff --git a/icu4c/source/i18n/unicode/messageformat2_context.h b/icu4c/source/i18n/unicode/messageformat2_context.h new file mode 100644 index 000000000000..9a4f4e9f63f6 --- /dev/null +++ b/icu4c/source/i18n/unicode/messageformat2_context.h @@ -0,0 +1,417 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#ifndef U_HIDE_DEPRECATED_API + +#ifndef MESSAGEFORMAT2_CONTEXT_H +#define MESSAGEFORMAT2_CONTEXT_H + +#if U_SHOW_CPLUSPLUS_API + +/** + * \file + * \brief C++ API: Formats messages using the draft MessageFormat 2.0. + */ + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/messageformat2_data_model.h" +#include "unicode/utypes.h" + +U_NAMESPACE_BEGIN namespace message2 { + +class Formatter; +class FormatterFactory; +class SelectorFactory; + +using FunctionName = MessageFormatDataModel::FunctionName; +using VariableName = MessageFormatDataModel::VariableName; + +// Intermediate classes used internally in the formatter + +// Closures and environments +// ------------------------- + +class Environment; + +// A closure represents the right-hand side of a variable +// declaration, along with an environment giving values +// to its free variables +class Closure : public UMemory { +public: + using Expression = MessageFormatDataModel::Expression; + + static Closure* create(const Expression&, const Environment&, UErrorCode&); + const Expression& getExpr() const { + return expr; + } + const Environment& getEnv() const { + return env; + } + virtual ~Closure(); +private: + Closure(const Expression& expression, const Environment& environment) : expr(expression), env(environment) {} + + // An unevaluated 
expression + const Expression& expr; + // The environment mapping names used in this + // expression to other expressions + const Environment& env; +}; + +// An environment is represented as a linked chain of +// non-empty environments, terminating at an empty environment. +// It's searched using linear search. +class Environment : public UMemory { +public: + virtual const Closure* lookup(const VariableName&) const = 0; + static Environment* create(UErrorCode&); + static Environment* create(const VariableName&, Closure*, Environment*, UErrorCode&); + virtual ~Environment(); +}; + +class NonEmptyEnvironment; +class EmptyEnvironment : public Environment { +private: + friend class Environment; + + const Closure* lookup(const VariableName&) const override; + static EmptyEnvironment* create(UErrorCode&); + virtual ~EmptyEnvironment(); + // Adopts its closure argument + static NonEmptyEnvironment* create(const VariableName&, Closure*, Environment*, UErrorCode&); + + EmptyEnvironment() {} +}; + +class NonEmptyEnvironment : public Environment { +private: + friend class Environment; + const Closure* lookup(const VariableName&) const override; + // Adopts its closure argument + static NonEmptyEnvironment* create(const VariableName&, Closure*, const Environment*, UErrorCode&); + virtual ~NonEmptyEnvironment(); +private: + friend class Environment; + + NonEmptyEnvironment(const VariableName& v, Closure* c, Environment* e) : var(v), rhs(c), parent(e) {} + + // Maps VariableName onto Closure* + // Chain of linked environments + VariableName var; + const LocalPointer rhs; // should be valid + const LocalPointer parent; +}; + +// Errors +// ---------- + +class Error : public UMemory { + public: + enum Type { + DuplicateOptionName, + UnresolvedVariable, + FormattingError, + MissingSelectorAnnotation, + NonexhaustivePattern, + ReservedError, + SelectorError, + SyntaxError, + UnknownFunction, + VariantKeyMismatchError + }; + Error(Type ty) : type(ty) {} + Error(Type ty, const 
UnicodeString& s) : type(ty), contents(s) {} + virtual ~Error(); + private: + friend class Errors; + + Type type; + UnicodeString contents; +}; // class Error + +class Errors : public UMemory { + private: + LocalPointer syntaxAndDataModelErrors; + LocalPointer resolutionAndFormattingErrors; + bool dataModelError; + bool formattingError; + bool missingSelectorAnnotationError; + bool selectorError; + bool syntaxError; + bool unknownFunctionError; + bool unresolvedVariableError; + Errors(UErrorCode& errorCode); + + public: + static Errors* create(UErrorCode&); + + int32_t count() const; + void setSelectorError(const FunctionName&, UErrorCode&); + void setReservedError(UErrorCode&); + void setMissingSelectorAnnotation(UErrorCode&); + void setUnresolvedVariable(const VariableName&, UErrorCode&); + void addSyntaxError(UErrorCode&); + void setUnknownFunction(const FunctionName&, UErrorCode&); + void setFormattingError(const FunctionName&, UErrorCode&); + bool hasDataModelError() const { return dataModelError; } + bool hasFormattingError() const { return formattingError; } + bool hasSelectorError() const { return selectorError; } + bool hasSyntaxError() const { return syntaxError; } + bool hasUnknownFunctionError() const { return unknownFunctionError; } + bool hasMissingSelectorAnnotationError() const { return missingSelectorAnnotationError; } + bool hasUnresolvedVariableError() const { return unresolvedVariableError; } + void addError(Error, UErrorCode&); + void checkErrors(UErrorCode&); + void clearResolutionAndFormattingErrors(); + bool hasError() const; + + virtual ~Errors(); +}; // class Errors + +// Arguments +// ---------- + +/** + *

MessageFormatter is a Technical Preview API implementing MessageFormat 2.0. + * Since it is not final, documentation has not yet been added everywhere. + * + * The following class represents the named arguments to a message. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + +class U_I18N_API MessageArguments : public UObject { +public: + /** + * The mutable Builder class allows each message argument to be initialized + * separately; calling its `build()` method yields an immutable MessageArguments. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + class Builder { + public: + /** + * Adds an argument of type `UnicodeString`. + * + * @param key The name of the argument. + * @param value The value of the argument. + * @param status Input/output error code. + * @return A reference to the builder. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + Builder& add(const UnicodeString& key, const UnicodeString& value, UErrorCode& status); + /** + * Adds an argument of type `double`. + * + * @param key The name of the argument. + * @param value The value of the argument. + * @param status Input/output error code. + * @return A reference to the builder. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + Builder& addDouble(const UnicodeString& key, double value, UErrorCode& status); + /** + * Adds an argument of type `int64_t`. + * + * @param key The name of the argument. + * @param value The value of the argument. + * @param status Input/output error code. + * @return A reference to the builder. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + Builder& addInt64(const UnicodeString& key, int64_t value, UErrorCode& status); + /** + * Adds an argument of type `UDate`. 
+ * + * @param key The name of the argument. + * @param value The value of the argument. + * @param status Input/output error code. + * @return A reference to the builder. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + Builder& addDate(const UnicodeString& key, UDate value, UErrorCode& status); + /** + * Adds an argument of type `StringPiece`, representing a + * decimal number. + * + * @param key The name of the argument. + * @param value The value of the argument. + * @param status Input/output error code. + * @return A reference to the builder. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + Builder& addDecimal(const UnicodeString& key, StringPiece value, UErrorCode& status); + /** + * Adds an argument of type UnicodeString[]. Adopts `value`. + * + * @param key The name of the argument. + * @param value The value of the argument, interpreted as an array of strings. + * @param length The length of the array. + * @param status Input/output error code. + * @return A reference to the builder. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + Builder& add(const UnicodeString& key, const UnicodeString* value, int32_t length, UErrorCode& status); + /** + * Adds an argument of type UObject*, which must be non-null. Does not + * adopt this argument. + * + * @param key The name of the argument. + * @param value The value of the argument. + * @param status Input/output error code. + * @return A reference to the builder. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + Builder& addObject(const UnicodeString& key, const UObject* value, UErrorCode& status); + /** + * Creates an immutable `MessageArguments` object with the argument names + * and values that were added by previous calls. The builder can still be used + * after this call. 
+ * + * @param status Input/output error code. + * @return The new MessageArguments object, which is non-null if U_SUCCESS(status). + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + MessageArguments* build(UErrorCode& status) const; + /** + * Destructor. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + virtual ~Builder(); + private: + friend class MessageArguments; + Builder(UErrorCode&); + Builder& add(const UnicodeString&, Formattable*, UErrorCode&); + LocalPointer contents; + // Keep a separate hash table for objects, which does not + // own the values + // This is because a Formattable that wraps an object can't + // be copied + // Here, the values are UObjects* + LocalPointer objectContents; + }; // class MessageArguments::Builder + + /** + * Returns a new `MessageArguments::Builder` object. + * + * @param status Input/output error code. + * @return The new builder, which is non-null if U_SUCCESS(status). + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + static Builder* builder(UErrorCode& status); + /** + * Destructor. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. 
+ */ + virtual ~MessageArguments(); +private: + friend class MessageContext; + + bool hasFormattable(const VariableName&) const; + bool hasObject(const VariableName&) const; + const Formattable& getFormattable(const VariableName&) const; + const UObject* getObject(const VariableName&) const; + + MessageArguments& add(const UnicodeString&, Formattable*, UErrorCode&); + MessageArguments(Hashtable* c, Hashtable* o) : contents(c), objectContents(o) {} + LocalPointer contents; + // Keep a separate hash table for objects, which does not + // own the values + LocalPointer objectContents; +}; // class MessageArguments + +// Formatter cache +// -------------- + +// Map from expression pointers to Formatters +class CachedFormatters : public UMemory { +private: + friend class MessageFormatter; + + LocalPointer cache; + CachedFormatters(UErrorCode&); + +public: + const Formatter* getFormatter(const FunctionName&); + void setFormatter(const FunctionName&, Formatter*, UErrorCode& errorCode); +}; + +// The context contains all the information needed to process +// an entire message: arguments, formatter cache, and error list + +class MessageFormatter; + +class MessageContext : public UMemory { +public: + static MessageContext* create(const MessageFormatter& mf, const MessageArguments& args, Errors& errors, UErrorCode& errorCode); + + bool isCustomFormatter(const FunctionName&) const; + const Formatter* maybeCachedFormatter(const FunctionName&, UErrorCode&); + const SelectorFactory* lookupSelectorFactory(const FunctionName&, UErrorCode& status) const; + bool isSelector(const FunctionName& fn) const { return isBuiltInSelector(fn) || isCustomSelector(fn); } + bool isFormatter(const FunctionName& fn) const { return isBuiltInFormatter(fn) || isCustomFormatter(fn); } + + bool hasGlobal(const VariableName& v) const { return hasGlobalAsFormattable(v) || hasGlobalAsObject(v); } + bool hasGlobalAsFormattable(const VariableName&) const; + bool hasGlobalAsObject(const VariableName&) const; + 
const Formattable& getGlobalAsFormattable(const VariableName&) const; + const UObject* getGlobalAsObject(const VariableName&) const; + + // If any errors were set, update `status` accordingly + void checkErrors(UErrorCode& status) const; + // Hands out the error list by non-const reference (`errors` is a reference member) + Errors& getErrors() const { return errors; } + + const MessageFormatter& messageFormatter() const { return parent; } + + virtual ~MessageContext(); + +private: + MessageContext(const MessageFormatter&, const MessageArguments&, Errors&); + + FormatterFactory* lookupFormatterFactory(const FunctionName&, UErrorCode&) const; + bool isBuiltInSelector(const FunctionName&) const; + bool isBuiltInFormatter(const FunctionName&) const; + bool isCustomSelector(const FunctionName&) const; + + const MessageFormatter& parent; // Returned by messageFormatter() + const MessageArguments& arguments; // External message arguments + // Errors accumulated during parsing/formatting + Errors& errors; +}; // class MessageContext + +} // namespace message2 +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // MESSAGEFORMAT2_CONTEXT_H + +#endif // U_HIDE_DEPRECATED_API +// eof diff --git a/icu4c/source/i18n/unicode/messageformat2_data_model.h b/icu4c/source/i18n/unicode/messageformat2_data_model.h new file mode 100644 index 000000000000..fdd9cb80b807 --- /dev/null +++ b/icu4c/source/i18n/unicode/messageformat2_data_model.h @@ -0,0 +1,1652 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#ifndef U_HIDE_DEPRECATED_API + +#ifndef MESSAGEFORMAT_DATA_MODEL_H +#define MESSAGEFORMAT_DATA_MODEL_H + +#if U_SHOW_CPLUSPLUS_API + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/messageformat2_macros.h" +#include "unicode/messageformat2_utils.h" +#include "unicode/unistr.h" +#include "unicode/utypes.h" + +U_NAMESPACE_BEGIN namespace message2 { + +// ----------------------------------------------------------------------- +// Public MessageFormatDataModel class + +/** + *

MessageFormat2 is a Technical Preview API implementing MessageFormat 2.0. + * Since it is not final, documentation has not yet been added everywhere. + * + * The `MessageFormatDataModel` class describes a parsed representation of the text of a message. + * This representation is public as higher-level APIs for messages will need to know its public + * interface: for example, to re-instantiate a parsed message with different values for imported + * variables. + * + * The MessageFormatDataModel API implements the + * specification of the abstract syntax (data model representation) for MessageFormat. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ +class U_I18N_API MessageFormatDataModel : public UMemory { +/* + Classes that represent nodes in the data model are nested inside the + `MessageFormatDataModel` class. + + Classes such as `Expression`, `Pattern` and `VariantMap` are immutable and + are constructed using the builder pattern. + + Most classes representing nodes have copy constructors. This is because builders + contain immutable data that must be copied when calling `build()`, since the builder + could go out of scope before the immutable result of the builder does. Copying is + also necessary to prevent unexpected mutation if intermediate builders are saved + and mutated again after calling `build()`. + + The copy constructors perform a deep copy, for example by copying the entire + list of options for an `Operator` (and copying the entire underlying vector). + Some internal fields should be `const`, but are declared as non-`const` to make + the copy constructor simpler to implement. (These are noted throughout.) In + other words, those fields are `const` except during the execution of a copy + constructor. 
+ + On the other hand, intermediate `Builder` methods that return a `Builder&` + mutate the state of the builder, so in code like: + + Expression::Builder& exprBuilder = Expression::builder()->setOperand(foo); + Expression::Builder& exprBuilder2 = exprBuilder.setOperator(bar); + + the call to `setOperator()` would mutate `exprBuilder`, since `exprBuilder` + and `exprBuilder2` are references to the same object. + + An alternate choice would be to make `build()` destructive, so that copying would + be unnecessary. Or, both copying and moving variants of `build()` could be + provided. Copying variants of the intermediate `Builder` methods could be + provided as well, if this proved useful. +*/ + +public: + // Forward declarations + class Binding; + class Expression; + class Key; + class Operand; + class Operator; + class Pattern; + class PatternPart; + class Reserved; + class VariantMap; + + // Container aliases used throughout the data model + using Bindings = ImmutableVector<Binding>; + using ExpressionList = ImmutableVector<Expression>; + using KeyList = ImmutableVector<Key>; + using OptionMap = OrderedMap<Operand>; + + /** + * The `VariableName` class represents the name of a variable in a message. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + class U_I18N_API VariableName { + public: + /** + * Equality comparison. + * + * @param other the object to be compared with. + * @return true if other is equal to this, false otherwise. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + inline bool operator== (const VariableName& other) const { return other.variableName == variableName; } + /** + * Constructor. + * + * @param s The variable name, as a string + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + VariableName(const UnicodeString& s) : variableName(s) {} + /** + * Default constructor. 
(Needed for representing null operands) + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + VariableName() {} + /** + * Returns the name of this variable, as a string. + * + * @return Reference to the variable's name + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + const UnicodeString& identifier() const { return variableName; } + /** + * Returns the name of this variable, as a string prefixed by the + * variable name sigil ('$') + * + * @return String representation of the variable as it appears in a declaration + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + UnicodeString declaration() const; + /** + * Destructor. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + virtual ~VariableName(); + private: + const UnicodeString variableName; + }; // class VariableName + + /** + * The `FunctionName` class represents the name of a function referred to + * in a message. + * + * It corresponds to the `FunctionRef` interface defined in + * https://github.com/unicode-org/message-format-wg/blob/main/spec/data-model.md#expressions + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + class U_I18N_API FunctionName : public UMemory { + public: + /** + * Type representing the function's kind, which is either ':' (the default) + * or "open" ('+')/"close" ('-'), usually used for markup functions. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + enum Sigil { + OPEN, // '+' + CLOSE, // '-' + DEFAULT // ':' + }; + /** + * Converts the function name to a string that includes the sigil. + * + * @return A string beginning with the sigil, followed by the function's name. 
+ * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + UnicodeString toString() const; + /** + * Constructor. + * + * @param s The function name, as a string. Constructs a function name with the default sigil. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + FunctionName(UnicodeString s) : functionName(s), functionSigil(Sigil::DEFAULT) {} + /** + * Constructor. + * + * @param n The function name, as a string. + * @param s The function sigil to use. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + FunctionName(UnicodeString n, Sigil s) : functionName(n), functionSigil(s) {} + /** + * Copy constructor. + * + * @param other The function name to copy. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + FunctionName(const FunctionName& other) : functionName(other.functionName), functionSigil(other.functionSigil) {} + /** + * Destructor. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + virtual ~FunctionName(); + + private: + const UnicodeString functionName; + const Sigil functionSigil; + + // Maps the sigil kind to the character that introduces the function name + UChar sigilChar() const { + switch (functionSigil) { + case Sigil::OPEN: { return PLUS; } + case Sigil::CLOSE: { return HYPHEN; } + case Sigil::DEFAULT: { return COLON; } + } + // Not reachable for a valid Sigil. The explicit return keeps the function + // well-defined when U_ASSERT is compiled out, instead of flowing off the + // end of a non-void function. + U_ASSERT(false); + return COLON; + } + }; // class FunctionName + + /** + * The `Literal` class corresponds to the `literal` nonterminal in the MessageFormat 2 grammar, + * https://github.com/unicode-org/message-format-wg/blob/main/spec/message.abnf and the + * `Literal` interface defined in + * https://github.com/unicode-org/message-format-wg/blob/main/spec/data-model.md#expressions + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. 
+ */ + class U_I18N_API Literal { + public: + /** + * Returns the quoted representation of this literal (enclosed in '|' characters) + * + * @return A string representation of the literal enclosed in quote characters. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + UnicodeString quotedString() const; + /** + * Returns the parsed contents of this literal, wrapped in a Formattable. + * + * @return A reference to a Formattable whose string contents are + * the parsed string contents of this literal. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + const Formattable& getContents() const { return contents; } + /** + * Returns the parsed string contents of this literal, as a string. + * + * @return A reference to the string contents of this literal. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + const UnicodeString& stringContents() const; + /** + * Determines if this literal appeared as a quoted literal in the message. + * + * @return true if and only if this literal appeared as a quoted literal in the + * message. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + UBool quoted() const { return isQuoted; } + /** + * Literal constructor. + * + * @param q True if and only if this literal was parsed with the `quoted` nonterminal + * (appeared enclosed in '|' characters in the message text). + * @param s The string contents of this literal; escape sequences are assumed to have + * been interpreted already. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + Literal(UBool q, const UnicodeString& s) : isQuoted(q), contents(s) {} + /** + * Destructor. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. 
+ */ + virtual ~Literal(); + + private: + friend class Key; + friend class ImmutableVector; + friend class Operand; + friend class Reserved; + + Literal(const Literal& other) : isQuoted(other.isQuoted), contents(other.contents) {} + + const bool isQuoted = false; + // Contents is stored as a Formattable to avoid allocating + // new Formattables during formatting, but it's guaranteed + // to be a string + const Formattable contents; + // Because Key uses `Literal` as its underlying representation, + // this provides a default constructor for wildcard keys + Literal() {} + }; + + /** + * The `Operand` class corresponds to the `operand` nonterminal in the MessageFormat 2 grammar, + * https://github.com/unicode-org/message-format-wg/blob/main/spec/message.abnf . + * It represents a `Literal | VariableRef` -- see the `operand?` field of the `FunctionRef` + * interface defined at: + * https://github.com/unicode-org/message-format-wg/blob/main/spec/data-model.md#expressions + * with the difference that it can also represent a null operand (the absent operand in an + * `annotation` with no operand). + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + class U_I18N_API Operand : public UObject { + public: + /** + * Creates a new `variable` operand. + * + * @param var The variable name this operand represents. + * @param status Input/output error code. + * @return The new operand, guaranteed to be non-null if U_SUCCESS(status) + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + static Operand* create(const VariableName& var, UErrorCode& errorCode); + /** + * Creates a new `literal` operand. + * + * @param lit The literal this operand represents. + * @param status Input/output error code. 
+ * @return The new operand, guaranteed to be non-null if U_SUCCESS(errorCode) + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + static Operand* create(const Literal& lit, UErrorCode& errorCode); + /** + * Creates a new `null` operand, which should only appear when + * representing the following production in the grammar: + * expression = "{" [s] annotation [s] "}" + * + * @param errorCode Input/output error code. + * @return The new operand, guaranteed to be non-null if U_SUCCESS(errorCode) + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + static Operand* create(UErrorCode& errorCode); + /** + * Determines if this operand represents a variable. + * + * @return True if and only if the operand is a variable. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + UBool isVariable() const; + /** + * Determines if this operand represents a literal. + * + * @return True if and only if the operand is a literal. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + UBool isLiteral() const; + /** + * Determines if this operand is the null operand. + * + * @return True if and only if the operand is the null operand. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + UBool isNull() const; + /** + * Returns a reference to this operand's variable name. + * Precondition: isVariable() + * + * @return A reference to the name of the variable + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + const VariableName& asVariable() const; + /** + * Returns a reference to this operand's literal contents. 
+ * Precondition: isLiteral() + * + * @return A reference to the literal + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + const Literal& asLiteral() const; + /** + * Destructor. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + virtual ~Operand(); + private: + friend class Expression; + friend class OrderedMap<Operand>; + + enum Type { + VARIABLE, + LITERAL, + NULL_OPERAND + }; + // This wastes some space, but it's simpler than defining a copy + // constructor for a union + const VariableName var; + const Literal lit; + const Type type; + Operand(const Operand&); + // Null operand: `var` and `lit` are default-constructed and unused + Operand() : type(Type::NULL_OPERAND) {} + + Operand(const VariableName& v) : var(v), type(Type::VARIABLE) {} + Operand(const Literal& l) : lit(l), type(Type::LITERAL) {} + }; // class Operand + + /** + * The `Key` class corresponds to the `key` nonterminal in the MessageFormat 2 grammar, + * https://github.com/unicode-org/message-format-wg/blob/main/spec/message.abnf . + * It also corresponds to + * the `Literal | CatchallKey` that is the + * element type of the `keys` array in the `Variant` interface + * defined in https://github.com/unicode-org/message-format-wg/blob/main/spec/data-model.md#messages + * + * A key is either a literal or the wildcard symbol (represented in messages as '*') + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + class U_I18N_API Key : public UObject { + public: + /** + * Determines if this is a wildcard key + * + * @return True if and only if this is the wildcard key + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + UBool isWildcard() const { return wildcard; } + /** + * Returns the contents of this key as a literal. 
+ * Precondition: !isWildcard() + * + * @return The literal contents of the key + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + const Literal& asLiteral() const; + /** + * Creates a new wildcard key. + * + * @param status Input/output error code. + * @return The new key, which is non-null if U_SUCCESS(status). + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + static Key* create(UErrorCode& errorCode); + /** + * Creates a new literal key. + * + * @param lit The literal that this key matches with. + * @param status Input/output error code. + * @return The new key, which is non-null if U_SUCCESS(status). + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + static Key* create(const Literal& lit, UErrorCode& errorCode); + + private: + friend class ImmutableVector; + friend class VariantMap; + + Key(const Key& other) : wildcard(other.wildcard), contents(other.contents) {} + void toString(UnicodeString& result) const; + + // Wildcard constructor + Key() : wildcard(true) {} + // Concrete key constructor + Key(const Literal& lit) : wildcard(false), contents(lit) {} + const bool wildcard; // True if this represents the wildcard "*" + const Literal contents; + }; // class Key + + /** + * The `SelectorKeys` class represents the key list for a single variant. + * It corresponds to the `keys` array in the `Variant` interface + * defined in https://github.com/unicode-org/message-format-wg/blob/main/spec/data-model.md#messages + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + class U_I18N_API SelectorKeys : public UObject { + public: + /** + * Returns the underlying list of keys. + * + * @return A reference to the list of keys for this variant. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. 
+ */ + const KeyList& getKeys() const; + /** + * The mutable `SelectorKeys::Builder` class allows the key list to be constructed + * one key at a time. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + class U_I18N_API Builder : public UMemory { + private: + friend class SelectorKeys; + Builder(UErrorCode&); + LocalPointer::Builder> keys; + public: + /** + * Adds a single key to the list. Adopts `key`. + * + * @param key The key to be added. + * @param status Input/output error code. + * @return A reference to the builder. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + Builder& add(Key* key, UErrorCode& status); + /** + * Constructs a new immutable `SelectorKeys` using the list of keys + * set with previous `add()` calls. + * + * The builder object (`this`) can still be used after calling `build()`. + * + * @param status Input/output error code. + * @return The new SelectorKeys object, which is non-null if + * U_SUCCESS(status). + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + SelectorKeys* build(UErrorCode& status) const; + /** + * Destructor. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + virtual ~Builder(); + }; // class SelectorKeys::Builder + /** + * Returns a new `SelectorKeys::Builder` object. + * + * @param status Input/output error code. + * @return The new Builder object, which is non-null if U_SUCCESS(status). + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. 
+ */ + static Builder* builder(UErrorCode& status); + + private: + friend class ImmutableVector<SelectorKeys>; + friend class VariantMap; + + SelectorKeys(const SelectorKeys& other); + + const LocalPointer<KeyList> keys; + bool isBogus() const { return !keys.isValid(); } + // Adopts `ks` + SelectorKeys(KeyList* ks) : keys(ks) {} + }; // class SelectorKeys + + /** + * The `VariantMap` class represents the set of all variants in a message that has selectors, + * relating `SelectorKeys` objects to `Pattern` objects, + * following the `variant` production in the grammar: + * + * variant = when 1*(s key) [s] pattern + * + * https://github.com/unicode-org/message-format-wg/blob/main/spec/message.abnf#L9 + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + class U_I18N_API VariantMap : public UMemory { + public: + /** + * The initial iterator position to be used with `next()`. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + static constexpr int32_t FIRST = OrderedMap<Pattern>::FIRST; + /** + * Iterates over all variants. The order in which variants are returned is unspecified. + * + * @param pos A mutable reference to the current iterator position. Should be set to + * `FIRST` before the first call to `next()`. + * @param k A mutable reference to a const pointer to a SelectorKeys object, + * representing the key list for a single variant. + * If the return value is true, then `k` refers to a non-null pointer. + * @param v A mutable reference to a const pointer to a Pattern object, + * representing the pattern of a single variant. + * If the return value is true, then `v` refers to a non-null pointer. + * @return True if a variant was returned through `k` and `v` (see above); presumably + * false once there are no further variants -- TODO confirm against the implementation. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. 
+ */ + UBool next(int32_t &pos, const SelectorKeys*& k, const Pattern*& v) const; + /** + * Returns the number of variants. + * + * @return The size of this VariantMap. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + int32_t size() const; + /** + * The mutable `VariantMap::Builder` class allows the variant map to be + * constructed one variant at a time. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + class U_I18N_API Builder : public UMemory { + public: + /** + * Adds a single variant to the map. Adopts `key` and `value`. + * + * @param key The key list for this variant. + * @param value The pattern for this variant. + * @param status Input/output error code. + * @return A reference to the builder. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + Builder& add(SelectorKeys* key, Pattern* value, UErrorCode& status); + /** + * Constructs a new immutable `VariantMap` using the variants + * added with previous `add()` calls. + * + * The builder object (`this`) can still be used after calling `build()`. + * + * @param status Input/output error code. + * @return The new VariantMap, which is non-null if + * U_SUCCESS(status). + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + VariantMap* build(UErrorCode& status) const; + /** + * Destructor. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + virtual ~Builder(); + private: + friend class VariantMap; + + static void concatenateKeys(const SelectorKeys& keys, UnicodeString& result); + Builder(UErrorCode& errorCode); + LocalPointer::Builder> contents; + LocalPointer::Builder> keyLists; + }; // class VariantMap::Builder + + /** + * Returns a new `VariantMap::Builder` object. + * + * @param status Input/output error code. 
+ * @return The new builder, which is non-null if U_SUCCESS(errorCode). + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + static Builder* builder(UErrorCode& errorCode); + private: + /* + Internally, the map uses the `SelectorKeys` as its key, and the `pattern` as the value. + + This representation mirrors the ICU4J API: + public OrderedMap<SelectorKeys, Pattern> getVariants(); + + Since the `OrderedMap` class defined above is not polymorphic on its key + values, `VariantMap` is defined as a separate data type that wraps an + `OrderedMap<Pattern>`. + The `VariantMap::Builder::add()` method encodes its `SelectorKeys` as + a string, and the VariantMap::next() method decodes it. + */ + friend class Builder; + VariantMap(OrderedMap<Pattern>* vs, ImmutableVector<SelectorKeys>* ks); + const LocalPointer<OrderedMap<Pattern>> contents; + // See the method implementations for comments on + // how `keyLists` is used. + const LocalPointer<ImmutableVector<SelectorKeys>> keyLists; + }; // class VariantMap + + /** + * The `Reserved` class represents a `reserved` annotation, as in the `reserved` nonterminal + * in the MessageFormat 2 grammar or the `Reserved` interface + * defined in + * https://github.com/unicode-org/message-format-wg/blob/main/spec/data-model.md#expressions + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + class U_I18N_API Reserved : public UMemory { + public: + /** + * A `Reserved` is a sequence of literals. + * + * @return The number of literals. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + int32_t numParts() const; + /** + * Indexes into the sequence. + * Precondition: i < numParts() + * + * @param i Index of the part being accessed. + * @return The i'th literal in the sequence, which is + * guaranteed to be non-null. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. 
+ */ + const Literal* getPart(int32_t i) const; + /** + * The mutable `Reserved::Builder` class allows the reserved sequence to be + * constructed one part at a time. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + class U_I18N_API Builder : public UMemory { + private: + friend class Reserved; + + Builder(UErrorCode &errorCode); + LocalPointer::Builder> parts; + + public: + /** + * Adds a single literal to the reserved sequence. + * + * @param part The literal to be added + * @param status Input/output error code + * @return A reference to the builder. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + Builder& add(const Literal& part, UErrorCode& status); + /** + * Constructs a new immutable `Reserved` using the list of parts + * set with previous `add()` calls. + * + * The builder object (`this`) can still be used after calling `build()`. + * + * @param status Input/output error code. + * @return The new Reserved object, which is non-null if + * U_SUCCESS(status). + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + Reserved* build(UErrorCode& status) const; + /** + * Destructor. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + virtual ~Builder(); + }; // class Reserved::Builder + /** + * Returns a new `Reserved::Builder` object. + * + * @param status Input/output error code. + * @return The new Builder, which is non-null if U_SUCCESS(status). + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. 
+ */ + static Builder *builder(UErrorCode& status); + private: + friend class Operator; + + // See comments under SelectorKeys' copy constructor; this is analogous + bool isBogus() const { return !parts.isValid(); } + + // Reserved needs a copy constructor in order to make Expression deeply copyable + Reserved(const Reserved& other) : parts(new ImmutableVector(*other.parts)) { + U_ASSERT(!other.isBogus()); + } + + // Possibly-empty list of parts + // `literal` reserved as a quoted literal; `reserved-char` / `reserved-escape` + // strings represented as unquoted literals + const LocalPointer> parts; + + // Can only be called by Builder + // Takes ownership of `ps` + Reserved(ImmutableVector *ps) : /* fallback(DefaultString()), */ parts(ps) { U_ASSERT(ps != nullptr); } + }; + + /** + * The `Operator` class corresponds to the `FunctionRef | Reserved` type in the + * `Expression` interface defined in + * https://github.com/unicode-org/message-format-wg/blob/main/spec/data-model.md#patterns + * + * It represents the annotation that an expression can have: either a function name paired + * with a map from option names to operands (possibly empty), + * or a reserved sequence, which has no meaning and results in an error if the formatter + * is invoked. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + class U_I18N_API Operator : public UMemory { + public: + /** + * Determines if this operator is a reserved annotation. + * + * @return true if and only if this operator represents a reserved sequence. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + UBool isReserved() const { return isReservedSequence; } + /** + * Accesses the function name. + * Precondition: !isReserved() + * + * @return The function name of this operator. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. 
+ */ + const FunctionName& getFunctionName() const; + /** + * Accesses the underlying reserved sequence. + * Precondition: isReserved() + * + * @return The reserved sequence represented by this operator. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + const Reserved& asReserved() const; + /** + * Accesses function options. + * Precondition: !isReserved() + * + * @return A reference to the function options for this operator. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + const OptionMap& getOptions() const; + + /** + * The mutable `Operator::Builder` class allows the operator to be constructed + * incrementally. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + class U_I18N_API Builder : public UMemory { + private: + friend class Operator; + Builder() {} + LocalPointer asReserved; + LocalPointer functionName; + LocalPointer options; + public: + /** + * Sets this operator to be a reserved sequence. + * If a function name and/or options were previously set, + * clears them. Adopts `reserved`. + * + * @param reserved The reserved sequence to set as the contents of this Operator. + * @return A reference to the builder. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + Builder& setReserved(Reserved* reserved); + /** + * Sets this operator to be a function annotation and sets its name + * to `func`. + * If a reserved sequence was previously set, clears it. + * + * @param func The function name. + * @param status Input/output error code. + * @return A reference to the builder. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. 
+ */ + Builder& setFunctionName(const FunctionName& func, UErrorCode& status); + /** + * Sets this operator to be a function annotation and adds a + * single option. + * If a reserved sequence was previously set, clears it. + * + * @param key The name of the option. + * @param value The value (right-hand side) of the option. + * @param status Input/output error code. + * @return A reference to the builder. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + Builder& addOption(const UnicodeString &key, Operand* value, UErrorCode& status); + /** + * Constructs a new immutable `Operator` using the `reserved` annotation + * or the function name and options that were previously set. + * If neither `setReserved()` nor `setFunctionName()` was previously + * called, then `status` is set to U_INVALID_STATE_ERROR. + * + * The builder object (`this`) can still be used after calling `build()`. + * + * @param status Input/output error code. + * @return The new Operator, which is non-null if U_SUCCESS(status). + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + Operator* build(UErrorCode& status) const; + /** + * Destructor. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + virtual ~Builder(); + }; // class Operator::Builder + /** + * Returns a new `Operator::Builder` object. + * + * @param status Input/output error code. + * @return The new Builder, which is non-null if U_SUCCESS(status). + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. 
+ */ + static Builder* builder(UErrorCode& status); + private: + friend class Expression; + + // Postcondition: if U_SUCCESS(errorCode), then return value is non-bogus + static Operator* create(const Reserved& r, UErrorCode& errorCode); + + // Adopts `opts` + // Postcondition: if U_SUCCESS(errorCode), then return value is non-bogus + static Operator* create(const FunctionName& f, OptionMap* opts, UErrorCode& errorCode); + + // Function call constructor; adopts `l` if it's non-null (creates empty options otherwise) + Operator(const FunctionName& f, OptionMap *l); + // Reserved sequence constructor + // Result is bogus if copy of `r` fails + Operator(const Reserved& r) : isReservedSequence(true), functionName(FunctionName(UnicodeString(""))), options(nullptr), reserved(new Reserved(r)) {} + // Copy constructor + Operator(const Operator& other); + + bool isBogus() const; + const bool isReservedSequence; + const FunctionName functionName; + const LocalPointer options; + const LocalPointer reserved; + }; // class Operator + + /** + * The `Expression` class corresponds to the `expression` nonterminal in the MessageFormat 2 + * grammar and the `Expression` interface defined in + * https://github.com/unicode-org/message-format-wg/blob/main/spec/data-model.md#patterns + * + * It represents either an operand with no annotation; an annotation with no operand; + * or an operand annotated with an annotation. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + class U_I18N_API Expression : public UObject { + public: + /** + * Checks if this expression is an annotation + * with no operand. + * + * @return True if and only if the expression has + * an annotation and has no operand. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + UBool isStandaloneAnnotation() const; + /** + * Checks if this expression has a function + * annotation (with or without an operand). 
A reserved + * sequence is not a function annotation. + * + * @return True if and only if the expression has an annotation + * that is a function. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + UBool isFunctionCall() const; + /** + * Returns true if and only if this expression is + * annotated with a reserved sequence. + * + * @return True if and only if the expression has an + * annotation that is a reserved sequence. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + UBool isReserved() const; + /** + * Accesses the function or reserved sequence + * annotating this expression. + * Precondition: isFunctionCall() || isReserved() + * + * @return A reference to the operator of this expression. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + const Operator& getOperator() const; + /** + * Accesses the operand of this expression. + * + * @return A reference to the operand of this expression, + * which may be the null operand. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + const Operand& getOperand() const; + + /** + * The mutable `Expression::Builder` class allows the expression to be constructed + * incrementally. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + class U_I18N_API Builder : public UMemory { + private: + friend class Expression; + Builder() {} + LocalPointer rand; + LocalPointer rator; + public: + /** + * Sets the operand of this expression. Adopts `rAnd`. + * + * @param rAnd The operand to set. Must be non-null. + * @return A reference to the builder. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + Builder& setOperand(Operand* rAnd); + /** + * Sets the operator of this expression. 
Adopts `rAtor`. + * + * @param rAtor The operator to set. Must be non-null. + * @return A reference to the builder. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + Builder& setOperator(Operator* rAtor); + /** + * Constructs a new immutable `Expression` using the operand and operator that + * were previously set. If neither `setOperand()` nor `setOperator()` was + * previously called, or if `setOperand()` was called with the null operand + * and `setOperator()` was never called, then `status` is set to + * U_INVALID_STATE_ERROR. + * + * The builder object (`this`) can still be used after calling `build()`. + * + * @param status Input/output error code. + * @return The new Expression, which is non-null if U_SUCCESS(status). + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + Expression* build(UErrorCode& status) const; + /** + * Destructor. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + virtual ~Builder(); + }; // class Expression::Builder + /** + * Returns a new `Expression::Builder` object. + * + * @param status Input/output error code. + * @return The new Builder, which is non-null if U_SUCCESS(status). + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + static Builder* builder(UErrorCode& status); + + private: + friend class ImmutableVector; + friend class PatternPart; + friend class Binding; + + /* + Internally, an expression is represented as the application of an optional operator to an operand. + The operand is always present; for function calls with no operand, it's represented + as an operand for which `isNull()` is true. 
+ + Operator | Operand + -------------------------------- + { |42| :fun opt=value } => (FunctionName=fun, | Literal(quoted=true, contents="42") + options={opt: value}) + { abcd } => null | Literal(quoted=false, contents="abcd") + { : fun opt=value } => (FunctionName=fun, + options={opt: value}) | NullOperand() + */ + + // Here, a separate variable isBogus tracks if any copies failed. + // This is because rator = nullptr and rand = nullptr are semantic here, + // so this can't just be a predicate that checks if those are null + bool bogus = false; // copy constructors explicitly set this to true on failure + + bool isBogus() const; + + // Expression needs a copy constructor in order to make Pattern deeply copyable + // (and for closures) + Expression(const Expression& other); + + Expression(const Operator &rAtor, const Operand &rAnd) : rator(new Operator(rAtor)), rand(new Operand(rAnd)) {} + Expression(const Operand &rAnd) : rator(nullptr), rand(new Operand(rAnd)) {} + Expression(const Operator &rAtor) : rator(new Operator(rAtor)), rand(new Operand()) {} + const LocalPointer rator; + const LocalPointer rand; + }; // class Expression + + /** + * A `PatternPart` is a single element (text or expression) in a `Pattern`. + * It corresponds to the `body` field of the `Pattern` interface + * defined in https://github.com/unicode-org/message-format-wg/blob/main/spec/data-model.md#patterns + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + class U_I18N_API PatternPart : public UObject { + public: + /** + * Creates a new text part. + * + * @param t An arbitrary string. + * @param status Input/output error code. + * @return The new PatternPart, which is non-null if U_SUCCESS(status). + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + static PatternPart* create(const UnicodeString& t, UErrorCode& status); + /** + * Creates a new expression part. 
Adopts `e`, which must be non-null. + * + * @param e Expression to use for this part. + * @param errorCode Input/output error code. + * @return The new PatternPart, which is non-null if U_SUCCESS(errorCode). + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + static PatternPart* create(Expression* e, UErrorCode& errorCode); + /** + * Checks if the part is a text part. + * + * @return True if and only if this is a text part. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + UBool isText() const { return isRawText; } + /** + * Accesses the expression of the part. + * Precondition: !isText() + * + * @return A reference to the part's underlying expression. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + const Expression& contents() const; + /** + * Accesses the text contents of the part. + * Precondition: isText() + * + * @return A reference to a string representing the part's text. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + const UnicodeString& asText() const; + + private: + friend class ImmutableVector; + friend class Pattern; + + // Text + PatternPart(const UnicodeString& t) : isRawText(true), text(t), expression(nullptr) {} + // Expression + PatternPart(Expression* e) : isRawText(false), expression(e) {} + + // If !isRawText and the copy of the other expression fails, + // then isBogus() will be true for this PatternPart + // PatternPart needs a copy constructor in order to make Pattern deeply copyable + PatternPart(const PatternPart& other) : isRawText(other.isText()), text(other.text), expression(isRawText ? 
nullptr : new Expression(other.contents())) { + U_ASSERT(!other.isBogus()); + } + + const bool isRawText; + // Not used if !isRawText + const UnicodeString text; + // null if isRawText + const LocalPointer expression; + + bool isBogus() const { return (!isRawText && !expression.isValid()); } + }; // class PatternPart + + /** + * A `Pattern` is a sequence of formattable parts. + * It corresponds to the `Pattern` interface + * defined in https://github.com/unicode-org/message-format-wg/blob/main/spec/data-model.md#patterns + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + class U_I18N_API Pattern : public UObject { + public: + /** + * Returns the size. + * + * @return The number of parts in the pattern. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + int32_t numParts() const { return parts->length(); } + /** + * Returns the `i`th part in the pattern. + * Precondition: i < numParts() + * + * @param i Index of the part being accessed. + * @return The part at index `i`, which is guaranteed + * to be non-null. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + const PatternPart* getPart(int32_t i) const; + + /** + * The mutable `Pattern::Builder` class allows the pattern to be + * constructed one part at a time. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + class U_I18N_API Builder : public UMemory { + private: + friend class Pattern; + + Builder(UErrorCode &errorCode); + // Note this is why PatternPart and all its enclosed classes need + // copy constructors: when the build() method is called on `parts`, + // it should copy `parts` rather than moving it + LocalPointer::Builder> parts; + + public: + /** + * Adds a single part to the pattern. Adopts `part`. + * + * @param part The part to be added. Must be non-null. 
+ * @param status Input/output error code + * @return A reference to the builder. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + Builder& add(PatternPart *part, UErrorCode& status); + /** + * Constructs a new immutable `Pattern` using the list of parts + * set with previous `add()` calls. + * + * The builder object (`this`) can still be used after calling `build()`. + * + * @param status Input/output error code. + * @return The new pattern, which is non-null if U_SUCCESS(status). + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + Pattern* build(UErrorCode &status) const; + /** + * Destructor. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + virtual ~Builder(); + }; // class Pattern::Builder + + /** + * Returns a new `Pattern::Builder` object. + * + * @param status Input/output error code. + * @return The new Builder, which is non-null if U_SUCCESS(status). + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + static Builder* builder(UErrorCode& status); + private: + friend class MessageFormatDataModel; + friend class OrderedMap; + + // Possibly-empty list of parts + const LocalPointer> parts; + + bool isBogus() const { return !parts.isValid(); } + // Can only be called by Builder + // Takes ownership of `ps` + Pattern(ImmutableVector *ps) : parts(ps) { U_ASSERT(ps != nullptr); } + + // If the copy of the other list fails, + // then isBogus() will be true for this Pattern + // Pattern needs a copy constructor in order to make MessageFormatDataModel::build() be a copying rather than + // moving build + Pattern(const Pattern& other) : parts(new ImmutableVector(*(other.parts))) { U_ASSERT(!other.isBogus()); } + }; // class Pattern + + /** + * A `Binding` pairs a variable name with an expression. 
+ * It corresponds to the `Declaration` interface + * defined in https://github.com/unicode-org/message-format-wg/blob/main/spec/data-model.md#messages + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + class U_I18N_API Binding { + public: + /** + * Creates a new binding. Adopts `e`, which must be non-null. + * + * @param var The variable name of the declaration. + * @param e The expression (right-hand side) of the declaration. + * @param status Input/output error code. + * @return The new binding, which is non-null if U_SUCCESS(status). + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + static Binding* create(const VariableName& var, Expression* e, UErrorCode& status); + /** + * Accesses the right-hand side of the binding. + * + * @return A reference to the expression. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + const Expression& getValue() const; + /** + * Accesses the left-hand side of the binding. + * + * @return A reference to the variable name. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + const VariableName& getVariable() const { return var; } + /** + * Destructor. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. 
+ */ + virtual ~Binding(); + private: + friend class ImmutableVector; + + const VariableName var; + const LocalPointer value; + + bool isBogus() const { return !value.isValid(); } + + Binding(const VariableName& v, Expression* e) : var(v), value(e){} + // This needs a copy constructor so that `Bindings` is deeply-copyable, + // which is in turn so that MessageFormatDataModel::build() can be copying + // (it has to copy the builder's locals) + Binding(const Binding& other); + }; // class Binding + + // Public MessageFormatDataModel methods + + /** + * Accesses the local variable declarations for this data model. + * + * @return A reference to a list of bindings for local variables. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + const Bindings& getLocalVariables() const { return *bindings; } + /** + * Determines what type of message this is. + * + * @return true if and only if this data model represents a `selectors` message + * (if it represents a `match` construct with selectors and variants). + * + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + UBool hasSelectors() const; + /** + * Accesses the selectors. + * Precondition: hasSelectors() + * + * @return A reference to the selector list. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + const ExpressionList& getSelectors() const; + /** + * Accesses the variants. + * Precondition: hasSelectors() + * + * @return A reference to the variant map. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + const VariantMap& getVariants() const; + /** + * Accesses the pattern (in a message without selectors). + * Precondition: !hasSelectors() + * + * @return A reference to the pattern. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. 
+ */ + const Pattern& getPattern() const; + + /** + * The mutable `MessageFormatDataModel::Builder` class allows the data model to be + * constructed incrementally. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + class U_I18N_API Builder : public UMemory { + private: + friend class MessageFormatDataModel; + + Builder(UErrorCode& errorCode); + void buildSelectorsMessage(UErrorCode& errorCode); + LocalPointer pattern; + LocalPointer selectors; + LocalPointer variants; + LocalPointer locals; + + public: + /** + * Adds a local variable declaration. Adopts `expression`, which must be non-null. + * + * @param variableName The variable name of the declaration. + * @param expression The expression to which `variableName` should be bound. + * @param status Input/output error code. + * @return A reference to the builder. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + Builder& addLocalVariable(const VariableName& variableName, Expression* expression, UErrorCode &status); + /** + * Adds a selector expression. Adopts `expression`, which must be non-null. + * If a pattern was previously set, clears the pattern. + * + * @param selector Expression to add as a selector. + * @param status Input/output error code. + * @return A reference to the builder. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + Builder& addSelector(Expression* selector, UErrorCode& status); + /** + * Adds a single variant. Adopts `keys` and `pattern`, which must be non-null. + * If a pattern was previously set using `setPattern()`, clears the pattern. + * + * @param keys Keys for the variant. + * @param pattern Pattern for the variant. + * @param status Input/output error code. + * @return A reference to the builder. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. 
+ */ + Builder& addVariant(SelectorKeys* keys, Pattern* pattern, UErrorCode& status); + /** + * Sets the body of the message as a pattern. + * If selectors and/or variants were previously set, clears them. + * Adopts `pattern`, which must be non-null. + * + * @param pattern Pattern to represent the body of the message. + * @return A reference to the builder. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + Builder& setPattern(Pattern* pattern); + /** + * Constructs a new immutable data model. + * If `setPattern()` has not been called and if `addSelector()` and + * `addVariant()` were not each called at least once, + * `status` is set to `U_INVALID_STATE_ERROR`. + * If `addSelector()` was called and `addVariant()` was never called, + * or vice versa, then `status` is set to U_INVALID_STATE_ERROR. + * Otherwise, either a Pattern or Selectors message is constructed + * based on the pattern that was previously set, or selectors and variants + * that were previously set. + * + * The builder object (`this`) can still be used after calling `build()`. + * + * @param status Input/output error code. + * @return The new MessageFormatDataModel, which is non-null if + * U_SUCCESS(status). + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + MessageFormatDataModel* build(UErrorCode& status) const; + }; // class MessageFormatDataModel::Builder + /** + * Returns a new `MessageFormatDataModel::Builder` object. + * + * @param status Input/output error code. + * @return The new Builder object, which is non-null if U_SUCCESS(status). + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + static Builder* builder(UErrorCode& status); + + /** + * Destructor. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. 
+ */ + virtual ~MessageFormatDataModel(); + +private: + + // The expressions that are being matched on. + // Null iff this is a `pattern` message. + const LocalPointer selectors; + + // The list of `when` clauses (case arms). + // Null iff this is a `pattern` message. + const LocalPointer variants; + + // The pattern forming the body of the message. + // If this is non-null, then `variants` and `selectors` must be null. + const LocalPointer pattern; + + // Bindings for local variables + const LocalPointer bindings; + + // Normalized version of the input string (optional whitespace omitted) + // Used for testing purposes + const LocalPointer normalizedInput; + + // Do not define default assignment operator + const MessageFormatDataModel &operator=(const MessageFormatDataModel &) = delete; + + MessageFormatDataModel(const Builder& builder, UErrorCode &status); +}; // class MessageFormatDataModel + +} // namespace message2 +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // MESSAGEFORMAT_DATA_MODEL_H + +#endif // U_HIDE_DEPRECATED_API +// eof + diff --git a/icu4c/source/i18n/unicode/messageformat2_formatting_context.h b/icu4c/source/i18n/unicode/messageformat2_formatting_context.h new file mode 100644 index 000000000000..5eb631ba2050 --- /dev/null +++ b/icu4c/source/i18n/unicode/messageformat2_formatting_context.h @@ -0,0 +1,487 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#ifndef U_HIDE_DEPRECATED_API + +#ifndef MESSAGEFORMAT2_FORMATTING_CONTEXT_H +#define MESSAGEFORMAT2_FORMATTING_CONTEXT_H + +#if U_SHOW_CPLUSPLUS_API + +/** + * \file + * \brief C++ API: Formats messages using the draft MessageFormat 2.0. 
+ */ + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/formattedvalue.h" +#include "unicode/messageformat2_context.h" +#include "unicode/messageformat2_data_model.h" +#include "unicode/messageformat2_macros.h" +#include "unicode/numberformatter.h" +#include "unicode/smpdtfmt.h" + +U_NAMESPACE_BEGIN namespace message2 { + +class Selector; +class SelectorFactory; + +extern void formatDateWithDefaults(const Locale& locale, UDate date, UnicodeString&, UErrorCode& errorCode); +extern number::FormattedNumber formatNumberWithDefaults(const Locale& locale, double toFormat, UErrorCode& errorCode); +extern number::FormattedNumber formatNumberWithDefaults(const Locale& locale, int32_t toFormat, UErrorCode& errorCode); +extern number::FormattedNumber formatNumberWithDefaults(const Locale& locale, int64_t toFormat, UErrorCode& errorCode); +extern number::FormattedNumber formatNumberWithDefaults(const Locale& locale, StringPiece toFormat, UErrorCode& errorCode); +extern DateFormat* defaultDateTimeInstance(const Locale&, UErrorCode&); + +/** + *

MessageFormatter is a Technical Preview API implementing MessageFormat 2.0. + * Since it is not final, documentation has not yet been added everywhere. + * + * The following class represents the input to a custom function; it encapsulates + * the function's (unnamed) argument and its named options, as well as providing + * methods for the function to record its output. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ +class U_I18N_API FormattingContext : public UObject { + public: + + /** + * Sets the function's output to a string value. + * + * @param output The value of the output. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + virtual void setOutput(const UnicodeString& output) = 0; + /** + * Sets the function's output to a `number::FormattedNumber` value + * + * @param output The value of the output, which is passed by move. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + virtual void setOutput(number::FormattedNumber&& output) = 0; + /** + * Indicates that an error occurred during selection, such as an + * argument with a type that doesn't support selection. Errors are signaled + * internally to the `FormattingContext` object and propagated at the end of + * formatting, and are not signaled using the usual `UErrorCode` mechanism + * (`UErrorCode`s are still used to indicate memory allocation errors and any + * errors signaled by other ICU functions). + * + * @param name Any informative string (usually the name of the selector function). + * @param status Input/output error code + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. 
+ */ + virtual void setSelectorError(const UnicodeString& name, UErrorCode& status) = 0; + /** + * Indicates that an error occurred during formatting, such as an argument + * having a type not supported by this formatter. + * + * @param name Any informative string (usually the name of the formatter function). + * @param status Input/output error code + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + virtual void setFormattingError(const UnicodeString&, UErrorCode&) = 0; + /** + * Returns true if and only if a `Formattable` argument was supplied to this + * function. (Object arguments must be checked for using `hasObjectInput()` and + * are not treated as a `Formattable` wrapping an object.) Each function has + * at most one argument, so if `hasFormattableInput()` is true, + * `hasObjectInput()` is false, and vice versa. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + virtual UBool hasFormattableInput() const = 0; + /** + * Accesses the function's argument, assuming it has type `Formattable`. + * It is an internal error to call this method if `!hasFormattableInput()`. + * In particular, if the argument passed in is a UObject*, it is an internal + * error to call `getFormattableInput()` (`getObjectInput()` must be called instead.) + * + * @return A reference to the argument to this function. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + virtual const Formattable& getFormattableInput() const = 0; + /** + * Determines the type of input to this function. + * + * @return True if and only if a `UObject*` argument was supplied to this + * function. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + virtual UBool hasObjectInput() const = 0; + /** + * Accesses the function's argument, assuming it has type `UObject`. 
+ * It is an internal error to call this method if `!hasObjectInput()`. + * + * @return A reference to the argument to this function. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + virtual const UObject& getObjectInput() const = 0; + /** + * Checks if the argument being passed in already has a formatted + * result that is a string. This formatted result may be treated as the input + * to this formatter, or may be overwritten with the result of formatting the + * original input differently. + * + * @return True if and only if formatted string output is present. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + virtual UBool hasStringOutput() const = 0; + /** + * Checks if the argument being passed in already has a formatted + * result that is a number. This formatted result may be treated as the input + * to this formatter, or may be overwritten with the result of formatting the + * original input differently. + * + * @return True if and only if formatted number output is present. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + virtual UBool hasNumberOutput() const = 0; + /** + * Accesses the existing formatted output of this argument as a string. + * It is an internal error to call this method if `!hasStringOutput()`. + * + * @return A reference to the existing formatted string output. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + virtual const UnicodeString& getStringOutput() const = 0; + /** + * Accesses the existing formatted output of this argument as a number. + * It is an internal error to call this method if `!hasNumberOutput()`. + * + * @return A reference to the existing formatted number output. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. 
+ */ + virtual const number::FormattedNumber& getNumberOutput() const = 0; + /** + * Looks up the value of a named string option. + * + * @param optionName The name of the option. + * @param optionValue A mutable reference that is set to the string value of + * the option if the named option exists. + * @return True if and only if a string-typed option named `optionName` exists. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + virtual UBool getStringOption(const UnicodeString& optionName, UnicodeString& optionValue) const = 0; + /** + * Looks up the value of a named numeric option of type `double`. + * The return value is true if and only if there is a `double`-typed option + * named `optionName`. + * + * @param optionName The name of the option. + * @param optionValue A mutable reference that is set to the `double` value of + * the option if the named option exists. + * @return True if and only if a double-typed option named `optionName` exists. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + virtual UBool getDoubleOption(const UnicodeString& optionName, double& optionValue) const = 0; + /** + * Looks up the value of a named numeric option of type `int64_t`. + * The return value is true if and only if there is an `int64_t`-typed option + * named `optionName`. + * + * @param optionName The name of the option. + * @param optionValue A mutable reference that is set to the `int64_t` value of + * the option if the named option exists. + * @return True if and only if an int64-typed option named `optionName` exists. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + virtual UBool getInt64Option(const UnicodeString& optionName, int64_t& optionValue) const = 0; + /** + * Checks for a named object option. + * + * @param optionName The name of the option. 
+ * @return True if and only if an object-typed option named `optionName` exists. + ** + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + virtual UBool hasObjectOption(const UnicodeString& optionName) const = 0; + /** + * Accesses a named object option. + * Precondition: the option must exist. + * + * @param optionName The name of the option. + * @return A reference to the object value of the option. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + virtual const UObject& getObjectOption(const UnicodeString& optionName) const = 0; + /** + * Iterates over all options. The order in which the options are returned is unspecified. + * + * @param pos A mutable reference to the current iterator position. Should be set to + * `firstOption()` before the first call to `nextOption()`. + * @param optionName A mutable reference that is set to the name of the next option + * if the return value is non-null. + * @return A pointer to a `Formattable` (whose value will be string, double, date, or int64; + * other types are not used). The pointer is null if there are no further options + * from `pos`. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + virtual const Formattable* nextOption(int32_t& pos, UnicodeString& optionName) const = 0; + /** + * Used with `nextOption()`. + * + * @return The initial iterator position for `nextOption()`. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + virtual int32_t firstOption() const = 0; + /** + * Gets the number of options. + * + * @return The number of named options. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + virtual int32_t optionsCount() const = 0; + /** + * Formats the current argument as a string, using defaults. 
If `hasNumberOutput()` is + * true, then the string output is set to the result of formatting the number output, + * and the number output is cleared. If the function's argument is either absent or is + * a fallback value, the output is the result of formatting the fallback value (which + * is the default fallback string if the argument is absent). If the function's argument + * is object-typed, then the argument is treated as a fallback value, since there is + * no default formatter for objects. + * + * @param locale The locale to use for formatting numbers or dates (does not affect + * the formatting of a pre-formatted number, if a number output is already present) + * @param status Input/output error code + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + virtual void formatToString(const Locale& locale, UErrorCode& status) = 0; + + virtual ~FormattingContext(); +}; + +class FunctionRegistry; + +// The ExpressionContext contains everything needed to format a specific operand +// or expression. +class ExpressionContext : public FormattingContext { + private: + + // The input state tracks whether the formatter has a Formattable + // or object input; represents an absent operand; or is in an error state. + enum InputState { + FALLBACK, + NO_OPERAND, // Used when the argument is absent, but there are no errors + OBJECT_INPUT, + FORMATTABLE_INPUT + }; + + // The output state tracks whether (formatted) numeric or string output + // has been generated. 
+ enum OutputState { + NONE, + NUMBER, + STRING + }; + + void clearInput(); + void clearOutput(); + + bool hasFunctionName() const; + const FunctionName& getFunctionName(); + void clearFunctionName(); + // Precondition: hasSelector() + Selector* getSelector(UErrorCode&) const; + // Precondition: hasFormatter() + const Formatter* getFormatter(UErrorCode&); + + void initFunctionOptions(UErrorCode&); + void addFunctionOption(const UnicodeString&, Formattable*, UErrorCode&); + void clearFunctionOptions(); + Formattable* getOption(const UnicodeString&, Formattable::Type) const; + bool tryStringAsNumberOption(const UnicodeString&, double&) const; + Formattable* getNumericOption(const UnicodeString&) const; + + void doFormattingCall(); + void doSelectorCall(const UnicodeString[], int32_t, UnicodeString[], int32_t&, UErrorCode&); + void returnFromFunction(); + + void enterState(InputState s); + void enterState(OutputState s); + void promoteFallbackToOutput(); + void formatInputWithDefaults(const Locale&, UErrorCode&); + + ExpressionContext(MessageContext&, UErrorCode&); + + friend class MessageArguments; + friend class MessageFormatter; + + MessageContext& context; + + InputState inState; + OutputState outState; + + // Function name that has been set but not yet invoked on an argument + LocalPointer pendingFunctionName; + + // Fallback string to use in case of errors + UnicodeString fallback; + +/* + Object and Formattable inputs are stored separately to avoid accidental copying + of a Formattable containing an object, which would occur if the Formattable + assignment operator was used. The copy constructor for Formattables assumes that + an object stored in a Formattable has type Measure. Since MessageFormat allows + custom functions to take object arguments of any type that inherits from UObject, + we have to ensure that a Formattable is never copied. 
+*/ + // Input arises from literals or a message argument + // Invariant: input.getType != kObject (object Formattables can't be copied) + Formattable input; + // (An object input can only originate from a message argument) + // Invariant: ((isObject && objectInput != nullptr) || (!isObject && objectInput == nullptr) + const UObject* objectInput; + const UObject* getObjectInputPointer() const; + + // Output is returned by a formatting function + UnicodeString stringOutput; + number::FormattedNumber numberOutput; + + // Named options passed to functions + LocalPointer functionOptions; + + // Creates a new context with the given `MessageContext` as its parent + static ExpressionContext* create(MessageContext&, UErrorCode&); + // Creates a new context sharing this's context and parent + ExpressionContext* create(UErrorCode&); + + const MessageContext& messageContext() const { return context; } + + // Resets input and output and uses existing fallback + void setFallback(); + // Sets fallback string + void setFallbackTo(const FunctionName&); + void setFallbackTo(const VariableName&); + void setFallbackTo(const MessageFormatDataModel::Literal&); + // Sets the fallback string as input and exits the error state + void promoteFallbackToInput(); + + void setFunctionName(const FunctionName&, UErrorCode&); + // Function name must be set; clears it + void resolveSelector(Selector*); + + void setStringOption(const UnicodeString&, const UnicodeString&, UErrorCode&); + void setDateOption(const UnicodeString&, UDate, UErrorCode&); + void setNumericOption(const UnicodeString&, double, UErrorCode&); + void setObjectOption(const UnicodeString&, const UObject*, UErrorCode&); + + void setNoOperand(); + void setInput(const UObject*); + void setInput(const Formattable&); + void setInput(const UnicodeString&); + void setObjectInput(UObject*); + void setOutput(const UnicodeString&) override; + void setOutput(number::FormattedNumber&&) override; + + // If there is a function name, clear it 
and + // call the function, setting the input and/or output appropriately + // Precondition: hasFormatter() + void evalFormatterCall(const FunctionName&, UErrorCode&); + // If there is a function name, clear it and + // call the function, setting the input and/or output appropriately + // Precondition: hasSelector() + void evalPendingSelectorCall(const UVector&, UVector&, UErrorCode&); + + static Formattable* createFormattable(const UnicodeString&, UErrorCode&); + static Formattable* createFormattable(double, UErrorCode&); + static Formattable* createFormattable(int64_t, UErrorCode&); + static Formattable* createFormattableDate(UDate, UErrorCode&); + static Formattable* createFormattableDecimal(StringPiece, UErrorCode&); + static Formattable* createFormattable(const UnicodeString*, int32_t, UErrorCode&); + static Formattable* createFormattable(const UObject*, UErrorCode&); + + public: + + // Precondition: pending function name is set + bool hasSelector() const; + // Precondition: pending function name is set + bool hasFormatter() const; + + bool isFallback() const; + + bool hasInput() const { return hasFormattableInput() || hasObjectInput(); } + UBool hasFormattableInput() const override; + UBool hasObjectInput() const override; + const Formattable& getFormattableInput() const override; + const UObject& getObjectInput() const override; + + UBool hasStringOutput() const override; + UBool hasNumberOutput() const override; + // True iff either a string or a number output is present. + // Declared const (like hasInput()) because it only calls const accessors. + bool hasOutput() const { return (hasStringOutput() || hasNumberOutput()); } + // Just gets existing output, doesn't force evaluation + const UnicodeString& getStringOutput() const override; + const number::FormattedNumber& getNumberOutput() const override; + // Forces evaluation + void formatToString(const Locale&, UErrorCode&) override; + + UBool getStringOption(const UnicodeString&, UnicodeString&) const override; + UBool getDoubleOption(const UnicodeString&, double&) const override; + UBool getInt64Option(const UnicodeString&, int64_t&) const
override; + UBool hasObjectOption(const UnicodeString&) const override; + const UObject& getObjectOption(const UnicodeString&) const override; + // Function options iterator + int32_t firstOption() const override; + int32_t optionsCount() const override; + const Formattable* nextOption(int32_t&, UnicodeString&) const override; + + void setSelectorError(const UnicodeString&, UErrorCode&) override; + void setFormattingError(const UnicodeString&, UErrorCode&) override; + + virtual ~ExpressionContext(); +}; + +} // namespace message2 +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // MESSAGEFORMAT2_FORMATTING_CONTEXT_H + +#endif // U_HIDE_DEPRECATED_API +// eof diff --git a/icu4c/source/i18n/unicode/messageformat2_function_registry.h b/icu4c/source/i18n/unicode/messageformat2_function_registry.h new file mode 100644 index 000000000000..24b5e75a0fed --- /dev/null +++ b/icu4c/source/i18n/unicode/messageformat2_function_registry.h @@ -0,0 +1,405 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#ifndef U_HIDE_DEPRECATED_API + +#ifndef MESSAGEFORMAT2_FUNCTION_REGISTRY_H +#define MESSAGEFORMAT2_FUNCTION_REGISTRY_H + +#if U_SHOW_CPLUSPLUS_API + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/datefmt.h" +#include "unicode/format.h" +#include "unicode/messageformat2_data_model.h" +#include "unicode/messageformat2_formatting_context.h" +#include "unicode/messageformat2_macros.h" +#include "unicode/numberformatter.h" +#include "unicode/unistr.h" +#include "unicode/upluralrules.h" +#include "unicode/utypes.h" + +U_NAMESPACE_BEGIN namespace message2 { + +class Formatter; +class Selector; + +/** + * Interface that factory classes for creating formatters must implement. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. 
+ */ +class U_COMMON_API FormatterFactory : public UObject { + // TODO: the coding guidelines say that interface classes + // shouldn't inherit from UObject, but if I change it so these + // classes don't, and the individual formatter factory classes + // inherit from public FormatterFactory, public UObject, then + // memory leaks ensue +public: + /** + * Constructs a new formatter object. This method is not const; + * formatter factories with local state may be defined. + * + * @param locale Locale to be used by the formatter. + * @param status Input/output error code. + * @return The new Formatter, which is non-null if U_SUCCESS(status). + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + virtual Formatter* createFormatter(const Locale& locale, UErrorCode& status) = 0; + virtual ~FormatterFactory(); +}; // class FormatterFactory + +/** + * Interface that factory classes for creating selectors must implement. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ +class U_COMMON_API SelectorFactory : public UObject { +public: + /** + * Constructs a new selector object. + * + * @param locale Locale to be used by the selector. + * @param status Input/output error code. + * @return The new selector, which is non-null if U_SUCCESS(status). + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + virtual Selector* createSelector(const Locale& locale, UErrorCode& status) const = 0; + virtual ~SelectorFactory(); +}; // class SelectorFactory + +/** + * Defines mappings from names of formatters and selectors to functions implementing them. + * The required set of formatter and selector functions is defined in the spec. Users can + * also define custom formatter and selector functions. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. 
+ */ +class U_I18N_API FunctionRegistry : public UObject { +public: + /** + * Looks up a formatter factory by the name of the formatter. The result is non-const, + * since formatter factories may have local state. + * + * @param formatterName Name of the desired formatter. + * @return The FormatterFactory registered under `formatterName`, or null if + * no formatter factory has been registered under that name. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + FormatterFactory* getFormatter(const FunctionName& formatterName) const; + /** + * Looks up a selector factory by the name of the selector. + * + * @param selectorName Name of the desired selector. + * @return The SelectorFactory registered under `selectorName`, or null if + * no selector factory has been registered under that name. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + const SelectorFactory* getSelector(const FunctionName& selectorName) const; + + /** + * The mutable Builder class allows each formatter and selector factory + * to be initialized separately; calling its `build()` method yields an + * immutable FunctionRegistry object. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + class U_I18N_API Builder : public UObject { + private: + friend class FunctionRegistry; + + Builder(UErrorCode& status); + LocalPointer formatters; + LocalPointer selectors; + public: + /** + * Registers a formatter factory to a given formatter name. Adopts `formatterFactory`. + * + * @param formatterName Name of the formatter being registered. + * @param formatterFactory A FormatterFactory object to use for creating `formatterName` + * formatters. + * @param status Input/output error code. + * @return A reference to the builder. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only.
+ */ + Builder& setFormatter(const FunctionName& formatterName, FormatterFactory* formatterFactory, UErrorCode& status); + /** + * Registers a selector factory to a given selector name. Adopts `selectorFactory`. + * + * @param selectorName Name of the selector being registered. + * @param selectorFactory A SelectorFactory object to use for creating `selectorName` + * selectors. + * @param status Input/output error code. + * @return A reference to the builder. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + Builder& setSelector(const FunctionName& selectorName, SelectorFactory* selectorFactory, UErrorCode& status); + /** + * Creates an immutable `FunctionRegistry` object with the selectors and formatters + * that were previously registered. The builder cannot be used after this call. + * + * @param status Input/output error code. + * @return A pointer to the new FunctionRegistry, which is non-null if U_SUCCESS(status). + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + FunctionRegistry* build(UErrorCode& status); + /** + * Destructor. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + virtual ~Builder(); + }; // class FunctionRegistry::Builder + /** + * Returns a new `FunctionRegistry::Builder` object. + * + * @param status Input/output error code. + * @return A pointer to the new Builder, which is non-null if U_SUCCESS(status). + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + static Builder* builder(UErrorCode& status); + /** + * Destructor. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only.
+ */ + virtual ~FunctionRegistry(); + +private: + friend class Builder; + friend class MessageContext; + friend class MessageFormatter; + + // Adopts `f` and `s` + FunctionRegistry(Hashtable* f, Hashtable* s) : formatters(f), selectors(s) {} + + // Debugging; should only be called on a function registry with + // all the standard functions registered + void checkFormatter(const char*) const; + void checkSelector(const char*) const; + void checkStandard() const; + + bool hasFormatter(const FunctionName& f) const { + if (!formatters->containsKey(f.toString())) { + return false; + } + U_ASSERT(getFormatter(f) != nullptr); + return true; + } + bool hasSelector(const FunctionName& s) const { + if (!selectors->containsKey(s.toString())) { + return false; + } + U_ASSERT(getSelector(s) != nullptr); + return true; + } + const LocalPointer formatters; + const LocalPointer selectors; + }; // class FunctionRegistry + +/** + * Interface that formatter classes must implement. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ +class U_COMMON_API Formatter : public UObject { +public: + /** + * Formats the input passed in `context` by setting an output using one of the + * `FormattingContext` methods or indicating an error. + * + * @param context Formatting context; captures the unnamed function argument, + * current output, named options, and output. See the `FormattingContext` + * documentation for more details. + * @param status Input/output error code. Should not be set directly by the + * custom formatter, which should use `FormattingContext::setFormattingWarning()` + * to signal errors. The custom formatter may pass `status` to other ICU functions + * that can signal errors using this mechanism. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. 
+ */ + virtual void format(FormattingContext& context, UErrorCode& status) const = 0; + virtual ~Formatter(); +}; // class Formatter + +/** + * Interface that selector classes must implement. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ +class U_COMMON_API Selector : public UObject { +public: + /** + * Compares the input passed in `context` to an array of keys, and returns an array of matching + * keys sorted by preference. + * + * @param context Formatting context; captures the unnamed function argument and named options. + * See the `FormattingContext` documentation for more details. + * @param keys An array of pointers to strings that are compared to the input (`context.getFormattableInput()`) + * in an implementation-specific way. + * @param numKeys The length of the `keys` array. + * @param prefs A mutable reference to an array of pointers to strings. `selectKey()` should set the contents + * of `prefs` to a subset of `keys`, with the best match placed at the lowest index. + * @param numMatching A mutable reference that should be set to the length of the `prefs` array. + * @param status Input/output error code. Should not be set directly by the + * custom selector, which should use `FormattingContext::setSelectorError()` + * to signal errors. The custom selector may pass `status` to other ICU functions + * that can signal errors using this mechanism. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + virtual void selectKey(FormattingContext& context, UnicodeString** keys/*[]*/, int32_t numKeys, UnicodeString** prefs/*[]*/, int32_t& numMatching, UErrorCode& status) const = 0; + virtual ~Selector(); +}; // class Selector + +// Built-in functions +/* + The standard functions are :datetime, :number, + :identity, :plural, :selectordinal, :select, and :gender. 
+ Subject to change +*/ +class StandardFunctions { + friend class MessageFormatter; + + class DateTimeFactory : public FormatterFactory { + public: + Formatter* createFormatter(const Locale& locale, UErrorCode& status) override; + virtual ~DateTimeFactory(); + }; + + class DateTime : public Formatter { + public: + void format(FormattingContext& context, UErrorCode& status) const override; + virtual ~DateTime(); + + private: + const Locale& locale; + friend class DateTimeFactory; + DateTime(const Locale& l) : locale(l) {} + const LocalPointer icuFormatter; + }; + + class NumberFactory : public FormatterFactory { + public: + Formatter* createFormatter(const Locale& locale, UErrorCode& status) override; + virtual ~NumberFactory(); + }; + + class Number : public Formatter { + public: + void format(FormattingContext& context, UErrorCode& status) const override; + virtual ~Number(); + + private: + friend class NumberFactory; + + Number(const Locale& loc) : locale(loc), icuFormatter(number::NumberFormatter::withLocale(loc)) {} + + const Locale& locale; + const number::LocalizedNumberFormatter icuFormatter; + }; + + class IdentityFactory : public FormatterFactory { + public: + Formatter* createFormatter(const Locale& locale, UErrorCode& status) override; + virtual ~IdentityFactory(); + }; + + class Identity : public Formatter { + public: + void format(FormattingContext& context, UErrorCode& status) const override; + virtual ~Identity(); + + private: + friend class IdentityFactory; + + const Locale& locale; + Identity(const Locale& loc) : locale(loc) {} + }; + + class PluralFactory : public SelectorFactory { + public: + Selector* createSelector(const Locale& locale, UErrorCode& status) const override; + virtual ~PluralFactory(); + + private: + friend class MessageFormatter; + + PluralFactory(UPluralType t) : type(t) {} + const UPluralType type; + }; + + class Plural : public Selector { + public: + void selectKey(FormattingContext& context, UnicodeString** keys/*[]*/, 
int32_t numKeys, UnicodeString** prefs/*[]*/, int32_t& numMatching, UErrorCode& status) const override; + virtual ~Plural(); + + private: + friend class PluralFactory; + + // Adopts `r` + Plural(const Locale& loc, PluralRules* r) : locale(loc), rules(r) {} + + const Locale& locale; + LocalPointer rules; + }; + + class TextFactory : public SelectorFactory { + public: + Selector* createSelector(const Locale& locale, UErrorCode& status) const override; + virtual ~TextFactory(); + }; + + class TextSelector : public Selector { + public: + void selectKey(FormattingContext& context, UnicodeString** keys/*[]*/, int32_t numKeys, UnicodeString** prefs/*[]*/, int32_t& numMatching, UErrorCode& status) const override; + virtual ~TextSelector(); + + private: + friend class TextFactory; + + // Formatting `value` to a string might require the locale + const Locale& locale; + + TextSelector(const Locale& l) : locale(l) {} + }; +}; + +} // namespace message2 +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // MESSAGEFORMAT2_FUNCTION_REGISTRY_H + +#endif // U_HIDE_DEPRECATED_API +// eof diff --git a/icu4c/source/i18n/unicode/messageformat2_macros.h b/icu4c/source/i18n/unicode/messageformat2_macros.h new file mode 100644 index 000000000000..bf1f6d8e1869 --- /dev/null +++ b/icu4c/source/i18n/unicode/messageformat2_macros.h @@ -0,0 +1,126 @@ +// © 2016 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html + +#ifndef U_HIDE_DEPRECATED_API + +#ifndef MESSAGEFORMAT2_MACROS_H +#define MESSAGEFORMAT2_MACROS_H + +#if U_SHOW_CPLUSPLUS_API + +#if !UCONFIG_NO_FORMATTING + +#include "plurrule_impl.h" + +#include "unicode/format.h" +#include "unicode/messageformat2_data_model.h" +#include "unicode/unistr.h" +#include "unicode/utypes.h" + +U_NAMESPACE_BEGIN namespace message2 { + +using namespace pluralimpl; + +// Tokens for parser and serializer + +// Syntactically significant characters +#define LEFT_CURLY_BRACE ((UChar32)0x007B) +#define RIGHT_CURLY_BRACE ((UChar32)0x007D) +#define HTAB ((UChar32)0x0009) +#define CR ((UChar32)0x000D) +#define LF ((UChar32)0x000A) + + +#define PIPE ((UChar32)0x007C) +#define EQUALS ((UChar32)0x003D) +#define DOLLAR ((UChar32)0x0024) +#define COLON ((UChar32)0x003A) +#define PLUS ((UChar32)0x002B) +#define HYPHEN ((UChar32)0x002D) +#define PERIOD ((UChar32)0x002E) +#define UNDERSCORE ((UChar32)0x005F) + + +// Reserved sigils +#define BANG ((UChar32)0x0021) +#define AT ((UChar32)0x0040) +#define PERCENT ((UChar32)0x0025) +#define CARET ((UChar32)0x005E) +#define AMPERSAND ((UChar32)0x0026) +#define LESS_THAN ((UChar32)0x003C) +#define GREATER_THAN ((UChar32)0x003E) +#define QUESTION ((UChar32)0x003F) +#define TILDE ((UChar32)0x007E) + +// Fallback +#define REPLACEMENT ((UChar32) 0xFFFD) + +// MessageFormat2 uses three keywords: `let`, `when`, and `match`. 
+ +static constexpr UChar32 ID_LET[] = { + 0x6C, 0x65, 0x74, 0 /* "let" */ +}; + +static constexpr UChar32 ID_WHEN[] = { + 0x77, 0x68, 0x65, 0x6E, 0 /* "when" */ +}; + +static constexpr UChar32 ID_MATCH[] = { + 0x6D, 0x61, 0x74, 0x63, 0x68, 0 /* "match" */ +}; + +// See `s` in the MessageFormat 2 grammar. +// True if and only if `c` is one of SPACE, HTAB, CR or LF. +inline bool isWhitespace(UChar32 c) { + switch (c) { + case SPACE: + case HTAB: + case CR: + case LF: + return true; + default: + return false; + } +} + +// Returns (with no value) immediately if `errorCode` indicates failure +#define CHECK_ERROR(errorCode) \ + if (U_FAILURE(errorCode)) { \ + return; \ + } + +// Returns nullptr immediately if `errorCode` indicates failure +#define NULL_ON_ERROR(errorCode) \ + if (U_FAILURE(errorCode)) { \ + return nullptr; \ + } + +// Returns `*this` immediately if `errorCode` indicates failure +#define THIS_ON_ERROR(errorCode) \ + if (U_FAILURE(errorCode)) { \ + return *this; \ + } + +// Returns false immediately if `errorCode` indicates failure +#define FALSE_ON_ERROR(errorCode) \ + if (U_FAILURE(errorCode)) { \ + return false; \ + } + +inline void setError(UErrorCode newError, UErrorCode& existingError) { + // Only record `newError` while `existingError` is still U_ZERO_ERROR, + // so a status that was already recorded is never overwritten + if (existingError == U_ZERO_ERROR) { + existingError = newError; + } +} + +} // namespace message2 +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // MESSAGEFORMAT2_MACROS_H + +#endif // U_HIDE_DEPRECATED_API +// eof diff --git a/icu4c/source/i18n/unicode/messageformat2_utils.h b/icu4c/source/i18n/unicode/messageformat2_utils.h new file mode 100644 index 000000000000..8e2218622ab5 --- /dev/null +++ b/icu4c/source/i18n/unicode/messageformat2_utils.h @@ -0,0 +1,364 @@ +// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html + +#ifndef U_HIDE_DEPRECATED_API + +#ifndef MESSAGEFORMAT_UTILS_H +#define MESSAGEFORMAT_UTILS_H + +#if U_SHOW_CPLUSPLUS_API + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/messageformat2_macros.h" +#include "unicode/unistr.h" +#include "unicode/utypes.h" +#include "hash.h" +#include "uvector.h" + +U_NAMESPACE_BEGIN namespace message2 { + +// Defined for convenience, in case we end up using a different +// representation in the data model for variable references and/or +// variable definitions +static inline UBool compareVariableName(const UElement e1, const UElement e2) { + return uhash_compareUnicodeString(e1, e2); +} + + +/** + * The `ImmutableVector` class represents a polymorphic immutable list, + * constructed using the builder pattern. It's used to represent + * various nodes in the MessageFormat data model that may have a + * variable number of components. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ +template +class U_I18N_API ImmutableVector : public UMemory { + +private: + // If a copy constructor fails, the list is left in an inconsistent state, + // because copying has to allocate a new vector. + // Copy constructors can't take error codes as arguments. So we have to + // resort to this, and all methods must check the invariant and signal an + // error if it's false. The error should be U_MEMORY_ALLOCATION_ERROR, + // since isBogus iff an allocation failed. + // For classes that contain a ImmutableVector member, there is no guarantee that + // the list will be non-bogus. ImmutableVector operations use assertions to detect + // this condition as early as possible. + bool isBogus() const { return !contents.isValid(); } + +public: + /** + * Size accessor. + * + * @return The number of elements in this list. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. 
+ */ + int32_t length() const; + + /** + * Element accessor. + * Precondition: i < length() + * + * @param i The index to access. + * @return The list element at `i` + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + const T* get(int32_t i) const; + + /** + * Checks for the existence of an element. + * + * @param element The item to search for. + * @return True if and only if `element` occurs in this list. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + UBool contains(const T& element) const; + + /** + * Finds the index of an element. + * + * @param element The item to search for. + * @param index A mutable reference that is set to the first index + * where `element` occurs in this list, if it occurs. + * @return True if and only if `element` occurs in this list. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + UBool find(const T& element, int32_t& index) const; + + /** + * Copy constructor. Performs a deep copy (`T` must have + * a copy constructor.) + * + * @param other The ImmutableVector to copy. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + ImmutableVector(const ImmutableVector& other); + /** + * Destructor. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + virtual ~ImmutableVector(); + + /** + * The mutable `ImmutableVector::Builder` class allows the list to be constructed + * one element at a time. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + class U_I18N_API Builder : public UMemory { + public: + /** + * Adds to the list. Adopts `element`. + * + * @param element The element to be added. + * @param status Input/output error code. + * @return A reference to the builder. 
+ * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + Builder& add(T *element, UErrorCode& status); + /** + * Constructs a new `ImmutableVector` using the list of elements + * set with previous `add()` calls. + * + * The builder object (`this`) can still be used after calling `build()`. + * + * @param errorCode Input/output error code. + * @return The new ImmutableVector, which is non-null if U_SUCCESS(errorCode). + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + ImmutableVector* build(UErrorCode &errorCode) const; + + virtual ~Builder(); + private: + friend class ImmutableVector; + LocalPointer contents; + Builder(UErrorCode& errorCode); + }; // class ImmutableVector::Builder + + /** + * Returns a new `ImmutableVector::Builder` object. + * + * @param errorCode Input/output error code. + * @return The new Builder object, which is non-null if U_SUCCESS(errorCode). + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only.
+ */ + static Builder* builder(UErrorCode &errorCode); + +private: + friend class Builder; // NOTE: Builder should only call buildList(); not the constructors + + // Helper functions for vector copying + // T1 must have a copy constructor + // This may leave dst->pointer == nullptr, which is handled by the UVector assign() method + template + static void copyElements(UElement *dst, UElement *src); + + // Copies the contents of `builder` + // This won't compile unless T is a type that has a copy assignment operator + static ImmutableVector* buildList(const Builder &builder, UErrorCode &errorCode); + + // Adopts `contents` + ImmutableVector(UVector* things) : contents(things) { U_ASSERT(things != nullptr); } + + // Used as const, but since UVector doesn't have a copy constructor, + // writing the copy constructor for ImmutableVector requires `contents` to be non-const + LocalPointer contents; +}; // class ImmutableVector + + +/** + * The `OrderedMap` class represents a polymorphic hash table with string + * keys, constructed using the builder pattern. It's used to represent + * various nodes in the MessageFormat data model that may have a + * variable number of named components. The map records the order in which + * keys were added and iterates over its elements in that order. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ +template +class U_I18N_API OrderedMap : public UMemory { +class MessageFormatDataModel { + class Operator; + class VariantMap; +}; + +private: + // See comments under `ImmutableVector::isBogus()` + bool isBogus() const { return (!contents.isValid() || !keys.isValid()); } + +public: + /** + * Used with `next()`. + * + * The initial iterator position for `next()`. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + static constexpr int32_t FIRST = 0; + /** + * Iterates over all keys in the order in which they were added. 
+ * + * @param pos A mutable reference to the current iterator position. Should be set to + * `FIRST` before the first call to `next()`. + * @param k A mutable reference that is set to the name of the next key + * if the return value is true. + * @param v A mutable reference to a pointer to an element of the map's value type, + * which is non-null if the return value is true. + * @return True if and only if there are elements starting at `pos`. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + UBool next(int32_t &pos, UnicodeString& k, const V*& v) const; + /** + * Size accessor. + * + * @return The number of elements in this map. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + int32_t size() const; + + /** + * Copy constructor. Performs a deep copy (`V` must have + * a copy constructor.) + * + * @param other The OrderedMap to copy. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + OrderedMap(const OrderedMap& other); + + /** + * The mutable `OrderedMap::Builder` class allows the map to be constructed + * one key/value pair at a time. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + class U_I18N_API Builder : public UMemory { + public: + /** + * Adds to the map. Adopts `value`. + * Precondition: !has(key) + * + * @param key The name to be added. It is an internal error to + * call `add()` with a key that has already been added. + * @param value The value to be associated with the name. + * @param status Input/output error code. + * @return A reference to the builder. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + Builder& add(const UnicodeString& key, V* value, UErrorCode& status); + /** + * Checks if a key is in the map. 
+ * + * @param key Reference to a (string) key. + * @return True if and only if `key` is mapped to a value in the map. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + UBool has(const UnicodeString& key) const; + /** + * Constructs a new `OrderedMap` using the keys and values + * set with previous `add()` calls. + * + * The builder object (`this`) can still be used after calling `build()`. + * + * @param errorCode Input/output error code. + * @return The new OrderedMap, which is non-null if U_SUCCESS(errorCode). + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + OrderedMap* build(UErrorCode& errorCode) const; + /** + * Destructor. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. + */ + virtual ~Builder(); + private: + friend class OrderedMap; + + // Only called by builder() + Builder(UErrorCode& errorCode); + + // Hashtable representing the underlying map + LocalPointer contents; + // Maintain a list of keys that encodes the order in which + // keys are added. This wastes some space, but allows us to + // re-use ICU4C's Hashtable abstraction without re-implementing + // an ordered version of it. + LocalPointer keys; + }; // class OrderedMap::Builder + + /** + * Destructor. + * + * @internal ICU 74.0 technology preview + * @deprecated This API is for technology preview only. 
+ */ + virtual ~OrderedMap(); + static Builder* builder(UErrorCode &errorCode); + +private: + + // Helper methods for copy constructor + static void copyStrings(UElement *dst, UElement *src); + static UVector* copyStringVector(const UVector& other); + // Postcondition: U_FAILURE(errorCode) || !((return value).isBogus()) + static OrderedMap* create(Hashtable* c, UVector* k, UErrorCode& errorCode); + static Hashtable* copyHashtable(const Hashtable& other); + OrderedMap(Hashtable* c, UVector* k); + // Hashtable representing the underlying map + const LocalPointer contents; + // List of keys + const LocalPointer keys; +}; // class OrderedMap + +#include "messageformat2_utils_impl.h" + +} // namespace message2 +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // MESSAGEFORMAT_UTILS_H + +#endif // U_HIDE_DEPRECATED_API +// eof + diff --git a/icu4c/source/test/intltest/Makefile.in b/icu4c/source/test/intltest/Makefile.in index af3493def5cb..745f099b015d 100644 --- a/icu4c/source/test/intltest/Makefile.in +++ b/icu4c/source/test/intltest/Makefile.in @@ -49,6 +49,7 @@ fldset.o dadrfmt.o dadrcal.o dcfmapts.o decoll.o dtfmapts.o dtfmrgts.o dtfmtrtts dtptngts.o encoll.o escoll.o ficoll.o frcoll.o g7coll.o intltest.o \ itercoll.o itformat.o itmajor.o itutil.o jacoll.o lcukocol.o \ loctest.o localebuildertest.o localematchertest.o \ +messageformat2test.o messageformat2test_builtin.o messageformat2test_custom.o messageformat2test_features.o messageformat2test_fromjson.o messageformat2test_icu.o \ miscdtfm.o mnkytst.o msfmrgts.o nmfmapts.o nmfmtrt.o \ numfmtst.o numrgts.o plurults.o plurfmts.o pptest.o regcoll.o restest.o restsnew.o \ sdtfmtts.o svccoll.o tchcfmt.o selfmts.o \ diff --git a/icu4c/source/test/intltest/itformat.cpp b/icu4c/source/test/intltest/itformat.cpp index 5c67d189bead..c51103c6ff3b 100644 --- a/icu4c/source/test/intltest/itformat.cpp +++ b/icu4c/source/test/intltest/itformat.cpp @@ -33,6 +33,7 @@ 
#include "dtfmapts.h" // DateFormatAPI #include "dtfmttst.h" // DateFormatTest #include "tmsgfmt.h" // TestMessageFormat +#include "messageformat2test.h" // TestMessageFormat2 #include "dtfmrgts.h" // DateFormatRegressionTest #include "msfmrgts.h" // MessageFormatRegressionTest #include "miscdtfm.h" // DateFormatMiscTests @@ -287,6 +288,7 @@ void IntlTestFormat::runIndexedTest( int32_t index, UBool exec, const char* &nam callTest(*test, par); } break; + TESTCLASS(60,message2::TestMessageFormat2); default: name = ""; break; //needed to end loop } if (exec) { diff --git a/icu4c/source/test/intltest/messageformat2test.cpp b/icu4c/source/test/intltest/messageformat2test.cpp new file mode 100644 index 000000000000..4c6975169411 --- /dev/null +++ b/icu4c/source/test/intltest/messageformat2test.cpp @@ -0,0 +1,904 @@ +// © 2016 and later: Unicode, Inc. and others. + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/messageformat2.h" +#include "messageformat2test.h" + +using namespace icu::message2; + +/* + TODO: Tests need to be unified in a single format that + both ICU4C and ICU4J can use, rather than being embedded in code. + + Tests are included in their current state to give a sense of + how much test coverage has been achieved. Most of the testing is + of the parser/serializer; the formatter needs to be tested more + thoroughly. 
+*/ + +/* +Tests reflect the syntax specified in + + https://github.com/unicode-org/message-format-wg/commits/main/spec/message.abnf + +as of the following commit from 2023-05-09: + https://github.com/unicode-org/message-format-wg/commit/194f6efcec5bf396df36a19bd6fa78d1fa2e0867 + +*/ + +static const int32_t numValidTestCases = 25; +TestResult validTestCases[] = { + {"{hello {|4.2| :number}}", "hello 4.2"}, + {"{hello {|4.2| :number minimumFractionDigits=2}}", "hello 4.20"}, + {"{hello {|4.2| :number minimumFractionDigits = 2}}", "hello 4.20"}, + {"{hello {|4.2| :number minimumFractionDigits= 2}}", "hello 4.20"}, + {"{hello {|4.2| :number minimumFractionDigits =2}}", "hello 4.20"}, + {"{hello {|4.2| :number minimumFractionDigits=2 }}", "hello 4.20"}, + {"{hello {|4.2| :number minimumFractionDigits=2 bar=3}}", "hello 4.20"}, + {"{hello {|4.2| :number minimumFractionDigits=2 bar=3 }}", "hello 4.20"}, + {"{hello {|4.2| :number minimumFractionDigits=|2|}}", "hello 4.20"}, + {"{content -tag}", "content -tag"}, + {"{}", ""}, + // tests for escape sequences in literals + {"{{|hel\\\\lo|}}", "hel\\lo"}, + {"{{|hel\\|lo|}}", "hel|lo"}, + {"{{|hel\\|\\\\lo|}}", "hel|\\lo"}, + // tests for text escape sequences + {"{hel\\{lo}", "hel{lo"}, + {"{hel\\}lo}", "hel}lo"}, + {"{hel\\\\lo}", "hel\\lo"}, + {"{hel\\{\\\\lo}", "hel{\\lo"}, + {"{hel\\{\\}lo}", "hel{}lo"}, + // tests for ':' in unquoted literals + {"match {|foo| :select} when o:ne {one} when * {other}", "other"}, + {"match {|foo| :select} when one: {one} when * {other}", "other"}, + {"let $foo = {|42| :number option=a:b} {bar {$foo}}", "bar 42"}, + {"let $foo = {|42| :number option=a:b:c} {bar {$foo}}", "bar 42"}, + // tests for newlines in literals and text + {"{hello {|wo\nrld|}}", "hello wo\nrld"}, + {"{hello wo\nrld}", "hello wo\nrld"} +}; + + +static const int32_t numResolutionErrors = 6; +TestResultError jsonTestCasesResolutionError[] = { + {"let $foo = {$bar} match {$foo :plural} when one {one} when * {other}", 
"other", U_UNRESOLVED_VARIABLE_ERROR}, + {"let $foo = {$bar} match {$foo :plural} when one {one} when * {other}", "other", U_UNRESOLVED_VARIABLE_ERROR}, + {"let $bar = {$none :plural} match {$foo :select} when one {one} when * {{$bar}}", "{$none}", U_UNRESOLVED_VARIABLE_ERROR}, + {"{{|content| +tag}}", "{|content|}", U_UNKNOWN_FUNCTION_ERROR}, + {"{{|content| -tag}}", "{|content|}", U_UNKNOWN_FUNCTION_ERROR}, + {"{{|content| +tag} {|content| -tag}}", "{|content|} {|content|}", U_UNKNOWN_FUNCTION_ERROR}, + {"{content {|foo| +markup}}", "content {|foo|}", U_UNKNOWN_FUNCTION_ERROR} +}; + +static const int32_t numReservedErrors = 34; +UnicodeString reservedErrors[] = { + // tests for reserved syntax + "{hello {|4.2| @number}}", + "{hello {|4.2| @n|um|ber}}", + "{hello {|4.2| &num|be|r}}", + "{hello {|4.2| ?num|be||r|s}}", + "{hello {|foo| !number}}", + "{hello {|foo| *number}}", + "{hello {#number}}", + "{{num x \\\\ abcde |aaa||3.14||42| r }}", + "{hello {$foo >num x \\\\ abcde |aaa||3.14| |42| r }}", + 0 +}; + +static const int32_t numMatches = 15; +UnicodeString matches[] = { + // multiple scrutinees, with or without whitespace + "match {$foo :select} {$bar :select} when one * {one} when * * {other}", + "match {$foo :select} {$bar :select}when one * {one} when * * {other}", + "match {$foo :select}{$bar :select} when one * {one} when * * {other}", + "match {$foo :select}{$bar :select}when one * {one} when * * {other}", + "match{$foo :select} {$bar :select} when one * {one} when * * {other}", + "match{$foo :select} {$bar :select}when one * {one} when * * {other}", + "match{$foo :select}{$bar :select} when one * {one} when * * {other}", + "match{$foo :select}{$bar :select}when one * {one} when * * {other}", + // multiple variants, with or without whitespace + "match {$foo :select} {$bar :select} when one * {one} when * * {other}", + "match {$foo :select} {$bar :select} when one * {one}when * * {other}", + "match {$foo :select} {$bar :select}when one * {one} when * * 
{other}", + "match {$foo :select} {$bar :select}when one * {one}when * * {other}", + // one or multiple keys, with or without whitespace before pattern + "match {$foo :select} {$bar :select} when one *{one} when * * {foo}", + "match {$foo :select} {$bar :select} when one * {one} when * * {foo}", + "match {$foo :select} {$bar :select} when one * {one} when * * {foo}" +}; + +static const int32_t numSyntaxTests = 22; +// These patterns are tested to ensure they parse without a syntax error +UnicodeString syntaxTests[] = { + "{hello {|foo| :number }}", + // zero, one or multiple options, with or without whitespace before '}' + "{{:foo}}", + "{{:foo }}", + "{{:foo }}", + "{{:foo k=v}}", + "{{:foo k=v }}", + "{{:foo k1=v1 k2=v2}}", + "{{:foo k1=v1 k2=v2 }}", + // literals or variables followed by space, with or without an annotation following + "{{|3.14| }}", + "{{|3.14| }}", + "{{|3.14| :foo}}", + "{{|3.14| :foo }}", + "{{$bar }}", + "{{$bar }}", + "{{$bar :foo}}", + "{{$bar :foo }}", + // Trailing whitespace at end of message should be accepted + "match {$foo :select} {$bar :select} when one * {one} when * * {other} ", + "{hi} ", + // Variable names can contain '-' or ':' + "{{$bar:foo}}", + "{{$bar-foo}}", + // Name shadowing is allowed + "let $foo = {|hello|} let $foo = {$foo} {{$foo}}", + // Unquoted literal -- should work + "{good {placeholder}}", + 0 +}; + +void +TestMessageFormat2::runIndexedTest(int32_t index, UBool exec, + const char* &name, char* /*par*/) { + TESTCASE_AUTO_BEGIN; + TESTCASE_AUTO(featureTests); + TESTCASE_AUTO(messageFormat1Tests); + TESTCASE_AUTO(testAPICustomFunctions); + TESTCASE_AUTO(testCustomFunctions); + TESTCASE_AUTO(testBuiltInFunctions); + TESTCASE_AUTO(testDataModelErrors); + TESTCASE_AUTO(testResolutionErrors); + TESTCASE_AUTO(testAPI); + TESTCASE_AUTO(testAPISimple); + TESTCASE_AUTO(testVariousPatterns); + TESTCASE_AUTO(testInvalidPatterns); + TESTCASE_AUTO_END; +} + +// Example for design doc -- version without null and error 
checks +void TestMessageFormat2::testAPISimple() { + IcuTestErrorCode errorCode1(*this, "testAPI"); + UErrorCode errorCode = (UErrorCode) errorCode1; + UParseError parseError; + Locale locale = "en_US"; + + // Since this is the example used in the + // design doc, it elides null checks and error checks. + // To be used in the test suite, it should include those checks + // Null checks and error checks elided + MessageFormatter::Builder* builder = MessageFormatter::builder(errorCode); + MessageFormatter* mf = builder->setPattern(u"{Hello, {$userName}!}") + .build(parseError, errorCode); + + MessageArguments::Builder* argsBuilder = MessageArguments::builder(errorCode); + argsBuilder->add("userName", "John", errorCode); + MessageArguments* args = argsBuilder->build(errorCode); + + UnicodeString result; + mf->formatToString(*args, errorCode, result); + assertEquals("testAPI", result, "Hello, John!"); + result.remove(); + + delete mf; + mf = builder->setPattern("{Today is {$today :datetime skeleton=yMMMdEEE}.}") + .setLocale(locale) + .build(parseError, errorCode); + + Calendar* cal(Calendar::createInstance(errorCode)); + // Sunday, October 28, 2136 8:39:12 AM PST + cal->set(2136, Calendar::OCTOBER, 28, 8, 39, 12); + UDate date = cal->getTime(errorCode); + + argsBuilder->addDate("today", date, errorCode); + delete args; + args = argsBuilder->build(errorCode); + mf->formatToString(*args, errorCode, result); + assertEquals("testAPI", "Today is Sun, Oct 28, 2136.", result); + result.remove(); + + argsBuilder->addInt64("photoCount", 12, errorCode); + argsBuilder->add("userGender", "feminine", errorCode); + argsBuilder->add("userName", "Maria", errorCode); + delete args; + args = argsBuilder->build(errorCode); + + delete mf; + mf = builder->setPattern("match {$photoCount :plural} {$userGender :select}\n\ + when 1 masculine {{$userName} added a new photo to his album.}\n \ + when 1 feminine {{$userName} added a new photo to her album.}\n \ + when 1 * {{$userName} added a new 
photo to their album.}\n \ + when * masculine {{$userName} added {$photoCount} photos to his album.}\n \ + when * feminine {{$userName} added {$photoCount} photos to her album.}\n \ + when * * {{$userName} added {$photoCount} photos to their album.}") + .setLocale(locale) + .build(parseError, errorCode); + mf->formatToString(*args, errorCode, result); + assertEquals("testAPI", "Maria added 12 photos to her album.", result); + + delete builder; + delete argsBuilder; + delete cal; + delete mf; + delete args; +} + +// Design doc example, with more details +void TestMessageFormat2::testAPI() { + IcuTestErrorCode errorCode(*this, "testAPI"); + Locale locale = "en_US"; + LocalPointer testBuilder(TestCase::builder(errorCode)); + + + // Pattern: "{Hello, {$userName}!}" + LocalPointer test(testBuilder->setName("testAPI") + .setPattern("{Hello, {$userName}!}") + .setArgument("userName", "John", errorCode) + .setExpected("Hello, John!") + .setLocale(locale, errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + // Pattern: "{Today is {$today ..." 
+ LocalPointer cal(Calendar::createInstance(errorCode)); + // Sunday, October 28, 2136 8:39:12 AM PST + cal->set(2136, Calendar::OCTOBER, 28, 8, 39, 12); + UDate date = cal->getTime(errorCode); + + test.adoptInstead(testBuilder->setName("testAPI") + .setPattern("{Today is {$today :datetime skeleton=yMMMdEEE}.}") + .setDateArgument("today", date, errorCode) + .setExpected("Today is Sun, Oct 28, 2136.") + .setLocale(locale, errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + // Pattern matching - plural + UnicodeString pattern = "match {$photoCount :select} {$userGender :select}\n\ + when 1 masculine {{$userName} added a new photo to his album.}\n \ + when 1 feminine {{$userName} added a new photo to her album.}\n \ + when 1 * {{$userName} added a new photo to their album.}\n \ + when * masculine {{$userName} added {$photoCount} photos to his album.}\n \ + when * feminine {{$userName} added {$photoCount} photos to her album.}\n \ + when * * {{$userName} added {$photoCount} photos to their album.}"; + + + int64_t photoCount = 12; + test.adoptInstead(testBuilder->setName("testAPI") + .setPattern(pattern) + .setArgument("photoCount", photoCount, errorCode) + .setArgument("userGender", "feminine", errorCode) + .setArgument("userName", "Maria", errorCode) + .setExpected("Maria added 12 photos to her album.") + .setLocale(locale, errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + // Built-in functions + pattern = "match {$photoCount :plural} {$userGender :select}\n\ + when 1 masculine {{$userName} added a new photo to his album.}\n \ + when 1 feminine {{$userName} added a new photo to her album.}\n \ + when 1 * {{$userName} added a new photo to their album.}\n \ + when * masculine {{$userName} added {$photoCount} photos to his album.}\n \ + when * feminine {{$userName} added {$photoCount} photos to her album.}\n \ + when * * {{$userName} added {$photoCount} photos to their album.}"; + + photoCount = 
1; + test.adoptInstead(testBuilder->setName("testAPI") + .setPattern(pattern) + .setArgument("photoCount", photoCount, errorCode) + .setArgument("userGender", "feminine", errorCode) + .setArgument("userName", "Maria", errorCode) + .setExpected("Maria added a new photo to her album.") + .setLocale(locale, errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); +} + +// Custom functions example from the ICU4C API design doc +// Note: error/null checks are omitted +void TestMessageFormat2::testAPICustomFunctions() { + IcuTestErrorCode errorCode1(*this, "testAPICustomFunctions"); + UErrorCode errorCode = (UErrorCode) errorCode1; + UParseError parseError; + Locale locale = "en_US"; + + // Set up custom function registry + FunctionRegistry::Builder* builder = FunctionRegistry::builder(errorCode); + // Note that this doesn't use `setDefaultFormatterNameForType()`; not implemented yet + FunctionRegistry* functionRegistry = + builder->setFormatter(FunctionName("person"), new PersonNameFormatterFactory(), errorCode) + .build(errorCode); + + Person* person = new Person(UnicodeString("Mr."), UnicodeString("John"), UnicodeString("Doe")); + + MessageArguments::Builder* argsBuilder = MessageArguments::builder(errorCode); + argsBuilder->addObject("name", person, errorCode); + MessageArguments* arguments = argsBuilder->build(errorCode); + + MessageFormatter::Builder* mfBuilder = MessageFormatter::builder(errorCode); + UnicodeString result; + // This fails, because we did not provide a function registry: + MessageFormatter* mf = mfBuilder->setPattern("{Hello {$name :person formality=informal}}") + .setLocale(locale) + .build(parseError, errorCode); + mf->formatToString(*arguments, errorCode, result); + assertEquals("testAPICustomFunctions", U_UNKNOWN_FUNCTION_ERROR, errorCode); + + errorCode = U_ZERO_ERROR; + result.remove(); + mfBuilder->setFunctionRegistry(functionRegistry) + .setLocale(locale); + + delete mf; + mf = mfBuilder->setPattern("{Hello {$name 
:person formality=informal}}") + .build(parseError, errorCode); + mf->formatToString(*arguments, errorCode, result); + assertEquals("testAPICustomFunctions", "Hello John", result); + result.remove(); + + delete mf; + mf = mfBuilder->setPattern("{Hello {$name :person formality=formal}}") + .build(parseError, errorCode); + mf->formatToString(*arguments, errorCode, result); + assertEquals("testAPICustomFunctions", "Hello Mr. Doe", result); + result.remove(); + + delete mf; + mf = mfBuilder->setPattern("{Hello {$name :person formality=formal length=long}}") + .build(parseError, errorCode); + mf->formatToString(*arguments, errorCode, result); + assertEquals("testAPICustomFunctions", "Hello Mr. John Doe", result); + + delete arguments; + delete builder; + delete functionRegistry; + delete person; + delete mf; + delete mfBuilder; + delete argsBuilder; +} + +void TestMessageFormat2::testValidPatterns(const TestResult* patterns, int32_t len, IcuTestErrorCode& errorCode) { + LocalPointer testBuilder(TestCase::builder(errorCode)); + testBuilder->setName("testOtherJsonPatterns"); + + LocalPointer test; + + for (int32_t i = 0; i < len - 1; i++) { + test.adoptInstead(testBuilder->setPattern(patterns[i].pattern) + .setExpected(patterns[i].output) + .setExpectSuccess() + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + } +} + +void TestMessageFormat2::testResolutionErrors(IcuTestErrorCode& errorCode) { + LocalPointer testBuilder(TestCase::builder(errorCode)); + testBuilder->setName("testResolutionErrorPattern"); + + LocalPointer test; + + for (int32_t i = 0; i < numResolutionErrors - 1; i++) { + test.adoptInstead(testBuilder->setPattern(jsonTestCasesResolutionError[i].pattern) + .setExpected(jsonTestCasesResolutionError[i].output) + .setExpectedError(jsonTestCasesResolutionError[i].expected) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + } +} + +void TestMessageFormat2::testNoSyntaxErrors(const UnicodeString* patterns, int32_t 
len, IcuTestErrorCode& errorCode) { + LocalPointer testBuilder(TestCase::builder(errorCode)); + testBuilder->setName("testReservedErrorPattern"); + + LocalPointer test; + + for (int32_t i = 0; i < len - 1; i++) { + test.adoptInstead(testBuilder->setPattern(patterns[i]) + .setNoSyntaxError() + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + } +} + +void TestMessageFormat2::testVariousPatterns() { + IcuTestErrorCode errorCode(*this, "jsonTests"); + + jsonTests(errorCode); + testValidPatterns(validTestCases, numValidTestCases, errorCode); + testResolutionErrors(errorCode); + testNoSyntaxErrors(reservedErrors, numReservedErrors, errorCode); + testNoSyntaxErrors(matches, numMatches, errorCode); + testNoSyntaxErrors(syntaxTests, numSyntaxTests, errorCode); +} + +/* + Tests a single pattern, which is expected to be invalid. + + `testNum`: Test number (only used for diagnostic output) + `s`: The pattern string. + + The error is assumed to be on line 0, offset `s.length()`. +*/ +void TestMessageFormat2::testInvalidPattern(uint32_t testNum, const UnicodeString& s) { + testInvalidPattern(testNum, s, s.length(), 0); +} + +/* + Tests a single pattern, which is expected to be invalid. + + `testNum`: Test number (only used for diagnostic output) + `s`: The pattern string. + + The error is assumed to be on line 0, offset `expectedErrorOffset`. +*/ +void TestMessageFormat2::testInvalidPattern(uint32_t testNum, const UnicodeString& s, uint32_t expectedErrorOffset) { + testInvalidPattern(testNum, s, expectedErrorOffset, 0); +} + +/* + Tests a single pattern, which is expected to be invalid. + + `testNum`: Test number (only used for diagnostic output) + `s`: The pattern string. + `expectedErrorOffset`: The expected character offset for the parse error. + + The error is assumed to be on line `expectedErrorLine`, offset `expectedErrorOffset`. 
+*/
+void TestMessageFormat2::testInvalidPattern(uint32_t testNum, const UnicodeString& s, uint32_t expectedErrorOffset, uint32_t expectedErrorLine) {
+    IcuTestErrorCode errorCode(*this, "testInvalidPattern");
+    char testName[50];
+    // `testNum` is unsigned, so format it with %u
+    snprintf(testName, sizeof(testName), "testInvalidPattern: %u", testNum);
+
+    LocalPointer testBuilder(TestCase::builder(errorCode));
+    if (U_FAILURE(errorCode)) {
+        return;
+    }
+    // Pass the formatted buffer (not a string literal) so that the
+    // test number shows up in diagnostic output
+    testBuilder->setName(testName);
+
+    LocalPointer test(testBuilder->setPattern(s)
+                      .setExpectedError(U_SYNTAX_ERROR)
+                      .setExpectedLineNumberAndOffset(expectedErrorLine, expectedErrorOffset)
+                      .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+}
+
+/*
+  Tests a single pattern, which is expected to cause the parser to
+  emit a data model error
+
+  `testNum`: Test number (only used for diagnostic output)
+  `s`: The pattern string.
+  `expectedErrorCode`: the error code expected to be returned by the formatter
+
+  For now, the line and character numbers are not checked
+*/
+void TestMessageFormat2::testSemanticallyInvalidPattern(uint32_t testNum, const UnicodeString& s, UErrorCode expectedErrorCode) {
+    IcuTestErrorCode errorCode(*this, "testInvalidPattern");
+
+    char testName[50];
+    // `testNum` is unsigned, so format it with %u
+    snprintf(testName, sizeof(testName), "testSemanticallyInvalidPattern: %u", testNum);
+
+    LocalPointer testBuilder(TestCase::builder(errorCode));
+    // Don't dereference the builder if it could not be created
+    if (U_FAILURE(errorCode)) {
+        return;
+    }
+    // Pass the formatted buffer (not a string literal) so that the
+    // test number shows up in diagnostic output
+    testBuilder->setName(testName).setPattern(s);
+    testBuilder->setExpectedError(expectedErrorCode);
+    LocalPointer result(testBuilder->build(errorCode));
+    CHECK_ERROR(errorCode);
+
+    TestUtils::runTestCase(*this, *result, errorCode);
+}
+
+/*
+  Tests a single pattern, which is expected to cause the formatter
+  to emit a resolution error, selection error, or
+  formatting error
+
+  `testNum`: Test number (only used for diagnostic output)
+  `s`: The pattern string.
+ `expectedErrorCode`: the error code expected to be returned by the formatter + + For now, the line and character numbers are not checked +*/ +void TestMessageFormat2::testRuntimeErrorPattern(uint32_t testNum, const UnicodeString& s, UErrorCode expectedErrorCode) { + IcuTestErrorCode errorCode(*this, "testInvalidPattern"); + char testName[50]; + snprintf(testName, sizeof(testName), "testInvalidPattern (errors): %u", testNum); + + LocalPointer testBuilder(TestCase::builder(errorCode)); + + LocalPointer test(testBuilder->setName(testName) + .setPattern(s) + .setExpectedError(expectedErrorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); +} + +/* + Tests a single pattern, which is expected to cause the formatter + to emit a resolution error, selection error, or + formatting error + + `testNum`: Test number (only used for diagnostic output) + `s`: The pattern string. + `expectedErrorCode`: the error code expected to be returned by the formatter + + For now, the line and character numbers are not checked +*/ +void TestMessageFormat2::testRuntimeWarningPattern(uint32_t testNum, const UnicodeString& s, const UnicodeString& expectedResult, UErrorCode expectedErrorCode) { + IcuTestErrorCode errorCode(*this, "testInvalidPattern"); + char testName[50]; + snprintf(testName, sizeof(testName), "testInvalidPattern (warnings): %u", testNum); + + LocalPointer testBuilder(TestCase::builder(errorCode)); + LocalPointer test(testBuilder->setName(testName) + .setPattern(s) + .setExpected(expectedResult) + .setExpectedError(expectedErrorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); +} + +void TestMessageFormat2::testDataModelErrors() { + uint32_t i = 0; + IcuTestErrorCode errorCode(*this, "testDataModelErrors"); + + // The following tests are syntactically valid but should trigger a data model error + + // Examples taken from https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md + + // Variant key 
mismatch + testSemanticallyInvalidPattern(++i, "match {$foo :plural} {$bar :plural} when one{one}", U_VARIANT_KEY_MISMATCH_ERROR); + testSemanticallyInvalidPattern(++i, "match {$foo :plural} {$bar :plural} when one {one}", U_VARIANT_KEY_MISMATCH_ERROR); + testSemanticallyInvalidPattern(++i, "match {$foo :plural} {$bar :plural} when one {one}", U_VARIANT_KEY_MISMATCH_ERROR); + + testSemanticallyInvalidPattern(++i, "match {$foo :plural} when * * {foo}", U_VARIANT_KEY_MISMATCH_ERROR); + testSemanticallyInvalidPattern(++i, "match {$one :plural}\n\ + when 1 2 {Too many}\n\ + when * {Otherwise}", U_VARIANT_KEY_MISMATCH_ERROR); + testSemanticallyInvalidPattern(++i, "match {$one :plural} {$two :plural}\n\ + when 1 2 {Two keys}\n\ + when * {Missing a key}\n\ + when * * {Otherwise}", U_VARIANT_KEY_MISMATCH_ERROR); + + // Non-exhaustive patterns + testSemanticallyInvalidPattern(++i, "match {$one :plural}\n\ + when 1 {Value is one}\n\ + when 2 {Value is two}\n", U_NONEXHAUSTIVE_PATTERN_ERROR); + testSemanticallyInvalidPattern(++i, "match {$one :plural} {$two :plural}\n\ + when 1 * {First is one}\n\ + when * 1 {Second is one}\n", U_NONEXHAUSTIVE_PATTERN_ERROR); + + // Duplicate option names + testSemanticallyInvalidPattern(++i, "{{:foo a=1 b=2 a=1}}", U_DUPLICATE_OPTION_NAME_ERROR); + testSemanticallyInvalidPattern(++i, "{{:foo a=1 a=1}}", U_DUPLICATE_OPTION_NAME_ERROR); + testSemanticallyInvalidPattern(++i, "{{:foo a=1 a=2}}", U_DUPLICATE_OPTION_NAME_ERROR); + testSemanticallyInvalidPattern(++i, "{{|x| :foo a=1 a=2}}", U_DUPLICATE_OPTION_NAME_ERROR); + + // Missing selector annotation + testSemanticallyInvalidPattern(++i, "match {$one}\n\ + when 1 {Value is one}\n\ + when * {Value is not one}\n", U_MISSING_SELECTOR_ANNOTATION_ERROR); + testSemanticallyInvalidPattern(++i, "let $one = {|The one|}\n\ + match {$one}\n\ + when 1 {Value is one}\n\ + when * {Value is not one}\n", U_MISSING_SELECTOR_ANNOTATION_ERROR); + testSemanticallyInvalidPattern(++i, "match {|horse| ^private}\n\ 
+ when 1 {The value is one.}\n \ + when * {The value is not one.}\n", U_MISSING_SELECTOR_ANNOTATION_ERROR); + testSemanticallyInvalidPattern(++i, "match {$foo !select} when |1| {one} when * {other}", + U_MISSING_SELECTOR_ANNOTATION_ERROR); + testSemanticallyInvalidPattern(++i, "match {$foo ^select} when |1| {one} when * {other}", + U_MISSING_SELECTOR_ANNOTATION_ERROR); + + LocalPointer testBuilder(TestCase::builder(errorCode)); + testBuilder->setName("testDataModelErrors"); + + // This should *not* trigger a "missing selector annotation" error + LocalPointer test; + test.adoptInstead(testBuilder->setPattern("let $one = {|The one| :select}\n\ + match {$one}\n\ + when 1 {Value is one}\n\ + when * {Value is not one}") + .setExpected("Value is not one") + .setExpectSuccess() + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("let $one = {|The one| :select}\n\ + let $two = {$one}\n\ + match {$two}\n\ + when 1 {Value is one}\n\ + when * {Value is not one}") + .setExpected("Value is not one") + .setExpectSuccess() + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); +} + +void TestMessageFormat2::testResolutionErrors() { + uint32_t i = 0; + + // The following tests are syntactically valid and free of data model errors, + // but should trigger a resolution error + + // Unresolved variable + testRuntimeWarningPattern(++i, "{{$oops}}", "{$oops}", U_UNRESOLVED_VARIABLE_ERROR); + testRuntimeWarningPattern(++i, "let $x = {$forward} let $forward = {42} {{$x}}", "{$forward}", U_UNRESOLVED_VARIABLE_ERROR); + + // Unknown function + testRuntimeWarningPattern(++i, "{The value is {horse :func}.}", "The value is {|horse|}.", U_UNKNOWN_FUNCTION_ERROR); + testRuntimeWarningPattern(++i, "match {|horse| :func}\n\ + when 1 {The value is one.}\n\ + when * {The value is not one.}\n", + "The value is not one.", U_UNKNOWN_FUNCTION_ERROR); + // Using formatter as selector + // The fallback string will match 
the '*' variant + testRuntimeWarningPattern(++i, "match {|horse| :number}\n\ + when 1 {The value is one.}\n\ + when * {The value is not one.}\n", "The value is not one.", U_SELECTOR_ERROR); + + // Using selector as formatter + testRuntimeWarningPattern(++i, "match {|horse| :select}\n\ + when 1 {The value is one.}\n \ + when * {{|horse| :select}}\n", + "{|horse|}", U_FORMATTING_ERROR); + + // Unsupported expressions + testRuntimeErrorPattern(++i, "{The value is {@horse}.}", U_UNSUPPORTED_PROPERTY); + testRuntimeErrorPattern(++i, "{hello {|4.2| @number}}", U_UNSUPPORTED_PROPERTY); + testRuntimeErrorPattern(++i, "{{Original JSON file + here.

+ Some have been modified or added to reflect syntax changes that post-date the JSON file. + + */ + uint32_t i = 0; + + // Unexpected end of input + testInvalidPattern(++i, "let "); + testInvalidPattern(++i, "le"); + testInvalidPattern(++i, "let $foo"); + testInvalidPattern(++i, "let $foo = "); + testInvalidPattern(++i, "{{:fszzz"); + testInvalidPattern(++i, "{{:fszzz "); + testInvalidPattern(++i, "match {$foo} when |xyz"); + testInvalidPattern(++i, "{{:f aaa"); + testInvalidPattern(++i, "{missing end brace"); + testInvalidPattern(++i, "{missing end {$brace"); + + // Error should be reported at character 0, not end of input + testInvalidPattern(++i, "}{|xyz|", 0); + testInvalidPattern(++i, "}", 0); + + // @xyz is a valid annotation (`reserved`) so the error should be at the end of input + testInvalidPattern(++i, "{{@xyz"); + // Backslash followed by non-backslash followed by a '{' -- this should be an error + // immediately after the first backslash + testInvalidPattern(++i, "{{@\\y{}}", 4); + + // Reserved chars followed by a '|' that doesn't begin a valid literal -- this should be + // an error at the first invalid char in the literal + testInvalidPattern(++i, "{{@abc|\\z}}", 8); + + // Same pattern, but with a valid reserved-char following the erroneous reserved-escape + // -- the offset should be the same as with the previous one + testInvalidPattern(++i, "{{@\\y{p}}", 4); + // Erroneous literal inside a reserved string -- the error should be at the first + // erroneous literal char + testInvalidPattern(++i, "{{@ab|\\z|cd}}", 7); + + // tests for reserved syntax with bad escaped chars + // Single backslash - not allowed + testInvalidPattern(++i, "{hello {|4.2| @num\\ber}}", 19); + // Unescaped '{' -- not allowed + testInvalidPattern(++i, "{hello {|4.2| @num{be\\|r}}", 18); + // Unescaped '}' -- will be interpreted as the end of the reserved + // string, and the error will be reported at the index of '|', which is + // when the parser determines that "\|" isn't 
a valid text-escape + testInvalidPattern(++i, "{hello {|4.2| @num}be\\|r}}", 22); + // Unescaped '|' -- will be interpreted as the beginning of a literal + // Error at end of input + testInvalidPattern(++i, "{hello {|4.2| @num\\{be|r}}", 26); + + // Invalid escape sequence in a `text` -- the error should be at the character + // following the backslash + testInvalidPattern(++i, "{a\\qbc", 3); + + // Missing space after `when` -- the error should be immediately after the + // `when` (not the error in the pattern) + testInvalidPattern(++i, "match {|y|} when|y| {|||}", 16); + + // Missing spaces between keys in `when`-clause + testInvalidPattern(++i, "match {|y|} when |foo|bar {a}", 22); + testInvalidPattern(++i, "match {|y|} when |quux| |foo|bar {a}", 29); + testInvalidPattern(++i, "match {|y|} when |quux| |foo||bar| {a}", 29); + + // Error parsing the first key -- the error should be there, not in the + // also-erroneous third key + testInvalidPattern(++i, "match {|y|} when |\\q| * @{! {z}", 19); + + // Error parsing the second key -- the error should be there, not in the + // also-erroneous third key + testInvalidPattern(++i, "match {|y|} when * @{! {z} |\\q|", 19); + + // Error parsing the last key -- the error should be there, not in the erroneous + // pattern + testInvalidPattern(++i, "match {|y|} when * |\\q| {\\z}", 21); + + // Selectors not starting with `match` -- error should be on character 1, + // not the later erroneous key + testInvalidPattern(++i, "m {|y|} when @{! 
{z}", 1); + + // Non-expression as scrutinee in pattern -- error should be at the first + // non-expression, not the later non-expression + testInvalidPattern(++i, "match {|y|} {\\|} {@} when * * * {a}", 13); + + // Non-key in variant -- error should be there, not in the next erroneous + // variant + testInvalidPattern(++i, "match {|y|} when $foo * {a} when * :bar {b}", 17); + + + // Error should be within the first erroneous `text` or expression + testInvalidPattern(++i, "{ foo {|bar|} \\q baz ", 15); + + // ':' has to be followed by a function name -- the error should be at the first + // whitespace character + testInvalidPattern(++i, "{{: }}", 3); + + // Expression not starting with a '{' + testInvalidPattern(++i, "let $x = }|foo|}", 9); + + // Error should be at the first declaration not starting with a `let` + testInvalidPattern(++i, "let $x = {|foo|} l $y = {|bar|} let $z {|quux|}", 18); + + // Missing '=' in `let` declaration + testInvalidPattern(++i, "let $bar {|foo|} {{$bar}}", 9); + + // LHS of declaration doesn't start with a '$' + testInvalidPattern(++i, "let bar = {|foo|} {{$bar}}", 4); + + // `let` RHS isn't an expression + testInvalidPattern(++i, "let $bar = |foo| {{$bar}}", 11); + + // Non-expression + testInvalidPattern(++i, "no braces", 0); + testInvalidPattern(++i, "no braces {$foo}", 0); + + // Trailing characters that are not whitespace + testInvalidPattern(++i, "{extra} content", 8); + testInvalidPattern(++i, "match {|x|} when * {foo} extra", 25); + + // Empty expression + testInvalidPattern(++i, "{empty { }}", 9); + testInvalidPattern(++i, "match {} when * {foo}", 7); + + // ':' not preceding a function name + testInvalidPattern(++i, "{bad {:}}", 7); + + // Missing '=' after option name + testInvalidPattern(++i, "{no-equal {|42| :number m }}", 26); + testInvalidPattern(++i, "{no-equal {|42| :number minimumFractionDigits 2}}", 46); + testInvalidPattern(++i, "{bad {:placeholder option value}}", 26); + + // Extra '=' after option value + 
testInvalidPattern(++i, "{hello {|4.2| :number min=2=3}}", 27), + testInvalidPattern(++i, "{hello {|4.2| :number min=2max=3}}", 30), + // Missing whitespace between valid options + testInvalidPattern(++i, "{hello {|4.2| :number min=|a|max=3}}", 29), + // Ill-formed RHS of option -- the error should be within the RHS, + // not after parsing options + testInvalidPattern(++i, "{hello {|4.2| :number min=|\\a|}}", 28), + + + // Junk after annotation + testInvalidPattern(++i, "{no-equal {|42| :number {}", 26); + + // Missing RHS of option + testInvalidPattern(++i, "{bad {:placeholder option=}}", 26); + testInvalidPattern(++i, "{bad {:placeholder option}}", 25); + + // Annotation is not a function or reserved text + testInvalidPattern(++i, "{bad {$placeholder option}}", 19); + testInvalidPattern(++i, "{no {$placeholder end}", 18); + + // Missing whitespace before key in variant + testInvalidPattern(++i, "match {|foo|} when*{foo}", 18); + // Missing expression in selectors + testInvalidPattern(++i, "match when * {foo}", 6); + // Non-expression in selectors + testInvalidPattern(++i, "match |x| when * {foo}", 6); + + // Missing RHS in variant + testInvalidPattern(++i, "match {|x|} when * foo"); + + // Text may include newlines; check that the missing closing '}' is + // reported on the correct line + testInvalidPattern(++i, "{hello wo\nrld", 3, 1); + testInvalidPattern(++i, "{hello wo\nr\nl\ndddd", 4, 3); + // Offset for end-of-input should be 0 here because the line begins + // after the '\n', but there is no character after the '\n' + testInvalidPattern(++i, "{hello wo\nr\nl\n", 0, 3); + + // Literals may include newlines; check that the missing closing '|' is + // reported on the correct line + testInvalidPattern(++i, "{hello {|wo\nrld}", 4, 1); + testInvalidPattern(++i, "{hello {|wo\nr\nl\ndddd}", 5, 3); + // Offset for end-of-input should be 0 here because the line begins + // after the '\n', but there is no character after the '\n' + testInvalidPattern(++i, "{hello 
{|wo\nr\nl\n", 0, 3); + + // Variable names can't start with a : or - + testInvalidPattern(++i, "{{$:abc}}", 3); + testInvalidPattern(++i, "{{$-abc}}", 3); + + // Missing space before annotation + // Note that {{$bar:foo}} and {{$bar-foo}} are valid, + // because variable names can contain a ':' or a '-' + testInvalidPattern(++i, "{{$bar+foo}}", 6); + testInvalidPattern(++i, "{{|3.14|:foo}}", 8); + testInvalidPattern(++i, "{{|3.14|-foo}}", 8); + testInvalidPattern(++i, "{{|3.14|+foo}}", 8); + + // Unquoted literals can't begin with a ':' + testInvalidPattern(++i, "let $foo = {$bar} match {$foo} when :one {one} when * {other}", 36); + testInvalidPattern(++i, "let $foo = {$bar :fun option=:a} {bar {$foo}}", 29); + +} + +#endif /* #if !UCONFIG_NO_FORMATTING */ + diff --git a/icu4c/source/test/intltest/messageformat2test.h b/icu4c/source/test/intltest/messageformat2test.h new file mode 100644 index 000000000000..720b75b387d0 --- /dev/null +++ b/icu4c/source/test/intltest/messageformat2test.h @@ -0,0 +1,222 @@ +// © 2016 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html + +#ifndef _TESTMESSAGEFORMAT2 +#define _TESTMESSAGEFORMAT2 + +#include "unicode/rep.h" +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "messageformat2test_utils.h" +#include "unicode/messageformat2_function_registry.h" +#include "unicode/messageformat2.h" +#include "unicode/unistr.h" +#include "unicode/fmtable.h" +#include "unicode/parseerr.h" +#include "intltest.h" + +/** + * TestMessageFormat2 tests MessageFormat2 + */ + +U_NAMESPACE_BEGIN namespace message2 { + +struct TestResult { + const UnicodeString pattern; + const UnicodeString output; +}; + +struct TestResultError { + const UnicodeString pattern; + const UnicodeString output; + UErrorCode expected; +}; + +class TestMessageFormat2: public IntlTest { +public: + void runIndexedTest( int32_t index, UBool exec, const char* &name, char* par = NULL ) override; + + /** + * test MessageFormat2 with various given patterns + **/ + void testVariousPatterns(void); + void featureTests(void); + void messageFormat1Tests(void); + void testAPICustomFunctions(void); + // Test custom functions + void testCustomFunctions(void); + // Test standard functions + void testBuiltInFunctions(void); + void testDataModelErrors(void); + void testResolutionErrors(void); + // Test the data model API + void testAPI(void); + void testInvalidPatterns(void); + void testAPISimple(void); + +private: + void testSemanticallyInvalidPattern(uint32_t, const UnicodeString&, UErrorCode); + void testRuntimeErrorPattern(uint32_t, const UnicodeString&, UErrorCode); + void testRuntimeWarningPattern(uint32_t, const UnicodeString&, const UnicodeString&, UErrorCode); + void testInvalidPattern(uint32_t, const UnicodeString&); + void testInvalidPattern(uint32_t, const UnicodeString&, uint32_t); + void testInvalidPattern(uint32_t, const UnicodeString&, uint32_t, uint32_t); + void testValidPatterns(const TestResult*, int32_t, IcuTestErrorCode&); + void 
testResolutionErrors(IcuTestErrorCode&); + void testNoSyntaxErrors(const UnicodeString*, int32_t, IcuTestErrorCode&); + void jsonTests(IcuTestErrorCode&); + + // Built-in function testing + void testDateTime(IcuTestErrorCode&); + void testNumbers(IcuTestErrorCode&); + + // Custom function testing + void testPersonFormatter(IcuTestErrorCode&); + void testCustomFunctionsComplexMessage(IcuTestErrorCode&); + void testGrammarCasesFormatter(IcuTestErrorCode&); + void testListFormatter(IcuTestErrorCode&); + void testMessageRefFormatter(IcuTestErrorCode&); + + // Feature tests + void testEmptyMessage(TestCase::Builder&, IcuTestErrorCode&); + void testPlainText(TestCase::Builder&, IcuTestErrorCode&); + void testPlaceholders(TestCase::Builder&, IcuTestErrorCode&); + void testArgumentMissing(TestCase::Builder&, IcuTestErrorCode&); + void testDefaultLocale(TestCase::Builder&, IcuTestErrorCode&); + void testSpecialPluralWithDecimals(TestCase::Builder&, IcuTestErrorCode&); + void testDefaultFunctionAndOptions(TestCase::Builder&, IcuTestErrorCode&); + void testSimpleSelection(TestCase::Builder&, IcuTestErrorCode&); + void testComplexSelection(TestCase::Builder&, IcuTestErrorCode&); + void testSimpleLocalVariable(TestCase::Builder&, IcuTestErrorCode&); + void testLocalVariableWithSelect(TestCase::Builder&, IcuTestErrorCode&); + void testDateFormat(TestCase::Builder&, IcuTestErrorCode&); + void testPlural(TestCase::Builder&, IcuTestErrorCode&); + + void testPluralOrdinal(TestCase::Builder&, IcuTestErrorCode&); + void testFormatterIsCreatedOnce(IcuTestErrorCode&); + void testPluralWithOffset(TestCase::Builder&, IcuTestErrorCode&); + void testPluralWithOffsetAndLocalVar(TestCase::Builder&, IcuTestErrorCode&); + void testDeclareBeforeUse(TestCase::Builder&, IcuTestErrorCode&); + void testVariableOptionsInSelector(TestCase::Builder&, IcuTestErrorCode&); + void testVariableOptionsInSelectorWithLocalVar(TestCase::Builder&, IcuTestErrorCode&); + + // MessageFormat 1 tests + void 
testSample(TestCase::Builder&, IcuTestErrorCode&); + void testStaticFormat(TestCase::Builder&, IcuTestErrorCode&); + void testSimpleFormat(TestCase::Builder&, IcuTestErrorCode&); + void testSelectFormatToPattern(TestCase::Builder&, IcuTestErrorCode&); + void testMessageFormatDateTimeSkeleton(TestCase::Builder&, IcuTestErrorCode&); + void testMf1Behavior(TestCase::Builder&, IcuTestErrorCode&); + +}; // class TestMessageFormat2 + + +// Custom function classes +class PersonNameFormatterFactory : public FormatterFactory { + + public: + Formatter* createFormatter(const Locale&, UErrorCode&) override; +}; + +class Person : public UObject { + public: + UnicodeString title; + UnicodeString firstName; + UnicodeString lastName; + Person(UnicodeString t, UnicodeString f, UnicodeString l) : title(t), firstName(f), lastName(l) {} + ~Person(); +}; + +class PersonNameFormatter : public Formatter { + public: + void format(FormattingContext&, UErrorCode& errorCode) const override; +}; + +class GrammarCasesFormatterFactory : public FormatterFactory { + public: + Formatter* createFormatter(const Locale&, UErrorCode&) override; +}; + +class GrammarCasesFormatter : public Formatter { + public: + void format(FormattingContext&, UErrorCode& errorCode) const override; + static FunctionRegistry* customRegistry(UErrorCode&); + private: + void getDativeAndGenitive(const UnicodeString&, UnicodeString& result) const; +}; + +class ListFormatterFactory : public FormatterFactory { + public: + Formatter* createFormatter(const Locale&, UErrorCode&) override; +}; + +class ListFormatter : public Formatter { + public: + void format(FormattingContext&, UErrorCode& errorCode) const override; + static FunctionRegistry* customRegistry(UErrorCode&); + private: + friend class ListFormatterFactory; + const Locale& locale; + ListFormatter(const Locale& loc) : locale(loc) {} +}; + +class ResourceManagerFactory : public FormatterFactory { + public: + Formatter* createFormatter(const Locale&, UErrorCode&) 
override; +}; + +class ResourceManager : public Formatter { + public: + void format(FormattingContext&, UErrorCode& errorCode) const override; + static FunctionRegistry* customRegistry(UErrorCode&); + static Hashtable* properties(UErrorCode&); + static UnicodeString propertiesAsString(const Hashtable&); + static Hashtable* parseProperties(const UnicodeString&, UErrorCode&); + + private: + friend class ResourceManagerFactory; + ResourceManager(const Locale& loc) : locale(loc) {} + const Locale& locale; +}; + +class TemperatureFormatterFactory : public FormatterFactory { + public: + Formatter* createFormatter(const Locale&, UErrorCode&) override; + TemperatureFormatterFactory() : constructCount(0), formatCount(0), fFormatterCount(0), cFormatterCount(0) {} + + int32_t constructCount; + int32_t formatCount; + int32_t fFormatterCount; + int32_t cFormatterCount; +}; + +class TemperatureFormatter : public Formatter { + public: + void format(FormattingContext&, UErrorCode& errorCode) const override; + static FunctionRegistry* customRegistry(UErrorCode&); + ~TemperatureFormatter(); + private: + friend class TemperatureFormatterFactory; + const Locale& locale; + TemperatureFormatterFactory& counter; + LocalPointer cachedFormatters; + + TemperatureFormatter(const Locale&, TemperatureFormatterFactory&, UErrorCode&); +}; + +// Custom function test utilities +class SplitString { + public: + static const uint32_t FIRST = 0; + static const uint32_t LAST = -1; + static bool nextPart(const UnicodeString&, UnicodeString&, uint32_t&); +}; + +} // namespace message2 +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif diff --git a/icu4c/source/test/intltest/messageformat2test_builtin.cpp b/icu4c/source/test/intltest/messageformat2test_builtin.cpp new file mode 100644 index 000000000000..5e38bb996b48 --- /dev/null +++ b/icu4c/source/test/intltest/messageformat2test_builtin.cpp @@ -0,0 +1,276 @@ +// © 2016 and later: Unicode, Inc. and others. 
+ +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/messageformat2.h" +#include "messageformat2test.h" + +using namespace icu::message2; + +/* +Tests reflect the syntax specified in + + https://github.com/unicode-org/message-format-wg/commits/main/spec/message.abnf + +as of the following commit from 2023-05-09: + https://github.com/unicode-org/message-format-wg/commit/194f6efcec5bf396df36a19bd6fa78d1fa2e0867 +*/ + +void TestMessageFormat2::testDateTime(IcuTestErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + LocalPointer cal(Calendar::createInstance(errorCode)); + LocalPointer testBuilder(TestCase::builder(errorCode)); + CHECK_ERROR(errorCode); + + testBuilder->setName("testDateTime"); + // November 23, 2022 at 7:42:37.123 PM + cal->set(2022, Calendar::NOVEMBER, 23, 19, 42, 37); + UDate TEST_DATE = cal->getTime(errorCode); + UnicodeString date = "date"; + testBuilder->setLocale(Locale("ro"), errorCode); + + LocalPointer test(testBuilder->setPattern("{Testing date formatting: {$date :datetime}.}") + .setExpected("Testing date formatting: 23.11.2022, 19:42.") + .setDateArgument(date, TEST_DATE, errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + testBuilder->setLocale(Locale("ro", "RO"), errorCode); + + // Skeleton + test.adoptInstead(testBuilder->setPattern("{Testing date formatting: {$date :datetime skeleton=yMMMMd}.}") + .setExpected("Testing date formatting: 23 noiembrie 2022.") + .setDateArgument(date, TEST_DATE, errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{Testing date formatting: {$date :datetime skeleton=jm}.}") + .setExpected("Testing date formatting: 19:42.") + .setDateArgument(date, TEST_DATE, errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + testBuilder->setLocale(Locale("en"), errorCode); + + test.adoptInstead(testBuilder->setPattern("{Testing date formatting: 
{$date :datetime skeleton=yMMMd}.}") + .setExpected("Testing date formatting: Nov 23, 2022.") + .setDateArgument(date, TEST_DATE, errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{Testing date formatting: {$date :datetime skeleton=yMMMdjms}.}") + .setExpected("Testing date formatting: Nov 23, 2022, 7:42:37\u202FPM.") + .setDateArgument(date, TEST_DATE, errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{Testing date formatting: {$date :datetime skeleton=jm}.}") + .setExpected("Testing date formatting: 7:42\u202FPM.") + .setDateArgument(date, TEST_DATE, errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + // Style + + test.adoptInstead(testBuilder->setPattern("{Testing date formatting: {$date :datetime datestyle=long}.}") + .setExpected("Testing date formatting: November 23, 2022.") + .setDateArgument(date, TEST_DATE, errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{Testing date formatting: {$date :datetime datestyle=medium}.}") + .setExpected("Testing date formatting: Nov 23, 2022.") + .setDateArgument(date, TEST_DATE, errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{Testing date formatting: {$date :datetime datestyle=short}.}") + .setExpected("Testing date formatting: 11/23/22.") + .setDateArgument(date, TEST_DATE, errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{Testing date formatting: {$date :datetime timestyle=long}.}") + .setExpected("Testing date formatting: 7:42:37\u202FPM PST.") + .setDateArgument(date, TEST_DATE, errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + 
test.adoptInstead(testBuilder->setPattern("{Testing date formatting: {$date :datetime timestyle=medium}.}") + .setExpected("Testing date formatting: 7:42:37\u202FPM.") + .setDateArgument(date, TEST_DATE, errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{Testing date formatting: {$date :datetime timestyle=short}.}") + .setExpected("Testing date formatting: 7:42\u202FPM.") + .setDateArgument(date, TEST_DATE, errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + // Pattern + test.adoptInstead(testBuilder->setPattern("{Testing date formatting: {$date :datetime pattern=|d 'of' MMMM, y 'at' HH:mm|}.}") + .setExpected("Testing date formatting: 23 of November, 2022 at 19:42.") + .setDateArgument(date, TEST_DATE, errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + // Error cases + // Number as argument + test.adoptInstead(testBuilder->setPattern("let $num = {|42| :number}\n\ + {Testing date formatting: {$num :datetime}}") + .clearArguments(errorCode) + .setExpected("Testing date formatting: {|42|}") + .setExpectedError(U_FORMATTING_ERROR) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + // Literal string as argument + test.adoptInstead(testBuilder->setPattern("{Testing date formatting: {|horse| :datetime}}") + .setExpected("Testing date formatting: |horse|") + .setExpectedError(U_FORMATTING_ERROR) + .build(errorCode)); + // Formatted string as argument + test.adoptInstead(testBuilder->setPattern("let $dateStr = {$date :datetime}\n\ + {Testing date formatting: {$dateStr :datetime}}") + .setExpected("Testing date formatting: {$date}") + .setExpectedError(U_FORMATTING_ERROR) + .setDateArgument(date, TEST_DATE, errorCode) + .build(errorCode)); + + TestUtils::runTestCase(*this, *test, errorCode); + +} + +void TestMessageFormat2::testNumbers(IcuTestErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + 
double value = 1234567890.97531; + UnicodeString val = "val"; + + LocalPointer testBuilder(TestCase::builder(errorCode)); + CHECK_ERROR(errorCode); + testBuilder->setName("testNumbers"); + + // Literals + LocalPointer test(testBuilder->setPattern("{From literal: {123456789 :number}!}") + .setArgument(val, value, errorCode) + .setExpected("From literal: 123.456.789!") + .setLocale(Locale("ro"), errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{From literal: {|123456789,531| :number}!}") + .setArgument(val, value, errorCode) + .setExpected("From literal: 123.456.789,531!") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{From literal: {|\u1041\u1042\u1043,\u1044\u1045\u1046,\u1047\u1048\u1049.\u1045\u1043\u1041| :number}!}") + .setArgument(val, value, errorCode) + .setExpected("From literal: \u1041\u1042\u1043,\u1044\u1045\u1046,\u1047\u1048\u1049.\u1045\u1043\u1041!") + .setLocale(Locale("my"), errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + + // Testing that the detection works for various types (without specifying :number) + test.adoptInstead(testBuilder->setPattern("{Default double: {$val}!}") + .setLocale(Locale("en", "IN"), errorCode) + .setArgument(val, value, errorCode) + .setExpected("Default double: 1,23,45,67,890.97531!") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + test.adoptInstead(testBuilder->setPattern("{Default double: {$val}!}") + .setLocale(Locale("ro"), errorCode) + .setArgument(val, value, errorCode) + .setExpected("Default double: 1.234.567.890,97531!") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + test.adoptInstead(testBuilder->setPattern("{Default float: {$val}!}") + .setLocale(Locale("ro"), errorCode) + .setArgument(val, 3.1415926535, errorCode) + .setExpected("Default float: 3,141593!") + 
.build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + test.adoptInstead(testBuilder->setPattern("{Default int64: {$val}!}") + .setLocale(Locale("ro"), errorCode) + .setArgument(val, (int64_t) 1234567890123456789, errorCode) + .setExpected("Default int64: 1.234.567.890.123.456.789!") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + test.adoptInstead(testBuilder->setPattern("{Default number: {$val}!}") + .setLocale(Locale("ro"), errorCode) + .setDecimalArgument(val, StringPiece("1234567890123456789.987654321"), errorCode) + .setExpected("Default number: 1.234.567.890.123.456.789,987654!") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + // Omitted CurrencyAmount test from ICU4J since it's not supported by Formattable + + // Skeletons + value = 1234567890.97531; + testBuilder->setLocale(Locale("ro"), errorCode); + test.adoptInstead(testBuilder->setPattern("{Skeletons, minFraction: {$val :number skeleton=|.00000000*|}!}") + .setArgument(val, value, errorCode) + .setExpected("Skeletons, minFraction: 1.234.567.890,97531000!") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{Skeletons, maxFraction: {$val :number skeleton=|.###|}!}") + .setArgument(val, value, errorCode) + .setExpected("Skeletons, maxFraction: 1.234.567.890,975!") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + // Currency + test.adoptInstead(testBuilder->setPattern("{Skeletons, currency: {$val :number skeleton=|currency/EUR|}!}") + .setArgument(val, value, errorCode) + .setExpected("Skeletons, currency: 1.234.567.890,98\u00A0\u20AC!") + .setLocale(Locale("de"), errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + // Currency as a parameter + test.adoptInstead(testBuilder->setPattern("{Skeletons, currency: {$val :number skeleton=$skel}!}") + .setArgument(val, value, errorCode) + .setArgument("skel", 
"currency/EUR", errorCode) + .setExpected("Skeletons, currency: 1.234.567.890,98\u00A0\u20AC!") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{Skeletons, currency: {$val :number skeleton=$skel}!}") + .setArgument(val, value, errorCode) + .setArgument("skel", "currency/JPY", errorCode) + .setExpected("Skeletons, currency: 1.234.567.891\u00A0\u00A5!") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + // Various measures + test.adoptInstead(testBuilder->setPattern("let $intl = {$valC :number skeleton=|unit/celsius|}\n\ + let $us = {$valF :number skeleton=|unit/fahrenheit|}\n\ + {Temperature: {$intl} ({$us})}") + .setArgument("valC", 27.0, errorCode) + .setArgument("valF", 80.6, errorCode) + .setExpected("Temperature: 27 \u00B0C (80,6 \u00B0F)") + .setLocale(Locale("ro"), errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{Height: {$len :number skeleton=|unit/meter|}}") + .setArgument("len", 1.75, errorCode) + .setExpected("Height: 1,75 m") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); +} + +void TestMessageFormat2::testBuiltInFunctions() { + IcuTestErrorCode errorCode(*this, "testBuiltInFunctions"); + + testDateTime(errorCode); + testNumbers(errorCode); +} + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/test/intltest/messageformat2test_custom.cpp b/icu4c/source/test/intltest/messageformat2test_custom.cpp new file mode 100644 index 000000000000..00b139eaa12a --- /dev/null +++ b/icu4c/source/test/intltest/messageformat2test_custom.cpp @@ -0,0 +1,761 @@ +// © 2016 and later: Unicode, Inc. and others. 
+ +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "plurrule_impl.h" + +#include "unicode/listformatter.h" +#include "unicode/messageformat2.h" +#include "intltest.h" +#include "messageformat2test.h" + +using namespace message2; +using namespace pluralimpl; + +/* +Tests reflect the syntax specified in + + https://github.com/unicode-org/message-format-wg/commits/main/spec/message.abnf + +as of the following commit from 2023-05-09: + https://github.com/unicode-org/message-format-wg/commit/194f6efcec5bf396df36a19bd6fa78d1fa2e0867 +*/ + +static FunctionRegistry* personFunctionRegistry(UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return nullptr; + } + + LocalPointer builder(FunctionRegistry::builder(errorCode)); + if (U_FAILURE(errorCode)) { + return nullptr; + } + return builder->setFormatter(FunctionName("person"), new PersonNameFormatterFactory(), errorCode) + .build(errorCode); +} + +void TestMessageFormat2::testPersonFormatter(IcuTestErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + LocalPointer customRegistry(personFunctionRegistry(errorCode)); + UnicodeString name = "name"; + LocalPointer person(new Person(UnicodeString("Mr."), UnicodeString("John"), UnicodeString("Doe"))); + LocalPointer testBuilder(TestCase::builder(errorCode)); + CHECK_ERROR(errorCode); + testBuilder->setName("testPersonFormatter"); + testBuilder->setLocale(Locale("en"), errorCode); + + LocalPointer test(testBuilder->setPattern("{Hello {$name :person formality=formal}}") + .setArgument(name, person.getAlias(), errorCode) + .setExpected("Hello {$name}") + .setExpectedError(U_UNKNOWN_FUNCTION_ERROR) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{Hello {$name :person formality=informal}}") + .setArgument(name, person.getAlias(), errorCode) + .setExpected("Hello {$name}") + .setExpectedError(U_UNKNOWN_FUNCTION_ERROR) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, 
errorCode); + + testBuilder->setFunctionRegistry(customRegistry.orphan()); + + test.adoptInstead(testBuilder->setPattern("{Hello {$name :person formality=formal}}") + .setArgument(name, person.getAlias(), errorCode) + .setExpected("Hello Mr. Doe") + .setExpectSuccess() + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{Hello {$name :person formality=informal}}") + .setArgument(name, person.getAlias(), errorCode) + .setExpected("Hello John") + .setExpectSuccess() + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{Hello {$name :person formality=formal length=long}}") + .setArgument(name, person.getAlias(), errorCode) + .setExpected("Hello Mr. John Doe") + .setExpectSuccess() + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{Hello {$name :person formality=formal length=medium}}") + .setArgument(name, person.getAlias(), errorCode) + .setExpected("Hello John Doe") + .setExpectSuccess() + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{Hello {$name :person formality=formal length=short}}") + .setArgument(name, person.getAlias(), errorCode) + .setExpected("Hello Mr. 
Doe") + .setExpectSuccess() + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); +} + +void TestMessageFormat2::testCustomFunctionsComplexMessage(IcuTestErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + LocalPointer customRegistry(personFunctionRegistry(errorCode)); + UnicodeString host = "host"; + UnicodeString hostGender = "hostGender"; + UnicodeString guest = "guest"; + UnicodeString guestCount = "guestCount"; + + LocalPointer jane(new Person(UnicodeString("Ms."), UnicodeString("Jane"), UnicodeString("Doe"))); + LocalPointer john(new Person(UnicodeString("Mr."), UnicodeString("John"), UnicodeString("Doe"))); + LocalPointer anonymous(new Person(UnicodeString("Mx."), UnicodeString("Anonymous"), UnicodeString("Doe"))); + if (!jane.isValid() || !john.isValid() || !anonymous.isValid()) { + ((UErrorCode&) errorCode) = U_MEMORY_ALLOCATION_ERROR; + return; + } + + UnicodeString message = "let $hostName = {$host :person length=long}\n\ + let $guestName = {$guest :person length=long}\n\ + let $guestsOther = {$guestCount :number offset=1}\n\ + match {$hostGender :gender} {$guestCount :plural}\n\ + when female 0 {{$hostName} does not give a party.}\n\ + when female 1 {{$hostName} invites {$guestName} to her party.}\n\ + when female 2 {{$hostName} invites {$guestName} and one other person to her party.}\n\ + when female * {{$hostName} invites {$guestName} and {$guestsOther} other people to her party.}\n\ + when male 0 {{$hostName} does not give a party.}\n\ + when male 1 {{$hostName} invites {$guestName} to his party.}\n\ + when male 2 {{$hostName} invites {$guestName} and one other person to his party.}\n\ + when male * {{$hostName} invites {$guestName} and {$guestsOther} other people to his party.}\n\ + when * 0 {{$hostName} does not give a party.}\n\ + when * 1 {{$hostName} invites {$guestName} to their party.}\n\ + when * 2 {{$hostName} invites {$guestName} and one other person to their party.}\n\ + when * * {{$hostName} invites {$guestName} and 
{$guestsOther} other people to their party.}\n"; + + + LocalPointer testBuilder(TestCase::builder(errorCode)); + CHECK_ERROR(errorCode); + testBuilder->setName("testCustomFunctionsComplexMessage"); + testBuilder->setLocale(Locale("en"), errorCode); + testBuilder->setPattern(message); + testBuilder->setFunctionRegistry(customRegistry.orphan()); + + LocalPointer test(testBuilder->setArgument(host, jane.getAlias(), errorCode) + .setArgument(hostGender, "female", errorCode) + .setArgument(guest, john.getAlias(), errorCode) + .setArgument(guestCount, (int64_t) 3, errorCode) + .setExpected("Ms. Jane Doe invites Mr. John Doe and 2 other people to her party.") + .setExpectSuccess() + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setArgument(host, jane.getAlias(), errorCode) + .setArgument(hostGender, "female", errorCode) + .setArgument(guest, john.getAlias(), errorCode) + .setArgument(guestCount, (int64_t) 2, errorCode) + .setExpected("Ms. Jane Doe invites Mr. John Doe and one other person to her party.") + .setExpectSuccess() + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setArgument(host, jane.getAlias(), errorCode) + .setArgument(hostGender, "female", errorCode) + .setArgument(guest, john.getAlias(), errorCode) + .setArgument(guestCount, (int64_t) 1, errorCode) + .setExpected("Ms. Jane Doe invites Mr. John Doe to her party.") + .setExpectSuccess() + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setArgument(host, john.getAlias(), errorCode) + .setArgument(hostGender, "male", errorCode) + .setArgument(guest, jane.getAlias(), errorCode) + .setArgument(guestCount, (int64_t) 3, errorCode) + .setExpected("Mr. John Doe invites Ms. 
Jane Doe and 2 other people to his party.") + .setExpectSuccess() + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setArgument(host, anonymous.getAlias(), errorCode) + .setArgument(hostGender, "unknown", errorCode) + .setArgument(guest, jane.getAlias(), errorCode) + .setArgument(guestCount, (int64_t) 2, errorCode) + .setExpected("Mx. Anonymous Doe invites Ms. Jane Doe and one other person to their party.") + .setExpectSuccess() + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); +} + +void TestMessageFormat2::testCustomFunctions() { + IcuTestErrorCode errorCode(*this, "testCustomFunctions"); + + testPersonFormatter(errorCode); + testCustomFunctionsComplexMessage(errorCode); + testGrammarCasesFormatter(errorCode); + testListFormatter(errorCode); + testMessageRefFormatter(errorCode); +} + + +// -------------- Custom function implementations + +Formatter* PersonNameFormatterFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return nullptr; + } + + // Locale not used + (void) locale; + + Formatter* result = new PersonNameFormatter(); + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return result; +} + +void PersonNameFormatter::format(FormattingContext& context, UErrorCode& errorCode) const { + CHECK_ERROR(errorCode); + + if (!context.hasObjectInput()) { + return; + } + + UnicodeString formalityOpt, lengthOpt; + bool hasFormality, hasLength; + hasFormality = context.getStringOption(UnicodeString("formality"), formalityOpt); + hasLength = context.getStringOption(UnicodeString("length"), lengthOpt); + + bool useFormal = hasFormality && formalityOpt == "formal"; + UnicodeString length = hasLength ? 
lengthOpt : "short"; + + const Person& p = static_cast(context.getObjectInput()); + + UnicodeString title = p.title; + UnicodeString firstName = p.firstName; + UnicodeString lastName = p.lastName; + + UnicodeString result; + if (length == "long") { + result += title; + result += " "; + result += firstName; + result += " "; + result += lastName; + } else if (length == "medium") { + if (useFormal) { + result += firstName; + result += " "; + result += lastName; + } else { + result += title; + result += " "; + result += firstName; + } + } else if (useFormal) { + // Default to "short" length + result += title; + result += " "; + result += lastName; + } else { + result += firstName; + } + + context.setOutput(result); +} + +Person::~Person() {} + +/* + See ICU4J: CustomFormatterGrammarCaseTest.java +*/ +Formatter* GrammarCasesFormatterFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return nullptr; + } + + // Locale not used + (void) locale; + + Formatter* result = new GrammarCasesFormatter(); + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return result; +} + + +/* static */ void GrammarCasesFormatter::getDativeAndGenitive(const UnicodeString& value, UnicodeString& result) const { + UnicodeString postfix; + if (value.endsWith("ana")) { + value.extract(0, value.length() - 3, postfix); + postfix += "nei"; + } + else if (value.endsWith("ca")) { + value.extract(0, value.length() - 2, postfix); + postfix += "căi"; + } + else if (value.endsWith("ga")) { + value.extract(0, value.length() - 2, postfix); + postfix += "găi"; + } + else if (value.endsWith("a")) { + value.extract(0, value.length() - 1, postfix); + postfix += "ei"; + } + else { + postfix = "lui " + value; + } + result += postfix; +} + +void GrammarCasesFormatter::format(FormattingContext& context, UErrorCode& errorCode) const { + CHECK_ERROR(errorCode); + + // Argument must be present + if (!context.hasFormattableInput()) { + 
context.setFormattingError("grammarBB", errorCode); + return; + } + + // Assumes the argument is not-yet-formatted + const Formattable& toFormat = context.getFormattableInput(); + UnicodeString result; + + switch (toFormat.getType()) { + case Formattable::Type::kString: { + const UnicodeString& in = toFormat.getString(); + UnicodeString grammarCase; + bool hasCase = context.getStringOption(UnicodeString("case"), grammarCase); + if (hasCase && (grammarCase == "dative" || grammarCase == "genitive")) { + getDativeAndGenitive(in, result); + } else { + result += in; + } + break; + } + default: { + result += toFormat.getString(); + break; + } + } + + context.setOutput(result); +} + +/* static */ FunctionRegistry* GrammarCasesFormatter::customRegistry(UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + LocalPointer frBuilder(FunctionRegistry::builder(errorCode)); + NULL_ON_ERROR(errorCode); + + return(frBuilder-> + setFormatter(FunctionName("grammarBB"), new GrammarCasesFormatterFactory(), errorCode) + .build(errorCode)); +} + +void TestMessageFormat2::testGrammarCasesFormatter(IcuTestErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + LocalPointer customRegistry(GrammarCasesFormatter::customRegistry(errorCode)); + LocalPointer testBuilder(TestCase::builder(errorCode)); + CHECK_ERROR(errorCode); + testBuilder->setName("testGrammarCasesFormatter - genitive"); + testBuilder->setFunctionRegistry(customRegistry.orphan()); + testBuilder->setLocale(Locale("ro"), errorCode); + testBuilder->setPattern("{Cartea {$owner :grammarBB case=genitive}}"); + LocalPointer test; + + test.adoptInstead(testBuilder->setArgument("owner", "Maria", errorCode) + .setExpected("Cartea Mariei") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setArgument("owner", "Rodica", errorCode) + .setExpected("Cartea Rodicăi") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + 
test.adoptInstead(testBuilder->setArgument("owner", "Ileana", errorCode) + .setExpected("Cartea Ilenei") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setArgument("owner", "Petre", errorCode) + .setExpected("Cartea lui Petre") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + testBuilder->setName("testGrammarCasesFormatter - nominative"); + testBuilder->setPattern("{M-a sunat {$owner :grammarBB case=nominative}}"); + + test.adoptInstead(testBuilder->setArgument("owner", "Maria", errorCode) + .setExpected("M-a sunat Maria") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setArgument("owner", "Rodica", errorCode) + .setExpected("M-a sunat Rodica") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setArgument("owner", "Ileana", errorCode) + .setExpected("M-a sunat Ileana") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setArgument("owner", "Petre", errorCode) + .setExpected("M-a sunat Petre") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); +} + +/* static */ FunctionRegistry* message2::ListFormatter::customRegistry(UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + LocalPointer frBuilder(FunctionRegistry::builder(errorCode)); + NULL_ON_ERROR(errorCode); + + return(frBuilder-> + setFormatter(FunctionName("listformat"), new ListFormatterFactory(), errorCode) + .build(errorCode)); +} + +/* + See ICU4J: CustomFormatterListTest.java +*/ +Formatter* ListFormatterFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return nullptr; + } + + Formatter* result = new ListFormatter(locale); + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return result; +} + +void 
message2::ListFormatter::format(FormattingContext& context, UErrorCode& errorCode) const { + CHECK_ERROR(errorCode); + + // Argument must be present + if (!context.hasFormattableInput()) { + context.setFormattingError("listformat", errorCode); + return; + } + // Assumes arg is not-yet-formatted + const Formattable& toFormat = context.getFormattableInput(); + + UnicodeString optType; + bool hasType = context.getStringOption(UnicodeString("type"), optType); + UListFormatterType type = UListFormatterType::ULISTFMT_TYPE_AND; + if (hasType) { + if (optType == "OR") { + type = UListFormatterType::ULISTFMT_TYPE_OR; + } else if (optType == "UNITS") { + type = UListFormatterType::ULISTFMT_TYPE_UNITS; + } + } + UnicodeString optWidth; + bool hasWidth = context.getStringOption(UnicodeString("width"), optWidth); + UListFormatterWidth width = UListFormatterWidth::ULISTFMT_WIDTH_WIDE; + if (hasWidth) { + if (optWidth == "SHORT") { + width = UListFormatterWidth::ULISTFMT_WIDTH_SHORT; + } else if (optWidth == "NARROW") { + width = UListFormatterWidth::ULISTFMT_WIDTH_NARROW; + } + } + LocalPointer lf(icu::ListFormatter::createInstance(locale, type, width, errorCode)); + CHECK_ERROR(errorCode); + + UnicodeString result; + + switch (toFormat.getType()) { + case Formattable::Type::kArray: { + int32_t n_items; + const Formattable* objs = toFormat.getArray(n_items); + if (objs == nullptr) { + context.setFormattingError("listformatter", errorCode); + return; + } + LocalArray parts(new UnicodeString[n_items]); + if (!parts.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + for (int32_t i = 0; i < n_items; i++) { + parts[i] = objs[i].getString(); + } + lf->format(parts.getAlias(), n_items, result, errorCode); + break; + } + default: { + result += toFormat.getString(); + break; + } + } + + context.setOutput(result); +} + +void TestMessageFormat2::testListFormatter(IcuTestErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return; + } + const UnicodeString 
progLanguages[3] = { + "C/C++", + "Java", + "Python" + }; + LocalPointer testBuilder(TestCase::builder(errorCode)); + + LocalPointer reg(ListFormatter::customRegistry(errorCode)); + CHECK_ERROR(errorCode); + + testBuilder->setFunctionRegistry(reg.orphan()); + testBuilder->setArgument("languages", progLanguages, 3, errorCode); + + LocalPointer test(testBuilder->setName("testListFormatter") + .setPattern("{I know {$languages :listformat type=AND}!}") + .setExpected("I know C/C++, Java, and Python!") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setName("testListFormatter") + .setPattern("{You are allowed to use {$languages :listformat type=OR}!}") + .setExpected("You are allowed to use C/C++, Java, or Python!") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); +} + +/* + See ICU4J: CustomFormatterMessageRefTest.java +*/ + +/* static */ FunctionRegistry* message2::ResourceManager::customRegistry(UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + LocalPointer frBuilder(FunctionRegistry::builder(errorCode)); + NULL_ON_ERROR(errorCode); + + return(frBuilder-> + setFormatter(FunctionName("msgRef"), new ResourceManagerFactory(), errorCode) + .build(errorCode)); +} + +/* static */ Hashtable* message2::ResourceManager::properties(UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + LocalPointer result(new Hashtable(uhash_compareUnicodeString, nullptr, errorCode)); + NULL_ON_ERROR(errorCode); + result->setValueDeleter(uprv_deleteUObject); + + LocalPointer value(new UnicodeString("match {$gcase :select} when genitive {Firefoxin} when * {Firefox}")); + if (!value.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + result->put("firefox", value.orphan(), errorCode); + value.adoptInstead(new UnicodeString("match {$gcase :select} when genitive {Chromen} when * {Chrome}")); + if (!value.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + 
result->put("chrome", value.orphan(), errorCode); + value.adoptInstead(new UnicodeString("match {$gcase :select} when genitive {Safarin} when * {Safari}")); + if (!value.isValid()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + result->put("safari", value.orphan(), errorCode); + return result.orphan(); +} + +Formatter* ResourceManagerFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return nullptr; + } + + Formatter* result = new ResourceManager(locale); + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return result; +} + +using Arguments = MessageArguments; + +static Arguments* localToGlobal(const FormattingContext& context, UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + LocalPointer args(Arguments::builder(errorCode)); + NULL_ON_ERROR(errorCode); + + int32_t pos = context.firstOption(); + UnicodeString optionName; + while (true) { + const Formattable* optionValue = context.nextOption(pos, optionName); + if (optionValue == nullptr) { + break; + } + switch (optionValue->getType()) { + case Formattable::Type::kString: { + // add it as a string arg + args->add(optionName, optionValue->getString(), errorCode); + break; + } + case Formattable::Type::kDouble: { + args->addDouble(optionName, optionValue->getDouble(), errorCode); + break; + } + case Formattable::Type::kInt64: { + args->addInt64(optionName, optionValue->getInt64(), errorCode); + break; + } + case Formattable::Type::kLong: { + args->addInt64(optionName, (int64_t) optionValue->getLong(), errorCode); + break; + } + case Formattable::Type::kDate: { + args->addDate(optionName, optionValue->getDate(), errorCode); + break; + } + default: { + // Ignore other types + continue; + } + } + } + return args->build(errorCode); +} + +void ResourceManager::format(FormattingContext& context, UErrorCode& errorCode) const { + CHECK_ERROR(errorCode); + + // Argument must be present + if (!context.hasFormattableInput()) { 
+ context.setFormattingError("msgref", errorCode); + return; + } + + // Assumes arg is not-yet-formatted + const Formattable& toFormat = context.getFormattableInput(); + UnicodeString in; + switch (toFormat.getType()) { + case Formattable::Type::kString: { + in = toFormat.getString(); + break; + } + default: { + // Ignore non-strings + return; + } + } + + UnicodeString resbundle("resbundle"); + bool hasProperties = context.hasObjectOption(resbundle); + // If properties were provided, look up the given string in the properties, + // yielding a message + if (hasProperties) { + const Hashtable& properties = reinterpret_cast(context.getObjectOption(resbundle)); + UnicodeString* msg = (UnicodeString*) properties.get(in); + if (msg == nullptr) { + // No message given for this key -- error out + context.setFormattingError("msgref", errorCode); + return; + } + LocalPointer mfBuilder(MessageFormatter::builder(errorCode)); + CHECK_ERROR(errorCode); + UParseError parseErr; + // Any parse/data model errors will be propagated + LocalPointer mf(mfBuilder->setPattern(*msg) + .build(parseErr, errorCode)); + CHECK_ERROR(errorCode); + UnicodeString result; + + LocalPointer arguments(localToGlobal(context, errorCode)); + CHECK_ERROR(errorCode); + + UErrorCode savedStatus = errorCode; + mf->formatToString(*arguments, errorCode, result); + // Here, we want to ignore errors (this matches the behavior in the ICU4J test). + // For example: we want $gcase to default to "$gcase" if the gcase option was + // omitted. 
+ if (U_FAILURE(errorCode)) { + errorCode = savedStatus; + } + context.setOutput(result); + } else { + // Properties must be provided + context.setFormattingError("msgref", errorCode); + } + return; +} + + +void TestMessageFormat2::testMessageRefFormatter(IcuTestErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + LocalPointer properties(ResourceManager::properties(errorCode)); + LocalPointer testBuilder(TestCase::builder(errorCode)); + CHECK_ERROR(errorCode); + testBuilder->setLocale(Locale("ro"), errorCode); + testBuilder->setFunctionRegistry(ResourceManager::customRegistry(errorCode)); + testBuilder->setPattern(*((UnicodeString*) properties->get("firefox"))); + LocalPointer test; + testBuilder->setName("message-ref"); + + test.adoptInstead(testBuilder->setArgument("gcase", "whatever", errorCode) + .setExpected("Firefox") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + test.adoptInstead(testBuilder->setArgument("gcase", "genitive", errorCode) + .setExpected("Firefoxin") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + testBuilder->setPattern(*((UnicodeString*) properties->get("chrome"))); + + test.adoptInstead(testBuilder->setArgument("gcase", "whatever", errorCode) + .setExpected("Chrome") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + test.adoptInstead(testBuilder->setArgument("gcase", "genitive", errorCode) + .setExpected("Chromen") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + testBuilder->setArgument("res", (UObject*) properties.getAlias(), errorCode); + + testBuilder->setPattern("{Please start {$browser :msgRef gcase=genitive resbundle=$res}}"); + test.adoptInstead(testBuilder->setArgument("browser", "firefox", errorCode) + .setExpected("Please start Firefoxin") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + test.adoptInstead(testBuilder->setArgument("browser", "chrome", errorCode) + .setExpected("Please start 
Chromen") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + test.adoptInstead(testBuilder->setArgument("browser", "safari", errorCode) + .setExpected("Please start Safarin") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + testBuilder->setPattern("{Please start {$browser :msgRef resbundle=$res}}"); + test.adoptInstead(testBuilder->setArgument("browser", "firefox", errorCode) + .setExpected("Please start Firefox") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + test.adoptInstead(testBuilder->setArgument("browser", "chrome", errorCode) + .setExpected("Please start Chrome") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + test.adoptInstead(testBuilder->setArgument("browser", "safari", errorCode) + .setExpected("Please start Safari") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); +} + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/test/intltest/messageformat2test_features.cpp b/icu4c/source/test/intltest/messageformat2test_features.cpp new file mode 100644 index 000000000000..94589e06118d --- /dev/null +++ b/icu4c/source/test/intltest/messageformat2test_features.cpp @@ -0,0 +1,1089 @@ +// © 2016 and later: Unicode, Inc. and others. + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/gregocal.h" +#include "unicode/messageformat2.h" +#include "messageformat2test.h" + +using namespace icu::message2; + +/* + Tests based on ICU4J's MessageFormat2Test.java +and Mf2FeaturesTest.java +*/ + +/* + TODO: Tests need to be unified in a single format that + both ICU4C and ICU4J can use, rather than being embedded in code. 
+*/ + +/* +Tests reflect the syntax specified in + + https://github.com/unicode-org/message-format-wg/commits/main/spec/message.abnf + +as of the following commit from 2023-05-09: + https://github.com/unicode-org/message-format-wg/commit/194f6efcec5bf396df36a19bd6fa78d1fa2e0867 + +*/ + +void TestMessageFormat2::testEmptyMessage(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + LocalPointer test(testBuilder.setPattern("{}") + .setExpected("") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); +} + +void TestMessageFormat2::testPlainText(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + LocalPointer test(testBuilder.setPattern("{Hello World!}") + .setExpected("Hello World!") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); +} + +void TestMessageFormat2::testPlaceholders(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + LocalPointer test(testBuilder.setPattern("{Hello, {$userName}!}") + .setExpected("Hello, John!") + .setArgument("userName", "John", errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); +} + +void TestMessageFormat2::testArgumentMissing(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + UnicodeString message = "{Hello {$name}, today is {$today :datetime skeleton=yMMMMdEEEE}.}"; + LocalPointer cal(Calendar::createInstance(errorCode)); + CHECK_ERROR(errorCode); + + // November 23, 2022 at 7:42:37.123 PM + cal->set(2022, Calendar::NOVEMBER, 23, 19, 42, 37); + UDate TEST_DATE = cal->getTime(errorCode); + CHECK_ERROR(errorCode); + + LocalPointer test(testBuilder.setPattern(message) + .setArgument("name", "John", errorCode) + .setDateArgument("today", TEST_DATE, errorCode) + .setExpected("Hello John, today is Wednesday, November 23, 2022.") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + 
// Missing date argument + test.adoptInstead(testBuilder.setPattern(message) + .clearArguments(errorCode) + .setArgument("name", "John", errorCode) + .setExpected("Hello John, today is {$today}.") + .setExpectedError(U_UNRESOLVED_VARIABLE_ERROR) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + test.adoptInstead(testBuilder.setPattern(message) + .clearArguments(errorCode) + .setDateArgument("today", TEST_DATE, errorCode) + .setExpectedError(U_UNRESOLVED_VARIABLE_ERROR) + .setExpected("Hello {$name}, today is Wednesday, November 23, 2022.") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + // Both arguments missing + test.adoptInstead(testBuilder.setPattern(message) + .clearArguments(errorCode) + .setExpectedError(U_UNRESOLVED_VARIABLE_ERROR) + .setExpected("Hello {$name}, today is {$today}.") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); +} + +void TestMessageFormat2::testDefaultLocale(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + LocalPointer cal(Calendar::createInstance(errorCode)); + CHECK_ERROR(errorCode); + // November 23, 2022 at 7:42:37.123 PM + cal->set(2022, Calendar::NOVEMBER, 23, 19, 42, 37); + UDate TEST_DATE = cal->getTime(errorCode); + CHECK_ERROR(errorCode); + + UnicodeString message = "{Date: {$date :datetime skeleton=yMMMMdEEEE}.}"; + UnicodeString expectedEn = "Date: Wednesday, November 23, 2022."; + UnicodeString expectedRo = "Date: miercuri, 23 noiembrie 2022."; + + testBuilder.setPattern(message); + + LocalPointer test; + test.adoptInstead(testBuilder.clearArguments(errorCode) + .setDateArgument("date", TEST_DATE, errorCode) + .setExpected(expectedEn) + .setExpectSuccess() + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + test.adoptInstead(testBuilder.setExpected(expectedRo) + .setLocale(Locale("ro"), errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + Locale 
originalLocale = Locale::getDefault(); + Locale::setDefault(Locale::forLanguageTag("ro", errorCode), errorCode); + CHECK_ERROR(errorCode); + + test.adoptInstead(testBuilder.setExpected(expectedEn) + .setLocale(Locale("en", "US"), errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + test.adoptInstead(testBuilder.setExpected(expectedRo) + .clearLocale() + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + Locale::setDefault(originalLocale, errorCode); + CHECK_ERROR(errorCode); +} + +void TestMessageFormat2::testSpecialPluralWithDecimals(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + UnicodeString message; + + message = "let $amount = {$count :number}\n\ + match {$amount :plural}\n\ + when 1 {I have {$amount} dollar.}\n\ + when * {I have {$amount} dollars.}\n"; + + LocalPointer test; + + test.adoptInstead(testBuilder.setPattern(message) + .clearArguments(errorCode) + .setArgument("count", (int64_t) 1, errorCode) + .setExpected("I have 1 dollar.") + .setLocale(Locale("en", "US"), errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + message = "let $amount = {$count :number skeleton=|.00*|}\n\ + match {$amount :plural skeleton=|.00*|}\n\ + when 1 {I have {$amount} dollar.}\n\ + when * {I have {$amount} dollars.}\n"; + + test.adoptInstead(testBuilder.setPattern(message) + .setExpected("I have 1.00 dollar.") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); +} + +void TestMessageFormat2::testDefaultFunctionAndOptions(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + LocalPointer cal(Calendar::createInstance(errorCode)); + CHECK_ERROR(errorCode); + // November 23, 2022 at 7:42:37.123 PM + cal->set(2022, Calendar::NOVEMBER, 23, 19, 42, 37); + UDate TEST_DATE = cal->getTime(errorCode); + CHECK_ERROR(errorCode); + LocalPointer test; + + 
test.adoptInstead(testBuilder.setPattern("{Testing date formatting: {$date}.}") + .clearArguments(errorCode) + .setDateArgument("date", TEST_DATE, errorCode) + .setExpected("Testing date formatting: 23.11.2022, 19:42.") + .setLocale(Locale("ro"), errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + test.adoptInstead(testBuilder.setPattern("{Testing date formatting: {$date :datetime}.}") + .setExpected("Testing date formatting: 23.11.2022, 19:42.") + .setLocale(Locale("ro"), errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); +} + +void TestMessageFormat2::testSimpleSelection(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) { + (void) testBuilder; + (void) errorCode; + + /* Covered by testPlural */ +} + +void TestMessageFormat2::testComplexSelection(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + LocalPointer test; + + UnicodeString message = "match {$photoCount :plural} {$userGender :select}\n\ + when 1 masculine {{$userName} added a new photo to his album.}\n\ + when 1 feminine {{$userName} added a new photo to her album.}\n\ + when 1 * {{$userName} added a new photo to their album.}\n\ + when * masculine {{$userName} added {$photoCount} photos to his album.}\n\ + when * feminine {{$userName} added {$photoCount} photos to her album.}\n\ + when * * {{$userName} added {$photoCount} photos to their album.}"; + testBuilder.setPattern(message); + + int64_t count = 1; + test.adoptInstead(testBuilder.setArgument("photoCount", count, errorCode) + .setArgument("userGender", "masculine", errorCode) + .setArgument("userName", "John", errorCode) + .setExpected("John added a new photo to his album.") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + test.adoptInstead(testBuilder.setArgument("userGender", "feminine", errorCode) + .setArgument("userName", "Anna", errorCode) + .setExpected("Anna added a new photo to her album.") + 
.build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + test.adoptInstead(testBuilder.setArgument("userGender", "unknown", errorCode) + .setArgument("userName", "Anonymous", errorCode) + .setExpected("Anonymous added a new photo to their album.") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + count = 13; + test.adoptInstead(testBuilder.setArgument("photoCount", count, errorCode) + .setArgument("userGender", "masculine", errorCode) + .setArgument("userName", "John", errorCode) + .setExpected("John added 13 photos to his album.") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + test.adoptInstead(testBuilder.setArgument("userGender", "feminine", errorCode) + .setArgument("userName", "Anna", errorCode) + .setExpected("Anna added 13 photos to her album.") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + test.adoptInstead(testBuilder.setArgument("userGender", "unknown", errorCode) + .setArgument("userName", "Anonymous", errorCode) + .setExpected("Anonymous added 13 photos to their album.") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); +} + +void TestMessageFormat2::testSimpleLocalVariable(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + LocalPointer test; + LocalPointer cal(Calendar::createInstance(errorCode)); + CHECK_ERROR(errorCode); + // November 23, 2022 at 7:42:37.123 PM + cal->set(2022, Calendar::NOVEMBER, 23, 19, 42, 37); + UDate TEST_DATE = cal->getTime(errorCode); + CHECK_ERROR(errorCode); + + testBuilder.setPattern("let $expDate = {$expDate :datetime skeleton=yMMMdE}\n\ + {Your tickets expire on {$expDate}.}"); + + int64_t count = 1; + test.adoptInstead(testBuilder.setArgument("count", count, errorCode) + .setLocale(Locale("en"), errorCode) + .setDateArgument("expDate", TEST_DATE, errorCode) + .setExpected("Your tickets expire on Wed, Nov 23, 2022.") + .build(errorCode)); + 
TestUtils::runTestCase(*this, *test, errorCode);
+}
+// Combines a local $expDate declaration (datetime skeleton) with a $count :plural selector; checked for count 1 and 3 in locale "en".
+void TestMessageFormat2::testLocalVariableWithSelect(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) {
+    CHECK_ERROR(errorCode);
+
+    LocalPointer test;
+    LocalPointer cal(Calendar::createInstance(errorCode));
+    CHECK_ERROR(errorCode);
+    // November 23, 2022 at 7:42:37 PM (the set() call below passes no millisecond value)
+    cal->set(2022, Calendar::NOVEMBER, 23, 19, 42, 37);
+    UDate TEST_DATE = cal->getTime(errorCode);
+    CHECK_ERROR(errorCode);
+
+    testBuilder.setPattern("let $expDate = {$expDate :datetime skeleton=yMMMdE}\n\
+ match {$count :plural}\n\
+ when 1 {Your ticket expires on {$expDate}.}\n\
+ when * {Your {$count} tickets expire on {$expDate}.}\n");
+
+    int64_t count = 1;
+    test.adoptInstead(testBuilder.setArgument("count", count, errorCode)
+            .setLocale(Locale("en"), errorCode)
+            .setDateArgument("expDate", TEST_DATE, errorCode)
+            .setExpected("Your ticket expires on Wed, Nov 23, 2022.")
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    count = 3;
+    test.adoptInstead(testBuilder.setArgument("count", count, errorCode) // only count changes; locale and expDate are not set again for this case
+            .setExpected("Your 3 tickets expire on Wed, Nov 23, 2022.")
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+}
+// Formats one fixed date (October 27, 2022, 00:00:00) through :datetime with a skeleton, with each datestyle value, and finally with no function annotation.
+void TestMessageFormat2::testDateFormat(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) {
+    LocalPointer cal(Calendar::createInstance(errorCode));
+    CHECK_ERROR(errorCode);
+
+    cal->set(2022, Calendar::OCTOBER, 27, 0, 0, 0);
+    UDate expiration = cal->getTime(errorCode);
+    CHECK_ERROR(errorCode);
+
+    LocalPointer test(testBuilder.setPattern("{Your card expires on {$exp :datetime skeleton=yMMMdE}!}")
+            .setLocale(Locale("en"), errorCode)
+            .setExpected("Your card expires on Thu, Oct 27, 2022!")
+            .setDateArgument("exp", expiration, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    // datestyle=full
+    test.adoptInstead(testBuilder.setPattern("{Your card expires on {$exp :datetime datestyle=full}!}")
+            .setExpected("Your card expires on Thursday, October 27, 2022!")
+            .setDateArgument("exp", expiration, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    // datestyle=long
+    test.adoptInstead(testBuilder.setPattern("{Your card expires on {$exp :datetime datestyle=long}!}")
+            .setExpected("Your card expires on October 27, 2022!")
+            .setDateArgument("exp", expiration, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    // datestyle=medium
+    test.adoptInstead(testBuilder.setPattern("{Your card expires on {$exp :datetime datestyle=medium}!}")
+            .setExpected("Your card expires on Oct 27, 2022!")
+            .setDateArgument("exp", expiration, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    // datestyle=short
+    test.adoptInstead(testBuilder.setPattern("{Your card expires on {$exp :datetime datestyle=short}!}")
+            .setExpected("Your card expires on 10/27/22!")
+            .setDateArgument("exp", expiration, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+
+/*
+ This test would require the calendar to be passed as a UObject* with the datetime formatter
+ doing an RTTI check -- however, that would be awkward, since it would have to check the tag for each
+ possible subclass of `Calendar`. datetime currently has no support for formatting any object argument
+
+ cal.adoptInstead(new GregorianCalendar(2022, Calendar::OCTOBER, 27, errorCode));
+ if (cal.isValid()) {
+ test.adoptInstead(testBuilder.setPattern("{Your card expires on {$exp :datetime skeleton=yMMMdE}!}")
+ .setExpected("Your card expires on Thu, Oct 27, 2022!")
+ .setArgument("exp", cal.orphan(), errorCode)
+ .build(errorCode));
+ TestUtils::runTestCase(*this, *test, errorCode);
+ }
+*/
+
+    // Implied function based on type of the object to format
+    test.adoptInstead(testBuilder.setPattern("{Your card expires on {$exp}!}")
+            .setExpected("Your card expires on 10/27/22, 12:00\u202FAM!")
+            .setDateArgument("exp", expiration, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+}
+// Runs one plural message with $count passed as int64_t (1, 42) and then as the strings "1" and "42"; both forms expect the same output.
+void TestMessageFormat2::testPlural(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) {
+    UnicodeString message = "match {$count :plural}\n\
+ when 1 {You have one notification.}\n \
+ when * {You have {$count} notifications.}\n";
+
+    int64_t count = 1;
+    LocalPointer test(testBuilder.setPattern(message)
+            .setExpected("You have one notification.")
+            .setArgument("count", count, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+
+    count = 42;
+    test.adoptInstead(testBuilder.setExpected("You have 42 notifications.")
+            .setArgument("count", count, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    // From here on the argument is passed as a string literal instead of the int64_t variable.
+    count = 1; // NOTE(review): dead store; the case below passes the literal "1", not `count`
+    test.adoptInstead(testBuilder.setPattern(message)
+            .setExpected("You have one notification.")
+            .setArgument("count", "1", errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+
+    count = 42; // NOTE(review): dead store; the case below passes the literal "42", not `count`
+    test.adoptInstead(testBuilder.setExpected("You have 42 notifications.")
+            .setArgument("count", "42", errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+}
+// Checks :selectordinal with exact keys (1, 2, 3) and the keyword variants one/two/few/*, passing $place as a string each time.
+void TestMessageFormat2::testPluralOrdinal(TestCase::Builder& testBuilder,
IcuTestErrorCode& errorCode) {
+    UnicodeString message = "match {$place :selectordinal}\n\
+ when 1 {You got the gold medal}\n \
+ when 2 {You got the silver medal}\n \
+ when 3 {You got the bronze medal}\n\
+ when one {You got in the {$place}st place}\n\
+ when two {You got in the {$place}nd place}\n \
+ when few {You got in the {$place}rd place}\n \
+ when * {You got in the {$place}th place}\n";
+
+    // Exact keys 1, 2 and 3
+    LocalPointer test(testBuilder.setPattern(message)
+            .setExpected("You got the gold medal")
+            .setArgument("place", "1", errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+
+    test.adoptInstead(testBuilder.setExpected("You got the silver medal")
+            .setArgument("place", "2", errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+
+    test.adoptInstead(testBuilder.setExpected("You got the bronze medal")
+            .setArgument("place", "3", errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+
+    // Keyword variants: one (21st), two (32nd), few (23rd) and the catch-all (15th)
+    test.adoptInstead(testBuilder.setExpected("You got in the 21st place")
+            .setArgument("place", "21", errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+
+    test.adoptInstead(testBuilder.setExpected("You got in the 32nd place")
+            .setArgument("place", "32", errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+
+    test.adoptInstead(testBuilder.setExpected("You got in the 23rd place")
+            .setArgument("place", "23", errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+
+    test.adoptInstead(testBuilder.setExpected("You got in the 15th place")
+            .setArgument("place", "15", errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+}
+
+U_CDECL_BEGIN
+// Value deleter for TemperatureFormatter's cache. Entries are created with `new`,
+// so they have to be destroyed with `delete`; uprv_free would only release the
+// storage and never run the LocalizedNumberFormatter destructor.
+static void U_CALLCONV deleteCachedNumberFormatter(void* obj) {
+    delete static_cast<number::LocalizedNumberFormatter*>(obj);
+}
+U_CDECL_END
+
+// Creates the (initially empty) per-unit formatter cache and bumps the factory's
+// constructCount. Sets U_MEMORY_ALLOCATION_ERROR if the table cannot be allocated;
+// constructCount is only incremented when construction succeeded.
+TemperatureFormatter::TemperatureFormatter(const Locale& l, TemperatureFormatterFactory& c, UErrorCode& errorCode) : locale(l), counter(c) {
+    CHECK_ERROR(errorCode);
+
+    cachedFormatters.adoptInstead(new Hashtable(uhash_compareUnicodeString, nullptr, errorCode));
+    if (!cachedFormatters.isValid()) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    // The Hashtable constructor reports its own failures through errorCode
+    CHECK_ERROR(errorCode);
+    cachedFormatters->setValueDeleter(deleteCachedNumberFormatter);
+    counter.constructCount++;
+}
+
+// Factory hook: returns a new TemperatureFormatter that reports back to this
+// factory's counters, or nullptr if errorCode indicates a failure.
+Formatter* TemperatureFormatterFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) {
+    NULL_ON_ERROR(errorCode);
+
+    LocalPointer result(new TemperatureFormatter(locale, *this, errorCode));
+    NULL_ON_ERROR(errorCode);
+    return result.orphan();
+}
+
+// Formats the context's input as a temperature. Requires a formattable input and
+// a string-valued `unit` option ("C" and "F" select Celsius/Fahrenheit; any other
+// value formats without a unit); `skeleton` is optional. Number formatters are
+// created lazily and cached.
+// NOTE(review): the cache key is the unit only, so a cached entry is reused even if
+// a later call on the same instance carries a different skeleton -- confirm intended.
+void TemperatureFormatter::format(FormattingContext& context, UErrorCode& errorCode) const {
+    CHECK_ERROR(errorCode);
+
+    // Argument must be present
+    if (!context.hasFormattableInput()) {
+        context.setFormattingError("temp", errorCode);
+        return;
+    }
+    // Assume arg is not-yet-formatted
+    const Formattable& toFormat = context.getFormattableInput();
+
+    counter.formatCount++;
+
+    UnicodeString unit;
+    bool unitExists = context.getStringOption(UnicodeString("unit"), unit);
+    if (!unitExists) {
+        context.setFormattingError("temp", errorCode);
+        return;
+    }
+    UnicodeString skeleton;
+    bool skeletonExists = context.getStringOption(UnicodeString("skeleton"), skeleton);
+
+    number::LocalizedNumberFormatter* realNfCached = (number::LocalizedNumberFormatter*) cachedFormatters->get(unit);
+    number::LocalizedNumberFormatter realNf;
+    if (realNfCached == nullptr) {
+        number::LocalizedNumberFormatter nf = skeletonExists
+            ? number::NumberFormatter::forSkeleton(skeleton, errorCode).locale(locale)
+            : number::NumberFormatter::withLocale(locale);
+        // Do not cache a formatter built from a skeleton that failed to parse
+        CHECK_ERROR(errorCode);
+
+        if (unit == "C") {
+            counter.cFormatterCount++;
+            realNf = nf.unit(MeasureUnit::getCelsius());
+        } else if (unit == "F") {
+            counter.fFormatterCount++;
+            realNf = nf.unit(MeasureUnit::getFahrenheit());
+        } else {
+            realNf = nf;
+        }
+        // The table adopts this copy; it is released by the cache's value deleter
+        realNfCached = new number::LocalizedNumberFormatter(realNf);
+        if (realNfCached == nullptr) {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        cachedFormatters->put(unit, realNfCached, errorCode);
+    } else {
+        realNf = *realNfCached;
+    }
+
+    number::FormattedNumber result;
+    switch (toFormat.getType()) {
+    case Formattable::Type::kDouble: {
+        result = realNf.formatDouble(toFormat.getDouble(),
+                                     errorCode);
+        break;
+    }
+    case Formattable::Type::kLong: {
+        result = realNf.formatInt(toFormat.getLong(),
+                                  errorCode);
+        break;
+    }
+    case Formattable::Type::kInt64: {
+        result = realNf.formatInt(toFormat.getInt64(),
+                                  errorCode);
+        break;
+    }
+    default: {
+        // Non-numeric input: produce an empty string instead of a formatted number
+        context.setOutput(UnicodeString());
+        return;
+    }
+    }
+    context.setOutput(std::move(result));
+}
+
+TemperatureFormatter::~TemperatureFormatter() {}
+
+// putFormattableArg overloads: wrap `arg` in a heap-allocated Formattable and put it
+// into `arguments` under key `k`; U_MEMORY_ALLOCATION_ERROR is set if allocation fails.
+void putFormattableArg(Hashtable& arguments, const UnicodeString& k, const UnicodeString& arg, UErrorCode& errorCode) {
+    CHECK_ERROR(errorCode);
+    Formattable* valPtr(new Formattable(arg));
+    if (valPtr == nullptr) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+    } else {
+        arguments.put(k, valPtr, errorCode);
+    }
+}
+
+void putFormattableArg(Hashtable& arguments, const UnicodeString& k, int64_t arg, UErrorCode& errorCode) {
+    CHECK_ERROR(errorCode);
+    Formattable* valPtr(new Formattable(arg));
+    if (valPtr == nullptr) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+    } else {
+        arguments.put(k, valPtr, errorCode);
+    }
+}
+
+void putFormattableArg(Hashtable& arguments, const UnicodeString& k, double arg, UErrorCode& errorCode) {
+    CHECK_ERROR(errorCode);
+    Formattable* valPtr(new Formattable(arg));
+    if (valPtr ==
nullptr) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+    } else {
+        arguments.put(k, valPtr, errorCode);
+    }
+}
+
+// Registers a TemperatureFormatterFactory under the function name "temp", formats
+// the same message repeatedly with units "C" and "F", and uses the factory's
+// counters to assert that one formatter instance (and one cached number formatter
+// per unit) served all calls. It then checks the output for the |.00/w| and
+// |.0/w| skeletons.
+void TestMessageFormat2::testFormatterIsCreatedOnce(IcuTestErrorCode& errorCode) {
+    LocalPointer frBuilder(FunctionRegistry::builder(errorCode));
+    CHECK_ERROR(errorCode);
+
+    // Counter will be adopted by function registry
+    TemperatureFormatterFactory* counter = new TemperatureFormatterFactory();
+    if (counter == nullptr) {
+        ((UErrorCode&) errorCode) = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    LocalPointer reg(frBuilder->setFormatter(FunctionName("temp"),
+                                             counter, errorCode)
+            .build(errorCode));
+    CHECK_ERROR(errorCode);
+    UnicodeString message = "{Testing {$count :temp unit=$unit skeleton=|.00/w|}.}";
+
+    LocalPointer mfBuilder(MessageFormatter::builder(errorCode));
+    CHECK_ERROR(errorCode);
+    mfBuilder->setPattern(message).setFunctionRegistry(reg.getAlias());
+    UParseError parseError;
+    LocalPointer mf(mfBuilder->build(parseError, errorCode));
+    UnicodeString result;
+    UnicodeString countKey("count");
+    UnicodeString unitKey("unit");
+
+    const int64_t maxCount = 10;
+    char expected[20];
+    LocalPointer argumentsBuilder(MessageArguments::builder(errorCode));
+    LocalPointer arguments;
+    CHECK_ERROR(errorCode);
+    for (int64_t count = 0; count < maxCount; count++) {
+        // int64_t is not `long` on every platform, so print it as long long
+        snprintf(expected, sizeof(expected), "Testing %lld°C.", static_cast<long long>(count));
+
+        argumentsBuilder->addInt64(countKey, count, errorCode);
+        argumentsBuilder->add(unitKey, "C", errorCode);
+        arguments.adoptInstead(argumentsBuilder->build(errorCode));
+        CHECK_ERROR(errorCode);
+
+        mf->formatToString(*arguments, errorCode, result);
+        assertEquals("temperature formatter", expected, result);
+        result.remove();
+
+        snprintf(expected, sizeof(expected), "Testing %lld°F.", static_cast<long long>(count));
+        argumentsBuilder->addInt64(countKey, count, errorCode);
+        argumentsBuilder->add(unitKey, "F", errorCode);
+        arguments.adoptInstead(argumentsBuilder->build(errorCode));
+        CHECK_ERROR(errorCode);
+
+        mf->formatToString(*arguments, errorCode, result);
+        assertEquals("temperature formatter", expected, result);
+        result.remove();
+    }
+
+    // One formatter instance, two format calls per loop iteration, one cached number formatter per unit
+    assertEquals("cached formatter", 1, counter->constructCount);
+    assertEquals("cached formatter", (int64_t) maxCount * 2, (int64_t) counter->formatCount);
+    assertEquals("cached formatter", 1, counter->fFormatterCount);
+    assertEquals("cached formatter", 1, counter->cFormatterCount);
+
+    // Double-valued inputs with the |.00/w| skeleton
+    result.remove();
+    argumentsBuilder->addDouble(countKey, 12.0, errorCode);
+    argumentsBuilder->add(unitKey, "C", errorCode);
+    arguments.adoptInstead(argumentsBuilder->build(errorCode));
+    CHECK_ERROR(errorCode);
+    mf->formatToString(*arguments, errorCode, result);
+    assertEquals("cached formatter", "Testing 12°C.", result);
+
+    result.remove();
+    argumentsBuilder->addDouble(countKey, 12.5, errorCode);
+    argumentsBuilder->add(unitKey, "F", errorCode);
+    arguments.adoptInstead(argumentsBuilder->build(errorCode));
+    CHECK_ERROR(errorCode);
+    mf->formatToString(*arguments, errorCode, result);
+    assertEquals("cached formatter", "Testing 12.50°F.", result);
+
+    result.remove();
+    argumentsBuilder->addDouble(countKey, 12.54, errorCode);
+    argumentsBuilder->add(unitKey, "C", errorCode);
+    arguments.adoptInstead(argumentsBuilder->build(errorCode));
+    CHECK_ERROR(errorCode);
+    mf->formatToString(*arguments, errorCode, result);
+    assertEquals("cached formatter", "Testing 12.54°C.", result);
+
+    result.remove();
+    argumentsBuilder->addDouble(countKey, 12.54321, errorCode);
+    argumentsBuilder->add(unitKey, "F", errorCode);
+    arguments.adoptInstead(argumentsBuilder->build(errorCode));
+
+    CHECK_ERROR(errorCode);
+    mf->formatToString(*arguments, errorCode, result);
+    assertEquals("cached formatter", "Testing 12.54°F.", result);
+
+    // Check skeleton
+    message = "{Testing {$count :temp unit=$unit skeleton=|.0/w|}.}";
+    mfBuilder->setPattern(message);
+    mf.adoptInstead(mfBuilder->build(parseError, errorCode));
+
+    result.remove();
+    argumentsBuilder->addDouble(countKey, 12.0, errorCode);
+    argumentsBuilder->add(unitKey, "C", errorCode);
+    arguments.adoptInstead(argumentsBuilder->build(errorCode));
+
+    CHECK_ERROR(errorCode);
+    mf->formatToString(*arguments, errorCode, result);
+    assertEquals("cached formatter", "Testing 12°C.", result);
+
+    result.remove();
+    argumentsBuilder->addDouble(countKey, 12.5, errorCode);
+    argumentsBuilder->add(unitKey, "F", errorCode);
+    arguments.adoptInstead(argumentsBuilder->build(errorCode));
+
+    CHECK_ERROR(errorCode);
+    mf->formatToString(*arguments, errorCode, result);
+    assertEquals("cached formatter", "Testing 12.5°F.", result);
+
+    result.remove();
+    argumentsBuilder->addDouble(countKey, 12.54, errorCode);
+    argumentsBuilder->add(unitKey, "C", errorCode);
+    arguments.adoptInstead(argumentsBuilder->build(errorCode));
+
+    CHECK_ERROR(errorCode);
+    mf->formatToString(*arguments, errorCode, result);
+    assertEquals("cached formatter", "Testing 12.5°C.", result);
+
+    result.remove();
+    argumentsBuilder->addDouble(countKey, 12.54321, errorCode);
+    argumentsBuilder->add(unitKey, "F", errorCode);
+    arguments.adoptInstead(argumentsBuilder->build(errorCode));
+    CHECK_ERROR(errorCode);
+    mf->formatToString(*arguments, errorCode, result);
+    assertEquals("cached formatter", "Testing 12.5°F.", result);
+
+}
+
+// Checks :plural with a literal offset=2 option on both the selector and the
+// :number placeholders, for counts 1, 2, 3, 4 and 12.
+void TestMessageFormat2::testPluralWithOffset(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) {
+    UnicodeString message = "match {$count :plural offset=2}\n\
+ when 1 {Anna}\n\
+ when 2 {Anna and Bob}\n\
+ when one {Anna, Bob, and {$count :number offset=2} other guest}\n\
+ when * {Anna, Bob, and {$count :number offset=2} other guests}\n";
+
+    testBuilder.setPattern(message);
+    testBuilder.setName("plural with offset");
+
+    LocalPointer test(testBuilder.setExpected("Anna")
+            .setArgument("count", (int64_t) 1, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+
+    test.adoptInstead(testBuilder.setExpected("Anna and Bob")
+            .setArgument("count", (int64_t) 2, errorCode) +
.build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    // Counts 3, 4 and 12: the expected text shows the count reduced by the offset of 2.
+    test.adoptInstead(testBuilder.setExpected("Anna, Bob, and 1 other guest")
+            .setArgument("count", (int64_t) 3, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+
+    test.adoptInstead(testBuilder.setExpected("Anna, Bob, and 2 other guests")
+            .setArgument("count", (int64_t) 4, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+
+    test.adoptInstead(testBuilder.setExpected("Anna, Bob, and 10 other guests")
+            .setArgument("count", (int64_t) 12, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+}
+// Same expectations as testPluralWithOffset, but the offset is applied once in a local declaration ($foo) that is then used as selector and placeholder; a second message does the same with a number skeleton.
+void TestMessageFormat2::testPluralWithOffsetAndLocalVar(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) {
+
+    // $foo should "inherit" the offset. NOTE(review): the first line of this literal ends with a bare line continuation (no "\n"), unlike the other messages here -- confirm intended.
+    UnicodeString message = "let $foo = {$count :number offset=2}\
+ match {$foo :plural}\n \
+ when 1 {Anna}\n \
+ when 2 {Anna and Bob}\n \
+ when one {Anna, Bob, and {$foo} other guest}\n \
+ when * {Anna, Bob, and {$foo} other guests}\n";
+
+    testBuilder.setPattern(message);
+    testBuilder.setName("plural with offset and local var");
+
+    LocalPointer test(testBuilder.setExpected("Anna")
+            .setArgument("count", (int64_t) 1, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+
+    test.adoptInstead(testBuilder.setExpected("Anna and Bob")
+            .setArgument("count", (int64_t) 2, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+
+    test.adoptInstead(testBuilder.setExpected("Anna, Bob, and 1 other guest")
+            .setArgument("count", (int64_t) 3, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+
+    test.adoptInstead(testBuilder.setExpected("Anna, Bob, and 2 other guests")
+            .setArgument("count", (int64_t) 4, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+
+    test.adoptInstead(testBuilder.setExpected("Anna, Bob, and 10 other guests")
+            .setArgument("count", (int64_t) 12, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    // Second message: $foo carries a number skeleton instead of an offset.
+    message = "let $foo = {$amount :number skeleton=|.00/w|}\n\
+ match {$foo :plural}\n\
+ when 1 {Last dollar}\n\
+ when one {{$foo} dollar}\n\
+ when * {{$foo} dollars}\n";
+    testBuilder.setPattern(message);
+    test.adoptInstead(testBuilder.setExpected("Last dollar")
+            .setArgument("amount", (int64_t) 1, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    test.adoptInstead(testBuilder.setExpected("2 dollars")
+            .setArgument("amount", (int64_t) 2, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    test.adoptInstead(testBuilder.setExpected("3 dollars")
+            .setArgument("amount", (int64_t) 3, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+}
+// $foo is declared in terms of $baz, which is itself only declared two lines later; the test expects U_UNRESOLVED_VARIABLE_ERROR and "{$baz}" left in the output.
+void TestMessageFormat2::testDeclareBeforeUse(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) {
+
+    UnicodeString message = "let $foo = {$baz :number}\n\
+ let $bar = {$foo}\n \
+ let $baz = {$bar}\n \
+ {The message uses {$baz} and works}\n";
+    testBuilder.setPattern(message);
+    testBuilder.setName("declare-before-use");
+
+    LocalPointer test(testBuilder.setExpected("The message uses {$baz} and works")
+            .setExpectedError(U_UNRESOLVED_VARIABLE_ERROR)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+}
+// Like testPluralWithOffset, but the offset option takes its value from the $delta argument instead of a literal.
+void TestMessageFormat2::testVariableOptionsInSelector(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) {
+    UnicodeString message = "match {$count :plural offset=$delta}\n\
+ when 1 {A}\n\
+ when 2 {A and B}\n\
+ when one {A, B, and {$count :number offset=$delta} more character}\n\
+ when * {A, B, and {$count :number offset=$delta} more characters}\n";
+
+    testBuilder.setPattern(message);
+    testBuilder.setName("variable options in selector");
+    testBuilder.setExpectSuccess(); // the previous test left an expected error configured on the shared builder
+
+    LocalPointer test(testBuilder.setExpected("A") +
.setArgument("count", (int64_t) 1, errorCode)
+            .setArgument("delta", (int64_t) 2, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    test.adoptInstead(testBuilder.setExpected("A and B")
+            .setArgument("count", (int64_t) 2, errorCode)
+            .setArgument("delta", (int64_t) 2, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    test.adoptInstead(testBuilder.setExpected("A, B, and 1 more character")
+            .setArgument("count", (int64_t) 3, errorCode)
+            .setArgument("delta", (int64_t) 2, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    test.adoptInstead(testBuilder.setExpected("A, B, and 5 more characters")
+            .setArgument("count", (int64_t) 7, errorCode)
+            .setArgument("delta", (int64_t) 2, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    // Second message: per the expectations below, the exact keys 1 and 2 match the original count for every delta, while the "*" variant prints count minus delta.
+    message = "match {$count :plural offset=$delta}\n\
+ when 1 {Exactly 1}\n\
+ when 2 {Exactly 2}\n\
+ when * {Count = {$count :number offset=$delta} and delta={$delta}.}\n";
+    testBuilder.setPattern(message);
+    // count = 1 with delta 0, 1, 2
+    test.adoptInstead(testBuilder.setExpected("Exactly 1")
+            .setArgument("count", (int64_t) 1, errorCode)
+            .setArgument("delta", (int64_t) 0, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    test.adoptInstead(testBuilder.setExpected("Exactly 1")
+            .setArgument("count", (int64_t) 1, errorCode)
+            .setArgument("delta", (int64_t) 1, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    test.adoptInstead(testBuilder.setExpected("Exactly 1")
+            .setArgument("count", (int64_t) 1, errorCode)
+            .setArgument("delta", (int64_t) 2, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    test.adoptInstead(testBuilder.setExpected("Exactly 2") // count = 2 with delta 0, 1, 2
+            .setArgument("count", (int64_t) 2, errorCode)
+            .setArgument("delta", (int64_t) 0, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    test.adoptInstead(testBuilder.setExpected("Exactly 2")
+            .setArgument("count", (int64_t) 2, errorCode)
+            .setArgument("delta", (int64_t) 1, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    test.adoptInstead(testBuilder.setExpected("Exactly 2")
+            .setArgument("count", (int64_t) 2, errorCode)
+            .setArgument("delta", (int64_t) 2, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    test.adoptInstead(testBuilder.setExpected("Count = 3 and delta=0.") // count = 3 with delta 0, 1, 2
+            .setArgument("count", (int64_t) 3, errorCode)
+            .setArgument("delta", (int64_t) 0, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    test.adoptInstead(testBuilder.setExpected("Count = 2 and delta=1.")
+            .setArgument("count", (int64_t) 3, errorCode)
+            .setArgument("delta", (int64_t) 1, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    test.adoptInstead(testBuilder.setExpected("Count = 1 and delta=2.")
+            .setArgument("count", (int64_t) 3, errorCode)
+            .setArgument("delta", (int64_t) 2, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    test.adoptInstead(testBuilder.setExpected("Count = 23 and delta=0.") // count = 23 with delta 0, 1, 2
+            .setArgument("count", (int64_t) 23, errorCode)
+            .setArgument("delta", (int64_t) 0, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    test.adoptInstead(testBuilder.setExpected("Count = 22 and delta=1.")
+            .setArgument("count", (int64_t) 23, errorCode)
+            .setArgument("delta", (int64_t) 1, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    test.adoptInstead(testBuilder.setExpected("Count = 21 and delta=2.")
+            .setArgument("count", (int64_t) 23, errorCode)
+            .setArgument("delta", (int64_t) 2, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+}
+// Variant of testVariableOptionsInSelector in which the offset is applied in a local declaration ($offCount) that is then used as the selector.
+void TestMessageFormat2::testVariableOptionsInSelectorWithLocalVar(TestCase::Builder& testBuilder,
IcuTestErrorCode& errorCode) {
+    CHECK_ERROR(errorCode);
+
+    UnicodeString messageFix = "let $offCount = {$count :number offset=2}\n\
+ match {$offCount :plural}\n\
+ when 1 {A}\n\
+ when 2 {A and B}\n\
+ when one {A, B, and {$offCount} more character}\n\
+ when * {A, B, and {$offCount} more characters}\n";
+
+    testBuilder.setPattern(messageFix); // first message: literal offset=2
+    testBuilder.setName("variable options in selector with local var");
+    testBuilder.setExpectSuccess();
+
+    LocalPointer test(testBuilder.setExpected("A")
+            .setArgument("count", (int64_t) 1, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    test.adoptInstead(testBuilder.setExpected("A and B")
+            .setArgument("count", (int64_t) 2, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    test.adoptInstead(testBuilder.setExpected("A, B, and 1 more character")
+            .setArgument("count", (int64_t) 3, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    test.adoptInstead(testBuilder.setExpected("A, B, and 5 more characters")
+            .setArgument("count", (int64_t) 7, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    // Second message: same variants, but the offset value comes from the $delta argument.
+    UnicodeString messageVar = "let $offCount = {$count :number offset=$delta}\n\
+ match {$offCount :plural}\n\
+ when 1 {A}\n\
+ when 2 {A and B}\n\
+ when one {A, B, and {$offCount} more character}\n\
+ when * {A, B, and {$offCount} more characters}\n";
+    testBuilder.setPattern(messageVar);
+
+    test.adoptInstead(testBuilder.setExpected("A")
+            .setArgument("count", (int64_t) 1, errorCode)
+            .setArgument("delta", (int64_t) 2, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    test.adoptInstead(testBuilder.setExpected("A and B")
+            .setArgument("count", (int64_t) 2, errorCode)
+            .setArgument("delta", (int64_t) 2, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    test.adoptInstead(testBuilder.setExpected("A, B, and 1 more character")
+            .setArgument("count", (int64_t) 3, errorCode)
+            .setArgument("delta", (int64_t) 2, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    test.adoptInstead(testBuilder.setExpected("A, B, and 5 more characters")
+            .setArgument("count", (int64_t) 7, errorCode)
+            .setArgument("delta", (int64_t) 2, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    // Third message: the "*" variant prints $count, $offCount and $delta side by side; per the expectations, $count stays unchanged and $offCount is count minus delta.
+    UnicodeString messageVar2 = "let $offCount = {$count :number offset=$delta}\n\
+ match {$offCount :plural}\n\
+ when 1 {Exactly 1}\n\
+ when 2 {Exactly 2}\n\
+ when * {Count = {$count}, OffCount = {$offCount}, and delta={$delta}.}\n";
+    testBuilder.setPattern(messageVar2);
+    test.adoptInstead(testBuilder.setExpected("Exactly 1") // count = 1 with delta 0, 1, 2
+            .setArgument("count", (int64_t) 1, errorCode)
+            .setArgument("delta", (int64_t) 0, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    test.adoptInstead(testBuilder.setExpected("Exactly 1")
+            .setArgument("count", (int64_t) 1, errorCode)
+            .setArgument("delta", (int64_t) 1, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    test.adoptInstead(testBuilder.setExpected("Exactly 1")
+            .setArgument("count", (int64_t) 1, errorCode)
+            .setArgument("delta", (int64_t) 2, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    // count = 2 with delta 0, 1, 2
+    test.adoptInstead(testBuilder.setExpected("Exactly 2")
+            .setArgument("count", (int64_t) 2, errorCode)
+            .setArgument("delta", (int64_t) 0, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    test.adoptInstead(testBuilder.setExpected("Exactly 2")
+            .setArgument("count", (int64_t) 2, errorCode)
+            .setArgument("delta", (int64_t) 1, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    test.adoptInstead(testBuilder.setExpected("Exactly 2")
+            .setArgument("count", (int64_t) 2, errorCode)
+            .setArgument("delta", (int64_t) 2, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    test.adoptInstead(testBuilder.setExpected("Count = 3, OffCount = 3, and delta=0.") // count = 3 with delta 0, 1, 2
+            .setArgument("count", (int64_t) 3, errorCode)
+            .setArgument("delta", (int64_t) 0, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    test.adoptInstead(testBuilder.setExpected("Count = 3, OffCount = 2, and delta=1.")
+            .setArgument("count", (int64_t) 3, errorCode)
+            .setArgument("delta", (int64_t) 1, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    test.adoptInstead(testBuilder.setExpected("Count = 3, OffCount = 1, and delta=2.")
+            .setArgument("count", (int64_t) 3, errorCode)
+            .setArgument("delta", (int64_t) 2, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    // count = 23 with delta 0, 1, 2
+    test.adoptInstead(testBuilder.setExpected("Count = 23, OffCount = 23, and delta=0.")
+            .setArgument("count", (int64_t) 23, errorCode)
+            .setArgument("delta", (int64_t) 0, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    test.adoptInstead(testBuilder.setExpected("Count = 23, OffCount = 22, and delta=1.")
+            .setArgument("count", (int64_t) 23, errorCode)
+            .setArgument("delta", (int64_t) 1, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+    test.adoptInstead(testBuilder.setExpected("Count = 23, OffCount = 21, and delta=2.")
+            .setArgument("count", (int64_t) 23, errorCode)
+            .setArgument("delta", (int64_t) 2, errorCode)
+            .build(errorCode));
+    TestUtils::runTestCase(*this, *test, errorCode);
+}
+
+// Entry point for the feature tests: creates one TestCase::Builder and passes it, with a shared error code, to each test below in turn (testFormatterIsCreatedOnce takes only the error code).
+void TestMessageFormat2::featureTests() {
+    IcuTestErrorCode errorCode(*this, "featureTests");
+
+    LocalPointer testBuilder(TestCase::builder(errorCode));
+    testBuilder->setName("featureTests"); // NOTE(review): testBuilder is dereferenced without a validity/error check -- confirm TestCase::builder cannot return null here
+
+    testEmptyMessage(*testBuilder, errorCode);
+    testPlainText(*testBuilder, errorCode);
+    testPlaceholders(*testBuilder, errorCode);
+    testArgumentMissing(*testBuilder, errorCode);
+    testDefaultLocale(*testBuilder, errorCode);
+    testSpecialPluralWithDecimals(*testBuilder, errorCode);
+    testDefaultFunctionAndOptions(*testBuilder, errorCode);
+    testSimpleSelection(*testBuilder, errorCode);
+    testComplexSelection(*testBuilder, errorCode);
+    testSimpleLocalVariable(*testBuilder, errorCode);
+    testLocalVariableWithSelect(*testBuilder, errorCode);
+
+    testDateFormat(*testBuilder, errorCode);
+    testPlural(*testBuilder, errorCode);
+    testPluralOrdinal(*testBuilder, errorCode);
+    testFormatterIsCreatedOnce(errorCode);
+    testPluralWithOffset(*testBuilder, errorCode);
+    testPluralWithOffsetAndLocalVar(*testBuilder, errorCode);
+    testDeclareBeforeUse(*testBuilder, errorCode);
+    testVariableOptionsInSelector(*testBuilder, errorCode);
+    testVariableOptionsInSelectorWithLocalVar(*testBuilder, errorCode);
+}
+// Out-of-line definitions of the (empty) TestCase and TestCase::Builder destructors.
+TestCase::~TestCase() {}
+TestCase::Builder::~Builder() {}
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
diff --git a/icu4c/source/test/intltest/messageformat2test_fromjson.cpp b/icu4c/source/test/intltest/messageformat2test_fromjson.cpp
new file mode 100644
index 000000000000..8387c29ce870
--- /dev/null
+++ b/icu4c/source/test/intltest/messageformat2test_fromjson.cpp
@@ -0,0 +1,541 @@
+// © 2016 and later: Unicode, Inc. and others.
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/messageformat2.h"
+#include "messageformat2test.h"
+
+using namespace icu::message2;
+
+/*
+ TODO: Tests need to be unified in a single format that
+ both ICU4C and ICU4J can use, rather than being embedded in code.
+
+ Tests are included in their current state to give a sense of
+ how much test coverage has been achieved. Most of the testing is
+ of the parser/serializer; the formatter needs to be tested more
+ thoroughly.
+*/ + +/* +Tests reflect the syntax specified in + + https://github.com/unicode-org/message-format-wg/commits/main/spec/message.abnf + +as of the following commit from 2023-05-09: + https://github.com/unicode-org/message-format-wg/commit/194f6efcec5bf396df36a19bd6fa78d1fa2e0867 + +*/ + +/* + Transcribed from https://github.com/messageformat/messageformat/blob/main/packages/mf2-messageformat/src/__fixtures/test-messages.json +https://github.com/messageformat/messageformat/commit/6656c95d66414da29a332a6f5bbb225371f2b9a3 + +*/ +void TestMessageFormat2::jsonTests(IcuTestErrorCode& errorCode) { + LocalPointer testBuilder(TestCase::builder(errorCode)); + testBuilder->setName("jsonTests"); + + LocalPointer test(testBuilder->setPattern("{hello}") + .setExpected("hello") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{hello {|world|}}") + .setExpected("hello world") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{hello {||}}") + .setExpected("hello ") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{hello {$place}}") + .setExpected("hello world") + .setArgument("place", "world", errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{hello {$place:-.}}") + .setExpected("hello world") + .setArgument("place:-.", "world", errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{hello {$place}}") + .setExpected("hello {$place}") + .clearArguments(errorCode) + .setExpectedError(U_UNRESOLVED_VARIABLE_ERROR) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{{$one} and {$two}}") + .setExpected("1.3 and 4.2") + .setExpectSuccess() + 
.setArgument("one", 1.3, errorCode) + .setArgument("two", 4.2, errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + testBuilder->setArgument("one", "1.3", errorCode).setArgument("two", "4.2", errorCode); + test.adoptInstead(testBuilder->build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{{$one} et {$two}}") + .setExpected("1,3 et 4,2") + .setLocale(Locale("fr"), errorCode) + .setArgument("one", 1.3, errorCode) + .setArgument("two", 4.2, errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{hello {|4.2| :number}}") + .setExpected("hello 4.2") + .setLocale(Locale("en"), errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{hello {|foo| :number}}") + .setExpected("hello NaN") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{hello {:number}}") + .setExpected("hello NaN") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + + test.adoptInstead(testBuilder->setPattern("{hello {|4.2| :number minimumFractionDigits=2}}") + .setExpected("hello 4.20") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{hello {|4.2| :number minimumFractionDigits=|2|}}") + .setExpected("hello 4.20") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{hello {|4.2| :number minimumFractionDigits=$foo}}") + .setExpected("hello 4.20") + .setArgument("foo", (int64_t) 2, errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("let $foo = {bar} {bar {$foo}}") + .setExpected("bar bar") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, 
errorCode); + + test.adoptInstead(testBuilder->setPattern("let $foo = {|bar|} {bar {$foo}}") + .setExpected("bar bar") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("let $foo = {|bar|} {bar {$foo}}") + .setExpected("bar bar") + .setArgument("foo", "foo", errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("let $foo = {$bar} {bar {$foo}}") + .setExpected("bar foo") + .setArgument("bar", "foo", errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("let $foo = {$bar :number} {bar {$foo}}") + .setExpected("bar 4.2") + .setArgument("bar", 4.2, errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("let $foo = {$bar :number minimumFractionDigits=2} {bar {$foo}}") + .setExpected("bar 4.20") + .setArgument("bar", 4.2, errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("let $foo = {$bar :number minimumFractionDigits=foo} {bar {$foo}}") + .setExpected("bar 4.2") + .setArgument("bar", 4.2, errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("let $foo = {$bar :number} {bar {$foo}}") + .setExpected("bar NaN") + .setArgument("bar", "foo", errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("let $foo = {$baz} let $bar = {$foo} {bar {$bar}}") + .setExpected("bar foo") + .setArgument("baz", "foo", errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("let $foo = {$foo} {bar {$foo}}") + .setExpected("bar foo") + .setArgument("foo", "foo", errorCode) + .build(errorCode)); + 
TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("let $foo = {$foo} let $foo = {42} {bar {$foo}}") + .setExpected("bar 42") + .setArgument("foo", "foo", errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("let $foo = {42} let $foo = {$foo} {bar {$foo}}") + .setExpected("bar 42") + .setArgument("foo", "foo", errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("let $foo = {$foo} let $foo = {42} {bar {$foo}}") + .setExpected("bar 42") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("let $foo = {:unknown} let $foo = {42} {bar {$foo}}") + .setExpected("bar 42") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("let $x = {42} let $y = {$x} let $x = {13} {{$x} {$y}}") + .setExpected("13 42") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + +/* + Shouldn't this be "bar {$bar}"? 
+ + test.adoptInstead(testBuilder->setPattern("let $foo = {$bar} let $bar = {$baz} {bar {$foo}}") + .setExpected("bar foo") + .setArgument("baz", "foo", errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); +*/ + + test.adoptInstead(testBuilder->setPattern("match {$foo :select} when |1| {one} when * {other}") + .setExpected("one") + .setArgument("foo", (int64_t) 1, errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("match {$foo :plural} when 1 {one} when * {other}") + .setExpected("one") + .setArgument("foo", (int64_t) 1, errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + +/* + This case can't be tested without a way to set the "foo" argument to null + + test.adoptInstead(testBuilder->setPattern("match {$foo :plural} when 1 {one} when * {other}") + .setExpected("other") + .setArgument("foo", "", errorCode) + .setExpectedError(U_UNRESOLVED_VARIABLE_ERROR) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); +*/ + + test.adoptInstead(testBuilder->setPattern("match {$foo :plural} when one {one} when * {other}") + .setExpected("one") + .setArgument("foo", (int64_t) 1, errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("match {$foo :plural} when 1 {=1} when one {one} when * {other}") + .setExpected("=1") + .setArgument("foo", "1", errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("match {$foo :plural} when 1 {=1} when one {one} when * {other}") + .setExpected("=1") + .setArgument("foo", (int64_t) 1, errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("match {$foo :plural} when one {one} when 1 {=1} when * {other}") + .setExpected("=1") + .setArgument("foo", (int64_t) 1, 
errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("match {$foo :plural} {$bar :plural} when one one {one one} when one * {one other} when * * {other}") + .setExpected("one one") + .setArgument("foo", (int64_t) 1, errorCode) + .setArgument("bar", (int64_t) 1, errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("match {$foo :plural} {$bar :plural} when one one {one one} when one * {one other} when * * {other}") + .setExpected("one other") + .setArgument("foo", (int64_t) 1, errorCode) + .setArgument("bar", (int64_t) 2, errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("match {$foo :plural} {$bar :plural} when one one {one one} when one * {one other} when * * {other}") + .setExpected("other") + .setArgument("foo", (int64_t) 2, errorCode) + .setArgument("bar", (int64_t) 2, errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("let $foo = {$bar :plural} match {$foo} when one {one} when * {other}") + .setExpected("one") + .setArgument("bar", (int64_t) 1, errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("let $foo = {$bar :plural} match {$foo} when one {one} when * {other}") + .setExpected("other") + .setArgument("bar", (int64_t) 2, errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("let $bar = {$none} match {$foo :plural} when one {one} when * {{$bar}}") + .setExpected("one") + .setArgument("foo", (int64_t) 1, errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + +/* + Note: this differs from 
https://github.com/messageformat/messageformat/blob/e0087bff312d759b67a9129eac135d318a1f0ce7/packages/mf2-messageformat/src/__fixtures/test-messages.json#L197 + + The expected value in the test as defined there is "{$bar}". + The value should be "{$none}" per +https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#fallback-resolution - +"When an error occurs in an expression with a variable operand and the variable refers to a local declaration, the fallback value is formatted based on the expression on the right-hand side of the declaration, rather than the expression in the selector or pattern." +*/ + test.adoptInstead(testBuilder->setPattern("let $bar = {$none} match {$foo :plural} when one {one} when * {{$bar}}") + .setExpected("{$none}") + .setArgument("foo", (int64_t) 2, errorCode) + .setExpectedError(U_UNRESOLVED_VARIABLE_ERROR) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + // Missing '$' before `bar` + test.adoptInstead(testBuilder->setPattern("let bar = {|foo|} {{$bar}}") + .setExpected("{$bar}") + .clearArguments(errorCode) + .setExpectedError(U_SYNTAX_ERROR) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + // Missing '=' after `bar` + test.adoptInstead(testBuilder->setPattern("let $bar {|foo|} {{$bar}}") + .setExpected("foo") + .setExpectedError(U_SYNTAX_ERROR) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + // Missing '{'/'}' around `foo` + test.adoptInstead(testBuilder->setPattern("let bar = |foo| {{$bar}}") + .setExpected("{$bar}") + .setExpectedError(U_SYNTAX_ERROR) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{{+tag}}") + .setExpected("{+tag}") + .setIgnoreError() + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{{+tag}content}") + .setExpected("{+tag}content") + .setIgnoreError() + 
.build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{{+tag}content{-tag}}") + .setExpected("{+tag}content{-tag}") + .setIgnoreError() + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{{-tag}content}") + .setExpected("{-tag}content") + .setIgnoreError() + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{{+tag foo=bar}}") + .setExpected("{+tag}") + .setIgnoreError() + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{{+tag foo=|foo| bar=$bar}}") + .setArgument("bar", "b a r", errorCode) + .setExpected("{+tag}") + .setIgnoreError() + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{{|foo| +markup}}") + .setExpected("{|foo|}") + .setIgnoreError() + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{{-tag foo=bar}}") + .setExpected("{-tag}") + .setIgnoreError() + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("no braces") + .clearIgnoreError() + .setExpected("{no braces}") + .setExpectedError(U_SYNTAX_ERROR) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("no braces {$foo}") + .setExpected("{no braces {$foo}}") + .setArgument("foo", (int64_t) 2, errorCode) + .setExpectedError(U_SYNTAX_ERROR) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{missing end brace") + .setExpected("missing end brace") + .setExpectedError(U_SYNTAX_ERROR) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + 
test.adoptInstead(testBuilder->setPattern("{missing end {$brace") + .setExpected("missing end {$brace}") + .setExpectedError(U_SYNTAX_ERROR) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{extra} content") + .setExpected("extra") + .setExpectedError(U_SYNTAX_ERROR) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{empty { }}") + .setExpectedError(U_SYNTAX_ERROR) + .setExpected("empty \uFFFD") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{bad {:}}") + .setExpected("bad {:}") + .setExpectedError(U_SYNTAX_ERROR) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{unquoted {literal}}") + .setExpected("unquoted literal") + .setExpectSuccess() + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{bad {\\u0000placeholder}}") + .clearExpected() + .setExpectedError(U_SYNTAX_ERROR) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{no-equal {|42| :number minimumFractionDigits 2}}") + .setExpected("no-equal 42.00") + .setExpectedError(U_SYNTAX_ERROR) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{bad {:placeholder option=}}") + .setExpected("bad {:placeholder}") + .setExpectedError(U_SYNTAX_ERROR) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{bad {:placeholder option value}}") + .setExpected("bad {:placeholder}") + .setExpectedError(U_SYNTAX_ERROR) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{bad {:placeholder option}}") + 
.setExpected("bad {:placeholder}") + .setExpectedError(U_SYNTAX_ERROR) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{bad {$placeholder option}}") + .clearExpected() + .setExpectedError(U_SYNTAX_ERROR) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("{no {$placeholder end}") + .clearExpected() + .setExpectedError(U_SYNTAX_ERROR) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("match {} when * {foo}") + .clearExpected() + .setExpectedError(U_SYNTAX_ERROR) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("match {+foo} when * {foo}") + .setExpected("foo") + .setExpectedError(U_UNKNOWN_FUNCTION_ERROR) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("match {|foo|} when*{foo}") + .clearExpected() + .setExpectedError(U_SYNTAX_ERROR) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("match when * {foo}") + .clearExpected() + .setExpectedError(U_SYNTAX_ERROR) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("match {|x|} when * foo") + .clearExpected() + .setExpectedError(U_SYNTAX_ERROR) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("match {|x|} when * {foo} extra") + .clearExpected() + .setExpectedError(U_SYNTAX_ERROR) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("match |x| when * {foo}") + .clearExpected() + .setExpectedError(U_SYNTAX_ERROR) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + 
test.adoptInstead(testBuilder->setPattern("match {$foo :plural} when * * {foo}") + .clearExpected() + .setExpectedError(U_VARIANT_KEY_MISMATCH_ERROR) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder->setPattern("match {$foo :plural} {$bar :plural} when * {foo}") + .clearExpected() + .setExpectedError(U_VARIANT_KEY_MISMATCH_ERROR) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); +} + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/test/intltest/messageformat2test_icu.cpp b/icu4c/source/test/intltest/messageformat2test_icu.cpp new file mode 100644 index 000000000000..36099c7d0102 --- /dev/null +++ b/icu4c/source/test/intltest/messageformat2test_icu.cpp @@ -0,0 +1,211 @@ +// © 2016 and later: Unicode, Inc. and others. + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/gregocal.h" +#include "unicode/messageformat2.h" +#include "unicode/msgfmt.h" +#include "messageformat2test.h" + +using namespace icu::message2; + +/* + Tests based on ICU4J's Mf2IcuTest.java +*/ + +/* + TODO: Tests need to be unified in a single format that + both ICU4C and ICU4J can use, rather than being embedded in code. 
+*/ + +/* +Tests reflect the syntax specified in + + https://github.com/unicode-org/message-format-wg/commits/main/spec/message.abnf + +as of the following commit from 2023-05-09: + https://github.com/unicode-org/message-format-wg/commit/194f6efcec5bf396df36a19bd6fa78d1fa2e0867 + +*/ + +void TestMessageFormat2::testSample(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + LocalPointer test(testBuilder.setPattern("{There are {$count} files on {$where}}") + .setArgument("count", "abc", errorCode) + .setArgument("where", "def", errorCode) + .setExpected("There are abc files on def") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); +} + +void TestMessageFormat2::testStaticFormat(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + LocalPointer test(testBuilder.setPattern("{At {$when :datetime timestyle=default} on {$when :datetime datestyle=default}, \ +there was {$what} on planet {$planet :number kind=integer}.}") + .setArgument("planet", (int64_t) 7, errorCode) + .setArgument("when", (UDate) 871068000000, errorCode) + .setArgument("what", "a disturbance in the Force", errorCode) + .setExpected("At 12:20:00\u202FPM on Aug 8, 1997, there was a disturbance in the Force on planet 7.") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); +} + +void TestMessageFormat2::testSimpleFormat(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + testBuilder.setPattern("{The disk \"{$diskName}\" contains {$fileCount} file(s).}"); + testBuilder.setArgument("diskName", "MyDisk", errorCode); + + LocalPointer test(testBuilder.setArgument("fileCount", (int64_t) 0, errorCode) + .setExpected("The disk \"MyDisk\" contains 0 file(s).") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder.setArgument("fileCount", (int64_t) 1, errorCode) + .setExpected("The disk \"MyDisk\" 
contains 1 file(s).") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder.setArgument("fileCount", (int64_t) 12, errorCode) + .setExpected("The disk \"MyDisk\" contains 12 file(s).") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); +} + +void TestMessageFormat2::testSelectFormatToPattern(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + UnicodeString pattern = "match {$userGender :select}\n\ + when female {{$userName} est all\u00E9e \u00E0 Paris.}\n\ + when * {{$userName} est all\u00E9 \u00E0 Paris.}"; + + testBuilder.setPattern(pattern); + + LocalPointer test(testBuilder.setArgument("userName", "Charlotte", errorCode) + .setArgument("userGender", "female", errorCode) + .setExpected("Charlotte est allée à Paris.") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder.setArgument("userName", "Guillaume", errorCode) + .setArgument("userGender", "male", errorCode) + .setExpected("Guillaume est allé à Paris.") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder.setArgument("userName", "Dominique", errorCode) + .setArgument("userGender", "unknown", errorCode) + .setExpected("Dominique est allé à Paris.") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); +} + + +void TestMessageFormat2::testMessageFormatDateTimeSkeleton(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + LocalPointer cal(new GregorianCalendar(2021, Calendar::NOVEMBER, 23, 16, 42, 55, errorCode)); + CHECK_ERROR(errorCode); + UDate date = cal->getTime(errorCode); + testBuilder.setLocale(Locale::forLanguageTag("en", errorCode), errorCode); + testBuilder.setDateArgument("when", date, errorCode); + CHECK_ERROR(errorCode); + + LocalPointer test(testBuilder.setPattern("{{$when :datetime skeleton=MMMMd}}") + 
.setExpected("November 23") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder.setPattern("{{$when :datetime skeleton=yMMMMdjm}}") + .setExpected("November 23, 2021 at 4:42\u202FPM") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder.setPattern("{{$when :datetime skeleton=| yMMMMd |}}") + .setExpected("November 23, 2021") + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder.setPattern("{{$when :datetime skeleton=yMMMMd}}") + .setExpected("23 novembre 2021") + .setLocale(Locale::forLanguageTag("fr", errorCode), errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder.setPattern("{Expiration: {$when :datetime skeleton=yMMM}!}") + .setExpected("Expiration: Nov 2021!") + .setLocale(Locale::forLanguageTag("en", errorCode), errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder.setPattern("{{$when :datetime pattern=|'::'yMMMMd|}}") + .setExpected("::2021November23") + .setLocale(Locale::forLanguageTag("en", errorCode), errorCode) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); +} + +void TestMessageFormat2::testMf1Behavior(TestCase::Builder& testBuilder, IcuTestErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + UDate testDate = UDate(1671782400000); // 2022-12-23 + UnicodeString user = "John"; + UnicodeString badArgumentsNames[] = { + "userX", "todayX" + }; + UnicodeString goodArgumentsNames[] = { + "user", "today" + }; + Formattable argumentsValues[] = { + Formattable(user), Formattable(testDate, Formattable::kIsDate) + }; + UnicodeString expectedGood = "Hello John, today is December 23, 2022."; + + LocalPointer mf1(new MessageFormat("Hello {user}, today is {today,date,long}.", errorCode)); + CHECK_ERROR(errorCode); + + UnicodeString result; + 
mf1->format(badArgumentsNames, argumentsValues, 2, result, errorCode); + assertEquals("testMf1Behavior", (UBool) true, U_SUCCESS(errorCode)); + assertEquals("old icu test", "Hello {user}, today is {today}.", result); + result.remove(); + mf1->format(goodArgumentsNames, argumentsValues, 2, result, errorCode); + assertEquals("testMf1Behavior", (UBool) true, U_SUCCESS(errorCode)); + assertEquals("old icu test", expectedGood, result); + + LocalPointer test(testBuilder.setPattern("{Hello {$user}, today is {$today :datetime datestyle=long}.}") + .setArgument(badArgumentsNames[0], user, errorCode) + .setDateArgument(badArgumentsNames[1], testDate, errorCode) + .setExpected("Hello {$user}, today is {$today}.") + .setExpectedError(U_UNRESOLVED_VARIABLE_ERROR) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); + + test.adoptInstead(testBuilder.clearArguments(errorCode) + .setExpectSuccess() + .setArgument(goodArgumentsNames[0], user, errorCode) + .setDateArgument(goodArgumentsNames[1], testDate, errorCode) + .setExpected(expectedGood) + .build(errorCode)); + TestUtils::runTestCase(*this, *test, errorCode); +} + +void TestMessageFormat2::messageFormat1Tests() { + IcuTestErrorCode errorCode(*this, "featureTests"); + + LocalPointer testBuilder(TestCase::builder(errorCode)); + testBuilder->setName("messageFormat1Tests"); + + testSample(*testBuilder, errorCode); + testStaticFormat(*testBuilder, errorCode); + testSimpleFormat(*testBuilder, errorCode); + testSelectFormatToPattern(*testBuilder, errorCode); + testMessageFormatDateTimeSkeleton(*testBuilder, errorCode); + testMf1Behavior(*testBuilder, errorCode); +} + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/test/intltest/messageformat2test_utils.h b/icu4c/source/test/intltest/messageformat2test_utils.h new file mode 100644 index 000000000000..37453f76f1de --- /dev/null +++ b/icu4c/source/test/intltest/messageformat2test_utils.h @@ -0,0 +1,319 @@ +#ifndef _TESTMESSAGEFORMAT2_UTILS 
+#define _TESTMESSAGEFORMAT2_UTILS + +#include "unicode/messageformat2_function_registry.h" +#include "unicode/messageformat2_macros.h" +#include "unicode/messageformat2.h" +#include "intltest.h" + +#if !UCONFIG_NO_FORMATTING + +U_NAMESPACE_BEGIN namespace message2 { + +class TestCase : public UMemory { + public: + const UnicodeString testName; + const UnicodeString pattern; + const Locale locale; + LocalPointer arguments; + + private: + const UErrorCode expectedError; + const bool expectedNoSyntaxError; + const bool hasExpectedOutput; + const UnicodeString& expected; + const bool hasLineNumberAndOffset; + const uint32_t lineNumber; + const uint32_t offset; + const bool ignoreError; + // Function registry is not owned by the TestCase object + const FunctionRegistry* functionRegistry; + + public: + bool expectSuccess() const { + return (!ignoreError && U_SUCCESS(expectedError)); + } + bool expectFailure() const { + return (!ignoreError && U_FAILURE(expectedError)); + } + bool expectNoSyntaxError() const { + return expectedNoSyntaxError; + } + UErrorCode expectedErrorCode() const { + U_ASSERT(!expectSuccess()); + return expectedError; + } + bool lineNumberAndOffsetMatch(uint32_t actualLine, uint32_t actualOffset) const { + return (!hasLineNumberAndOffset || + ((actualLine == lineNumber) && actualOffset == offset)); + } + bool outputMatches(const UnicodeString& result) const { + return (!hasExpectedOutput || (expected == result)); + } + const UnicodeString& expectedOutput() const { + U_ASSERT(hasExpectedOutput); + return expected; + } + uint32_t getLineNumber() const { + U_ASSERT(hasLineNumberAndOffset); + return lineNumber; + } + uint32_t getOffset() const { + U_ASSERT(hasLineNumberAndOffset); + return offset; + } + bool hasCustomRegistry() const { return functionRegistry != nullptr; } + const FunctionRegistry* getCustomRegistry() const { + U_ASSERT(hasCustomRegistry()); + return functionRegistry; + } + virtual ~TestCase(); + + class Builder : public UObject { + 
friend class TestCase; + + public: + Builder& setName(UnicodeString name) { testName = name; return *this; } + Builder& setPattern(UnicodeString pat) { pattern = pat; return *this; } + Builder& setArgument(const UnicodeString& k, const UnicodeString& val, UErrorCode& errorCode) { + THIS_ON_ERROR(errorCode); + arguments->add(k, val, errorCode); + return *this; + } + Builder& setArgument(const UnicodeString& k, const UnicodeString* val, int32_t count, UErrorCode& errorCode) { + THIS_ON_ERROR(errorCode); + U_ASSERT(val != nullptr); + arguments->add(k, val, count, errorCode); + return *this; + } + Builder& setArgument(const UnicodeString& k, double val, UErrorCode& errorCode) { + THIS_ON_ERROR(errorCode); + + arguments->addDouble(k, val, errorCode); + return *this; + } + Builder& setArgument(const UnicodeString& k, int64_t val, UErrorCode& errorCode) { + THIS_ON_ERROR(errorCode); + + arguments->addInt64(k, val, errorCode); + return *this; + } + Builder& setDateArgument(const UnicodeString& k, UDate date, UErrorCode& errorCode) { + THIS_ON_ERROR(errorCode); + + arguments->addDate(k, date, errorCode); + return *this; + } + Builder& setDecimalArgument(const UnicodeString& k, StringPiece decimal, UErrorCode& errorCode) { + THIS_ON_ERROR(errorCode); + + arguments->addDecimal(k, decimal, errorCode); + return *this; + } + // val has to be uniquely owned because the copy constructor for + // a Formattable of an object doesn't work + Builder& setArgument(const UnicodeString& k, UObject* val, UErrorCode& errorCode) { + THIS_ON_ERROR(errorCode); + U_ASSERT(val != nullptr); + + arguments->addObject(k, val, errorCode); + return *this; + } + Builder& clearArguments(UErrorCode& errorCode) { + THIS_ON_ERROR(errorCode); + if (arguments.isValid()) { + arguments.adoptInstead(MessageArguments::builder(errorCode)); + }; + return *this; + } + Builder& setExpected(UnicodeString e) { + hasExpectedOutput = true; + expected = e; + return *this; + } + Builder& clearExpected() { + 
hasExpectedOutput = false; + return *this; + } + Builder& setExpectedError(UErrorCode errorCode) { + expectedError = U_SUCCESS(errorCode) ? U_ZERO_ERROR : errorCode; + return *this; + } + Builder& setNoSyntaxError() { + expectNoSyntaxError = true; + return *this; + } + Builder& setExpectSuccess() { + return setExpectedError(U_ZERO_ERROR); + } + Builder& clearLocale() { + if (locale.isValid()) { + locale.adoptInstead(nullptr); + } + return *this; + } + + Builder& setLocale(const Locale& loc, UErrorCode& errorCode) { + THIS_ON_ERROR(errorCode); + Locale* l = new Locale(loc); + if (l == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } else { + locale.adoptInstead(l); + } + return *this; + } + Builder& setExpectedLineNumberAndOffset(uint32_t line, uint32_t o) { + hasLineNumberAndOffset = true; + lineNumber = line; + offset = o; + return *this; + } + Builder& setIgnoreError() { + ignoreError = true; + return *this; + } + Builder& clearIgnoreError() { + ignoreError = false; + return *this; + } + Builder& setFunctionRegistry(FunctionRegistry* reg) { + U_ASSERT(reg != nullptr); + functionRegistry.adoptInstead(reg); + return *this; + } + TestCase* build(UErrorCode& errorCode) const { + NULL_ON_ERROR(errorCode); + LocalPointer result(new TestCase(*this, errorCode)); + NULL_ON_ERROR(errorCode); + return result.orphan(); + } + virtual ~Builder(); + + private: + UnicodeString testName; + UnicodeString pattern; + LocalPointer locale; + LocalPointer arguments; + bool hasExpectedOutput; + UnicodeString expected; + UErrorCode expectedError; + bool expectNoSyntaxError; + bool hasLineNumberAndOffset; + uint32_t lineNumber; + uint32_t offset; + bool ignoreError; + LocalPointer functionRegistry; + + Builder(UErrorCode& errorCode) : pattern(""), arguments(MessageArguments::builder(errorCode)), hasExpectedOutput(false), expected(""), expectedError(U_ZERO_ERROR), expectNoSyntaxError(false), hasLineNumberAndOffset(false), ignoreError(false) {} + }; + + private: + TestCase(const 
Builder& builder, UErrorCode& errorCode) : + testName(builder.testName), + pattern(builder.pattern), + locale(!builder.locale.isValid() ? Locale::getDefault() : *builder.locale), + arguments(builder.arguments->build(errorCode)), + expectedError(builder.expectedError), + expectedNoSyntaxError(builder.expectNoSyntaxError), + hasExpectedOutput(builder.hasExpectedOutput), + expected(builder.expected), + hasLineNumberAndOffset(builder.hasLineNumberAndOffset), + lineNumber(builder.hasLineNumberAndOffset ? builder.lineNumber : 0), + offset(builder.hasLineNumberAndOffset ? builder.offset : 0), + ignoreError(builder.ignoreError), + functionRegistry(builder.functionRegistry.getAlias()) { + U_ASSERT(builder.arguments.isValid()); + // If an error is not expected, then the expected + // output should be present + U_ASSERT(expectFailure() || expectNoSyntaxError() || hasExpectedOutput); + } + public: + static TestCase::Builder* builder(UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + return new Builder(errorCode); + } +}; // class TestCase + +class TestUtils { + public: + + // Runs a single test case + static void runTestCase(IntlTest& tmsg, + const TestCase& testCase, + IcuTestErrorCode& errorCode) { + CHECK_ERROR(errorCode); + + LocalPointer mfBuilder(MessageFormatter::builder(errorCode)); + CHECK_ERROR(errorCode); + mfBuilder->setPattern(testCase.pattern).setLocale(testCase.locale); + + if (testCase.hasCustomRegistry()) { + mfBuilder->setFunctionRegistry(testCase.getCustomRegistry()); + } + UParseError parseError; + LocalPointer mf(mfBuilder->build(parseError, errorCode)); + UnicodeString result; + + if (U_SUCCESS(errorCode)) { + mf->formatToString(*(testCase.arguments), errorCode, result); + } + + if (testCase.expectNoSyntaxError()) { + if (errorCode == U_SYNTAX_ERROR) { + failSyntaxError(tmsg, testCase); + } + errorCode.reset(); + return; + } + if (testCase.expectSuccess() && U_FAILURE(errorCode)) { + failExpectedSuccess(tmsg, testCase, errorCode); + return; + } + if 
(testCase.expectFailure() && errorCode != testCase.expectedErrorCode()) { + failExpectedFailure(tmsg, testCase, errorCode); + return; + } + if (!testCase.lineNumberAndOffsetMatch(parseError.line, parseError.offset)) { + failWrongOffset(tmsg, testCase, parseError.line, parseError.offset); + } + if (!testCase.outputMatches(result)) { + failWrongOutput(tmsg, testCase, result); + return; + } + errorCode.reset(); + } + + static void failSyntaxError(IntlTest& tmsg, const TestCase& testCase) { + tmsg.dataerrln(testCase.testName); + tmsg.logln(testCase.testName + " failed test with pattern: " + testCase.pattern + " and error code U_SYNTAX_WARNING; expected no syntax error"); + } + + static void failExpectedSuccess(IntlTest& tmsg, const TestCase& testCase, IcuTestErrorCode& errorCode) { + tmsg.dataerrln(testCase.testName); + tmsg.logln(testCase.testName + " failed test with pattern: " + testCase.pattern + " and error code " + ((int32_t) errorCode)); + errorCode.reset(); + } + static void failExpectedFailure(IntlTest& tmsg, const TestCase& testCase, IcuTestErrorCode& errorCode) { + tmsg.dataerrln(testCase.testName); + tmsg.logln(testCase.testName + " failed test with wrong error code; pattern: " + testCase.pattern + " and error code " + ((int32_t) errorCode) + "(expected error code: " + ((int32_t) testCase.expectedErrorCode()) + " )"); + errorCode.reset(); + } + static void failWrongOutput(IntlTest& tmsg, const TestCase& testCase, const UnicodeString& result) { + tmsg.dataerrln(testCase.testName); + tmsg.logln(testCase.testName + " failed test with wrong output; pattern: " + testCase.pattern + " and expected output = " + testCase.expectedOutput() + " and actual output = " + result); + } + + static void failWrongOffset(IntlTest& tmsg, const TestCase& testCase, uint32_t actualLine, uint32_t actualOffset) { + tmsg.dataerrln("Test failed with wrong line or character offset in parse error; expected (line %d, offset %d), got (line %d, offset %d)", testCase.getLineNumber(), 
testCase.getOffset(), + actualLine, actualOffset); + tmsg.logln(UnicodeString(testCase.testName) + " pattern = " + testCase.pattern + " - failed by returning the wrong line number or offset in the parse error"); + } +}; // class TestUtils + +} // namespace message2 +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif