diff --git a/icu4c/source/i18n/messageformat2.cpp b/icu4c/source/i18n/messageformat2.cpp index 73f7fa45e69f..8e8675f741a3 100644 --- a/icu4c/source/i18n/messageformat2.cpp +++ b/icu4c/source/i18n/messageformat2.cpp @@ -3,6 +3,8 @@ #include "unicode/utypes.h" +#if !UCONFIG_NO_NORMALIZATION + #if !UCONFIG_NO_FORMATTING #if !UCONFIG_NO_MF2 @@ -11,8 +13,10 @@ #include "unicode/messageformat2_data_model.h" #include "unicode/messageformat2_formattable.h" #include "unicode/messageformat2.h" +#include "unicode/normalizer2.h" #include "unicode/unistr.h" #include "messageformat2_allocation.h" +#include "messageformat2_checker.h" #include "messageformat2_evaluation.h" #include "messageformat2_macros.h" @@ -37,7 +41,7 @@ static Formattable evalLiteral(const Literal& lit) { // The fallback for a variable name is itself. UnicodeString str(DOLLAR); str += var; - const Formattable* val = context.getGlobal(var, errorCode); + const Formattable* val = context.getGlobal(*this, var, errorCode); if (U_SUCCESS(errorCode)) { return (FormattedPlaceholder(*val, str)); } @@ -52,9 +56,9 @@ static Formattable evalLiteral(const Literal& lit) { } [[nodiscard]] FormattedPlaceholder MessageFormatter::formatOperand(const Environment& env, - const Operand& rand, - MessageContext& context, - UErrorCode &status) const { + const Operand& rand, + MessageContext& context, + UErrorCode &status) const { if (U_FAILURE(status)) { return {}; } @@ -71,15 +75,19 @@ static Formattable evalLiteral(const Literal& lit) { // Eager vs. lazy evaluation is an open issue: // see https://github.com/unicode-org/message-format-wg/issues/299 + // NFC-normalize the variable name. See + // https://github.com/unicode-org/message-format-wg/blob/main/spec/syntax.md#names-and-identifiers + const VariableName normalized = normalizeNFC(var); + // Look up the variable in the environment - if (env.has(var)) { + if (env.has(normalized)) { // `var` is a local -- look it up - const Closure& rhs = env.lookup(var); + const Closure& rhs = env.lookup(normalized); // Format the expression using the environment from the closure return formatExpression(rhs.getEnv(), rhs.getExpr(), context, status); } // Variable wasn't found in locals -- check if it's global - FormattedPlaceholder result = evalArgument(var, context, status); + FormattedPlaceholder result = evalArgument(normalized, context, status); if (status == U_ILLEGAL_ARGUMENT_ERROR) { status = U_ZERO_ERROR; // Unbound variable -- set a resolution error @@ -761,6 +769,7 @@ void MessageFormatter::formatSelectors(MessageContext& context, const Environmen UnicodeString MessageFormatter::formatToString(const MessageArguments& arguments, UErrorCode &status) { EMPTY_ON_ERROR(status); + // Create a new environment that will store closures for all local variables Environment* env = Environment::create(status); // Create a new context with the given arguments and the `errors` structure @@ -813,12 +822,14 @@ void MessageFormatter::check(MessageContext& context, const Environment& localEn // Check that variable is in scope const VariableName& var = rand.asVariable(); + UnicodeString normalized = normalizeNFC(var); + // Check local scope - if (localEnv.has(var)) { + if (localEnv.has(normalized)) { return; } // Check global scope - context.getGlobal(var, status); + context.getGlobal(*this, normalized, status); if (status == U_ILLEGAL_ARGUMENT_ERROR) { status = U_ZERO_ERROR; context.getErrors().setUnresolvedVariable(var, status); @@ -855,7 +866,10 @@ void MessageFormatter::checkDeclarations(MessageContext& context, Environment*& // memoizing the value of localEnv up to this point // Add the LHS to the environment for checking the next declaration - env = Environment::create(decl.getVariable(), Closure(rhs, *env), env, status); + env = Environment::create(normalizeNFC(decl.getVariable()), + Closure(rhs, *env), + env, + status); CHECK_ERROR(status); } } @@ -866,3 +880,5 @@ U_NAMESPACE_END #endif /* #if !UCONFIG_NO_MF2 */ #endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif /* #if !UCONFIG_NO_NORMALIZATION */ diff --git a/icu4c/source/i18n/messageformat2_allocation.h b/icu4c/source/i18n/messageformat2_allocation.h index 7be27e222520..5b06d0851296 100644 --- a/icu4c/source/i18n/messageformat2_allocation.h +++ b/icu4c/source/i18n/messageformat2_allocation.h @@ -10,6 +10,8 @@ #if U_SHOW_CPLUSPLUS_API +#if !UCONFIG_NO_NORMALIZATION + #if !UCONFIG_NO_FORMATTING #if !UCONFIG_NO_MF2 @@ -139,6 +141,8 @@ U_NAMESPACE_END #endif /* #if !UCONFIG_NO_FORMATTING */ +#endif /* #if !UCONFIG_NO_NORMALIZATION */ + #endif /* U_SHOW_CPLUSPLUS_API */ #endif // MESSAGEFORMAT2_UTILS_H diff --git a/icu4c/source/i18n/messageformat2_arguments.cpp b/icu4c/source/i18n/messageformat2_arguments.cpp index ded3f4dda160..c43c600a2f40 100644 --- a/icu4c/source/i18n/messageformat2_arguments.cpp +++ b/icu4c/source/i18n/messageformat2_arguments.cpp @@ -3,12 +3,16 @@ #include "unicode/utypes.h" +#if !UCONFIG_NO_NORMALIZATION + #if !UCONFIG_NO_FORMATTING #if !UCONFIG_NO_MF2 +#include "unicode/messageformat2.h" #include "unicode/messageformat2_arguments.h" #include "unicode/messageformat2_data_model_names.h" +#include "messageformat2_evaluation.h" #include "uvector.h" // U_ASSERT U_NAMESPACE_BEGIN @@ -22,11 +26,15 @@ namespace message2 { using Arguments = MessageArguments; - const Formattable* Arguments::getArgument(const VariableName& arg, UErrorCode& errorCode) const { + const Formattable* Arguments::getArgument(const MessageFormatter& context, + const VariableName& arg, + UErrorCode& errorCode) const { if (U_SUCCESS(errorCode)) { U_ASSERT(argsLen == 0 || arguments.isValid()); for (int32_t i = 0; i < argsLen; i++) { - if (argumentNames[i] == arg) { + UnicodeString normalized = context.normalizeNFC(argumentNames[i]); + // arg already assumed to be normalized + if (normalized == arg) { return &arguments[i]; } } @@ -57,3 +65,5 @@ U_NAMESPACE_END #endif /* #if !UCONFIG_NO_MF2 */ #endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif /* #if !UCONFIG_NO_NORMALIZATION */ diff --git a/icu4c/source/i18n/messageformat2_checker.cpp b/icu4c/source/i18n/messageformat2_checker.cpp index bdc5c383b6e8..535c876e42b6 100644 --- a/icu4c/source/i18n/messageformat2_checker.cpp +++ b/icu4c/source/i18n/messageformat2_checker.cpp @@ -3,12 +3,16 @@ #include "unicode/utypes.h" +#if !UCONFIG_NO_NORMALIZATION + #if !UCONFIG_NO_FORMATTING #if !UCONFIG_NO_MF2 +#include "unicode/messageformat2.h" #include "messageformat2_allocation.h" #include "messageformat2_checker.h" +#include "messageformat2_evaluation.h" #include "messageformat2_macros.h" #include "uvector.h" // U_ASSERT @@ -104,6 +108,13 @@ TypeEnvironment::~TypeEnvironment() {} // --------------------- +UnicodeString Checker::normalizeNFC(const Key& k) const { + if (k.isWildcard()) { + return UnicodeString("*"); + } + return context.normalizeNFC(k.asLiteral().unquoted()); +} + static bool areDefaultKeys(const Key* keys, int32_t len) { U_ASSERT(len > 0); for (int32_t i = 0; i < len; i++) { @@ -185,7 +196,7 @@ void Checker::checkVariants(UErrorCode& status) { // This variant was already checked, // so we know keys1.len == len for (int32_t kk = 0; kk < len; kk++) { - if (!(keys[kk] == keys1[kk])) { + if (!(normalizeNFC(keys[kk]) == normalizeNFC(keys1[kk]))) { allEqual = false; break; } @@ -312,3 +323,5 @@ U_NAMESPACE_END #endif /* #if !UCONFIG_NO_MF2 */ #endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif /* #if !UCONFIG_NO_NORMALIZATION */ diff --git a/icu4c/source/i18n/messageformat2_checker.h b/icu4c/source/i18n/messageformat2_checker.h index 4bb0498efb99..429f79680991 100644 --- a/icu4c/source/i18n/messageformat2_checker.h +++ b/icu4c/source/i18n/messageformat2_checker.h @@ -10,6 +10,8 @@ #if U_SHOW_CPLUSPLUS_API +#if !UCONFIG_NO_NORMALIZATION + #if !UCONFIG_NO_FORMATTING #if !UCONFIG_NO_MF2 @@ -56,14 +58,19 @@ namespace message2 { // an explicit declaration }; // class TypeEnvironment + class MessageFormatter; + // Checks a data model for semantic errors // (Errors are defined in https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md ) class Checker { public: void check(UErrorCode&); - Checker(const MFDataModel& m, StaticErrors& e) : dataModel(m), errors(e) {} + Checker(const MFDataModel& d, StaticErrors& e, const MessageFormatter& mf) + : dataModel(d), errors(e), context(mf) {} private: + UnicodeString normalizeNFC(const Key&) const; + void requireAnnotated(const TypeEnvironment&, const Expression&, UErrorCode&); void addFreeVars(TypeEnvironment& t, const Operand&, UErrorCode&); void addFreeVars(TypeEnvironment& t, const Operator&, UErrorCode&); @@ -78,6 +85,9 @@ namespace message2 { void check(const Pattern&); const MFDataModel& dataModel; StaticErrors& errors; + + // Used for NFC normalization + const MessageFormatter& context; }; // class Checker } // namespace message2 @@ -88,6 +98,8 @@ U_NAMESPACE_END #endif /* #if !UCONFIG_NO_FORMATTING */ +#endif /* #if !UCONFIG_NO_NORMALIZATION */ + #endif /* U_SHOW_CPLUSPLUS_API */ #endif // MESSAGEFORMAT_CHECKER_H diff --git a/icu4c/source/i18n/messageformat2_data_model.cpp b/icu4c/source/i18n/messageformat2_data_model.cpp index 3fe5f65b5323..255444082e21 100644 --- a/icu4c/source/i18n/messageformat2_data_model.cpp +++ b/icu4c/source/i18n/messageformat2_data_model.cpp @@ -3,6 +3,8 @@ #include "unicode/utypes.h" +#if !UCONFIG_NO_NORMALIZATION + #if !UCONFIG_NO_FORMATTING #if !UCONFIG_NO_MF2 @@ -918,3 +920,5 @@ U_NAMESPACE_END #endif /* #if !UCONFIG_NO_MF2 */ #endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif /* #if !UCONFIG_NO_NORMALIZATION */ diff --git a/icu4c/source/i18n/messageformat2_errors.cpp b/icu4c/source/i18n/messageformat2_errors.cpp index 9d1d6bab81a1..5d3d938f0203 100644 --- a/icu4c/source/i18n/messageformat2_errors.cpp +++ b/icu4c/source/i18n/messageformat2_errors.cpp @@ -3,6 +3,8 @@ #include "unicode/utypes.h" +#if !UCONFIG_NO_NORMALIZATION + #if !UCONFIG_NO_FORMATTING #if !UCONFIG_NO_MF2 @@ -290,3 +292,5 @@ U_NAMESPACE_END #endif /* #if !UCONFIG_NO_MF2 */ #endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif /* #if !UCONFIG_NO_NORMALIZATION */ diff --git a/icu4c/source/i18n/messageformat2_errors.h b/icu4c/source/i18n/messageformat2_errors.h index f84aa7362837..085263e88b06 100644 --- a/icu4c/source/i18n/messageformat2_errors.h +++ b/icu4c/source/i18n/messageformat2_errors.h @@ -15,6 +15,8 @@ * \brief C++ API: Formats messages using the draft MessageFormat 2.0. */ +#if !UCONFIG_NO_NORMALIZATION + #if !UCONFIG_NO_FORMATTING #if !UCONFIG_NO_MF2 @@ -151,6 +153,8 @@ U_NAMESPACE_END #endif /* #if !UCONFIG_NO_FORMATTING */ +#endif /* #if !UCONFIG_NO_NORMALIZATION */ + #endif /* U_SHOW_CPLUSPLUS_API */ #endif // MESSAGEFORMAT2_ERRORS_H diff --git a/icu4c/source/i18n/messageformat2_evaluation.cpp b/icu4c/source/i18n/messageformat2_evaluation.cpp index 41e4c9a8020a..3ba54c6b389c 100644 --- a/icu4c/source/i18n/messageformat2_evaluation.cpp +++ b/icu4c/source/i18n/messageformat2_evaluation.cpp @@ -3,6 +3,8 @@ #include "unicode/utypes.h" +#if !UCONFIG_NO_NORMALIZATION + #if !UCONFIG_NO_FORMATTING #if !UCONFIG_NO_MF2 @@ -190,13 +192,16 @@ PrioritizedVariant::~PrioritizedVariant() {} errors.checkErrors(status); } - const Formattable* MessageContext::getGlobal(const VariableName& v, UErrorCode& errorCode) const { - return arguments.getArgument(v, errorCode); + const Formattable* MessageContext::getGlobal(const MessageFormatter& context, + const VariableName& v, + UErrorCode& errorCode) const { + return arguments.getArgument(context, v, errorCode); } MessageContext::MessageContext(const MessageArguments& args, const StaticErrors& e, UErrorCode& status) : arguments(args), errors(e, status) {} + MessageContext::~MessageContext() {} } // namespace message2 @@ -205,3 +210,5 @@ U_NAMESPACE_END #endif /* #if !UCONFIG_NO_MF2 */ #endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif /* #if !UCONFIG_NO_NORMALIZATION */ diff --git a/icu4c/source/i18n/messageformat2_evaluation.h b/icu4c/source/i18n/messageformat2_evaluation.h index b8ae0242367d..ae8b4a08a3bd 100644 --- a/icu4c/source/i18n/messageformat2_evaluation.h +++ b/icu4c/source/i18n/messageformat2_evaluation.h @@ -14,6 +14,7 @@ * \file * \brief C++ API: Formats messages using the draft MessageFormat 2.0. */ +#if !UCONFIG_NO_NORMALIZATION #if !UCONFIG_NO_FORMATTING @@ -174,11 +175,15 @@ namespace message2 { // The context contains all the information needed to process // an entire message: arguments, formatter cache, and error list + class MessageFormatter; + class MessageContext : public UMemory { public: MessageContext(const MessageArguments&, const StaticErrors&, UErrorCode&); - const Formattable* getGlobal(const VariableName&, UErrorCode&) const; + const Formattable* getGlobal(const MessageFormatter&, + const VariableName&, + UErrorCode&) const; // If any errors were set, update `status` accordingly void checkErrors(UErrorCode& status) const; @@ -191,6 +196,7 @@ namespace message2 { const MessageArguments& arguments; // External message arguments // Errors accumulated during parsing/formatting DynamicErrors errors; + }; // class MessageContext } // namespace message2 @@ -201,6 +207,8 @@ U_NAMESPACE_END #endif /* #if !UCONFIG_NO_FORMATTING */ +#endif /* #if !UCONFIG_NO_NORMALIZATION */ + #endif /* U_SHOW_CPLUSPLUS_API */ #endif // MESSAGEFORMAT2_EVALUATION_H diff --git a/icu4c/source/i18n/messageformat2_formattable.cpp b/icu4c/source/i18n/messageformat2_formattable.cpp index 3152ccb44fd8..4e2df49aeccf 100644 --- a/icu4c/source/i18n/messageformat2_formattable.cpp +++ b/icu4c/source/i18n/messageformat2_formattable.cpp @@ -3,6 +3,8 @@ #include "unicode/utypes.h" +#if !UCONFIG_NO_NORMALIZATION + #if !UCONFIG_NO_FORMATTING #if !UCONFIG_NO_MF2 @@ -336,3 +338,5 @@ U_NAMESPACE_END #endif /* #if !UCONFIG_NO_MF2 */ #endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif /* #if !UCONFIG_NO_NORMALIZATION */ diff --git a/icu4c/source/i18n/messageformat2_formatter.cpp b/icu4c/source/i18n/messageformat2_formatter.cpp index 8d17ae49b99a..82f2191ba1a6 100644 --- a/icu4c/source/i18n/messageformat2_formatter.cpp +++ b/icu4c/source/i18n/messageformat2_formatter.cpp @@ -3,6 +3,8 @@ #include "unicode/utypes.h" +#if !UCONFIG_NO_NORMALIZATION + #if !UCONFIG_NO_FORMATTING #if !UCONFIG_NO_MF2 @@ -116,6 +118,24 @@ namespace message2 { // MessageFormatter + // Returns the NFC-normalized version of s, returning s itself + // if it's already normalized. + UnicodeString MessageFormatter::normalizeNFC(const UnicodeString& s) const { + UErrorCode status = U_ZERO_ERROR; + // Check if string is already normalized + UNormalizationCheckResult result = nfcNormalizer->quickCheck(s, status); + // If so, return it + if (U_SUCCESS(status) && result == UNORM_YES) { + return s; + } + // Otherwise, normalize it + UnicodeString normalized = nfcNormalizer->normalize(s, status); + if (U_FAILURE(status)) { + return {}; + } + return normalized; + } + MessageFormatter::MessageFormatter(const MessageFormatter::Builder& builder, UErrorCode &success) : locale(builder.locale), customMFFunctionRegistry(builder.customMFFunctionRegistry) { CHECK_ERROR(success); @@ -163,6 +183,8 @@ namespace message2 { errors = errorsNew.orphan(); } + nfcNormalizer = Normalizer2::getNFCInstance(success); + // Note: we currently evaluate variables lazily, // without memoization. This call is still necessary // to check out-of-scope uses of local variables in @@ -170,7 +192,7 @@ namespace message2 { // only be checked when arguments are known) // Check for resolution errors - Checker(dataModel, *errors).check(success); + Checker(dataModel, *errors, *this).check(success); } void MessageFormatter::cleanup() noexcept { @@ -191,6 +213,7 @@ namespace message2 { signalErrors = other.signalErrors; errors = other.errors; other.errors = nullptr; + nfcNormalizer = other.nfcNormalizer; return *this; } @@ -352,3 +375,5 @@ U_NAMESPACE_END #endif /* #if !UCONFIG_NO_MF2 */ #endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif /* #if !UCONFIG_NO_NORMALIZATION */ diff --git a/icu4c/source/i18n/messageformat2_function_registry.cpp b/icu4c/source/i18n/messageformat2_function_registry.cpp index 17955760ecfb..b4c33544dc92 100644 --- a/icu4c/source/i18n/messageformat2_function_registry.cpp +++ b/icu4c/source/i18n/messageformat2_function_registry.cpp @@ -3,6 +3,8 @@ #include "unicode/utypes.h" +#if !UCONFIG_NO_NORMALIZATION + #if !UCONFIG_NO_FORMATTING #if !UCONFIG_NO_MF2 @@ -1242,3 +1244,4 @@ U_NAMESPACE_END #endif /* #if !UCONFIG_NO_FORMATTING */ +#endif /* #if !UCONFIG_NO_NORMALIZATION */ diff --git a/icu4c/source/i18n/messageformat2_function_registry_internal.h b/icu4c/source/i18n/messageformat2_function_registry_internal.h index 733fc5e945d5..46845976384e 100644 --- a/icu4c/source/i18n/messageformat2_function_registry_internal.h +++ b/icu4c/source/i18n/messageformat2_function_registry_internal.h @@ -10,6 +10,8 @@ #if U_SHOW_CPLUSPLUS_API +#if !UCONFIG_NO_NORMALIZATION + #if !UCONFIG_NO_FORMATTING #if !UCONFIG_NO_MF2 @@ -226,6 +228,8 @@ U_NAMESPACE_END #endif /* #if !UCONFIG_NO_FORMATTING */ +#endif /* #if !UCONFIG_NO_NORMALIZATION */ + #endif /* U_SHOW_CPLUSPLUS_API */ #endif // MESSAGEFORMAT2_FUNCTION_REGISTRY_INTERNAL_H diff --git a/icu4c/source/i18n/messageformat2_macros.h b/icu4c/source/i18n/messageformat2_macros.h index f06ed1a5a977..20e81377d4d5 100644 --- a/icu4c/source/i18n/messageformat2_macros.h +++ b/icu4c/source/i18n/messageformat2_macros.h @@ -10,6 +10,8 @@ #if U_SHOW_CPLUSPLUS_API +#if !UCONFIG_NO_NORMALIZATION + #if !UCONFIG_NO_FORMATTING #if !UCONFIG_NO_MF2 @@ -97,6 +99,8 @@ U_NAMESPACE_END #endif /* #if !UCONFIG_NO_FORMATTING */ +#endif /* #if !UCONFIG_NO_NORMALIZATION */ + #endif /* U_SHOW_CPLUSPLUS_API */ #endif // MESSAGEFORMAT2_MACROS_H diff --git a/icu4c/source/i18n/messageformat2_parser.cpp b/icu4c/source/i18n/messageformat2_parser.cpp index 22cb52266eaa..f435869cc48c 100644 --- a/icu4c/source/i18n/messageformat2_parser.cpp +++ b/icu4c/source/i18n/messageformat2_parser.cpp @@ -3,6 +3,8 @@ #include "unicode/utypes.h" +#if !UCONFIG_NO_NORMALIZATION + #if !UCONFIG_NO_FORMATTING #if !UCONFIG_NO_MF2 @@ -1926,3 +1928,4 @@ U_NAMESPACE_END #endif /* #if !UCONFIG_NO_FORMATTING */ +#endif /* #if !UCONFIG_NO_NORMALIZATION */ diff --git a/icu4c/source/i18n/messageformat2_parser.h b/icu4c/source/i18n/messageformat2_parser.h index b62cbe9200b9..c0ed714ca6de 100644 --- a/icu4c/source/i18n/messageformat2_parser.h +++ b/icu4c/source/i18n/messageformat2_parser.h @@ -16,6 +16,8 @@ #if U_SHOW_CPLUSPLUS_API +#if !UCONFIG_NO_NORMALIZATION + #if !UCONFIG_NO_FORMATTING #if !UCONFIG_NO_MF2 @@ -175,6 +177,8 @@ U_NAMESPACE_END #endif /* #if !UCONFIG_NO_FORMATTING */ +#endif /* #if !UCONFIG_NO_NORMALIZATION */ + #endif /* U_SHOW_CPLUSPLUS_API */ #endif // MESSAGEFORMAT_PARSER_H diff --git a/icu4c/source/i18n/messageformat2_serializer.cpp b/icu4c/source/i18n/messageformat2_serializer.cpp index b2765f5acf43..6005717510a7 100644 --- a/icu4c/source/i18n/messageformat2_serializer.cpp +++ b/icu4c/source/i18n/messageformat2_serializer.cpp @@ -3,6 +3,8 @@ #include "unicode/utypes.h" +#if !UCONFIG_NO_NORMALIZATION + #if !UCONFIG_NO_FORMATTING #if !UCONFIG_NO_MF2 @@ -285,3 +287,4 @@ U_NAMESPACE_END #endif /* #if !UCONFIG_NO_FORMATTING */ +#endif /* #if !UCONFIG_NO_NORMALIZATION */ diff --git a/icu4c/source/i18n/messageformat2_serializer.h b/icu4c/source/i18n/messageformat2_serializer.h index 1b97b3b93008..f190b255f0d3 100644 --- a/icu4c/source/i18n/messageformat2_serializer.h +++ b/icu4c/source/i18n/messageformat2_serializer.h @@ -10,6 +10,8 @@ #if U_SHOW_CPLUSPLUS_API +#if !UCONFIG_NO_NORMALIZATION + #if !UCONFIG_NO_FORMATTING #if !UCONFIG_NO_MF2 @@ -63,6 +65,8 @@ U_NAMESPACE_END #endif /* #if !UCONFIG_NO_FORMATTING */ +#endif /* #if !UCONFIG_NO_NORMALIZATION */ + #endif /* U_SHOW_CPLUSPLUS_API */ #endif // MESSAGEFORMAT_SERIALIZER_H diff --git a/icu4c/source/i18n/unicode/messageformat2.h b/icu4c/source/i18n/unicode/messageformat2.h index c5459f042f40..2890985fd2e2 100644 --- a/icu4c/source/i18n/unicode/messageformat2.h +++ b/icu4c/source/i18n/unicode/messageformat2.h @@ -8,6 +8,8 @@ #if U_SHOW_CPLUSPLUS_API +#if !UCONFIG_NO_NORMALIZATION + #if !UCONFIG_NO_FORMATTING #if !UCONFIG_NO_MF2 @@ -20,6 +22,7 @@ #include "unicode/messageformat2_arguments.h" #include "unicode/messageformat2_data_model.h" #include "unicode/messageformat2_function_registry.h" +#include "unicode/normalizer2.h" #include "unicode/unistr.h" #ifndef U_HIDE_DEPRECATED_API @@ -325,6 +328,8 @@ namespace message2 { private: friend class Builder; + friend class Checker; + friend class MessageArguments; friend class MessageContext; MessageFormatter(const MessageFormatter::Builder& builder, UErrorCode &status); @@ -352,6 +357,9 @@ namespace message2 { void resolvePreferences(MessageContext&, UVector&, UVector&, UErrorCode&) const; // Formatting methods + + // Used for normalizing variable names and keys for comparison + UnicodeString normalizeNFC(const UnicodeString&) const; [[nodiscard]] FormattedPlaceholder formatLiteral(const data_model::Literal&) const; void formatPattern(MessageContext&, const Environment&, const data_model::Pattern&, UErrorCode&, UnicodeString&) const; // Formats a call to a formatting function @@ -445,6 +453,10 @@ namespace message2 { // formatting methods return best-effort output. // The default is false. bool signalErrors = false; + + // Used for implementing normalizeNFC() + const Normalizer2* nfcNormalizer = nullptr; + }; // class MessageFormatter } // namespace message2 @@ -457,6 +469,8 @@ U_NAMESPACE_END #endif /* #if !UCONFIG_NO_FORMATTING */ +#endif /* #if !UCONFIG_NO_NORMALIZATION */ + #endif /* U_SHOW_CPLUSPLUS_API */ #endif // MESSAGEFORMAT2_H diff --git a/icu4c/source/i18n/unicode/messageformat2_arguments.h b/icu4c/source/i18n/unicode/messageformat2_arguments.h index c43d96191f16..07c96f892bca 100644 --- a/icu4c/source/i18n/unicode/messageformat2_arguments.h +++ b/icu4c/source/i18n/unicode/messageformat2_arguments.h @@ -8,6 +8,8 @@ #if U_SHOW_CPLUSPLUS_API +#if !UCONFIG_NO_NORMALIZATION + #if !UCONFIG_NO_FORMATTING #if !UCONFIG_NO_MF2 @@ -43,7 +45,7 @@ template class U_I18N_API LocalArray; namespace message2 { - class MessageContext; + class MessageFormatter; // Arguments // ---------- @@ -112,7 +114,9 @@ namespace message2 { private: friend class MessageContext; - const Formattable* getArgument(const data_model::VariableName&, UErrorCode&) const; + const Formattable* getArgument(const MessageFormatter&, + const data_model::VariableName&, + UErrorCode&) const; // Avoids using Hashtable so that code constructing a Hashtable // doesn't have to appear in this header file @@ -131,6 +135,8 @@ U_NAMESPACE_END #endif /* #if !UCONFIG_NO_FORMATTING */ +#endif /* #if !UCONFIG_NO_NORMALIZATION */ + #endif /* U_SHOW_CPLUSPLUS_API */ #endif // MESSAGEFORMAT2_ARGUMENTS_H diff --git a/icu4c/source/i18n/unicode/messageformat2_data_model.h b/icu4c/source/i18n/unicode/messageformat2_data_model.h index 0c836af1bdfd..cacc271ee6cd 100644 --- a/icu4c/source/i18n/unicode/messageformat2_data_model.h +++ b/icu4c/source/i18n/unicode/messageformat2_data_model.h @@ -8,6 +8,8 @@ #if U_SHOW_CPLUSPLUS_API +#if !UCONFIG_NO_NORMALIZATION + #if !UCONFIG_NO_FORMATTING #if !UCONFIG_NO_MF2 @@ -2592,6 +2594,8 @@ U_NAMESPACE_END #endif /* #if !UCONFIG_NO_FORMATTING */ +#endif /* #if !UCONFIG_NO_NORMALIZATION */ + #endif /* U_SHOW_CPLUSPLUS_API */ #endif // MESSAGEFORMAT_DATA_MODEL_H diff --git a/icu4c/source/i18n/unicode/messageformat2_formattable.h b/icu4c/source/i18n/unicode/messageformat2_formattable.h index 8a779adb9ab3..53c500dac2f6 100644 --- a/icu4c/source/i18n/unicode/messageformat2_formattable.h +++ b/icu4c/source/i18n/unicode/messageformat2_formattable.h @@ -8,6 +8,8 @@ #if U_SHOW_CPLUSPLUS_API +#if !UCONFIG_NO_NORMALIZATION + #if !UCONFIG_NO_FORMATTING #if !UCONFIG_NO_MF2 @@ -1010,6 +1012,8 @@ U_NAMESPACE_END #endif /* #if !UCONFIG_NO_FORMATTING */ +#endif /* #if !UCONFIG_NO_NORMALIZATION */ + #endif /* U_SHOW_CPLUSPLUS_API */ #endif // MESSAGEFORMAT2_FORMATTABLE_H diff --git a/icu4c/source/i18n/unicode/messageformat2_function_registry.h b/icu4c/source/i18n/unicode/messageformat2_function_registry.h index b8429e3b83aa..37690d5e04a1 100644 --- a/icu4c/source/i18n/unicode/messageformat2_function_registry.h +++ b/icu4c/source/i18n/unicode/messageformat2_function_registry.h @@ -8,6 +8,8 @@ #if U_SHOW_CPLUSPLUS_API +#if !UCONFIG_NO_NORMALIZATION + #if !UCONFIG_NO_FORMATTING #if !UCONFIG_NO_MF2 @@ -422,6 +424,8 @@ U_NAMESPACE_END #endif /* #if !UCONFIG_NO_FORMATTING */ +#endif /* #if !UCONFIG_NO_NORMALIZATION */ + #endif /* U_SHOW_CPLUSPLUS_API */ #endif // MESSAGEFORMAT2_FUNCTION_REGISTRY_H diff --git a/icu4c/source/test/intltest/itformat.cpp b/icu4c/source/test/intltest/itformat.cpp index 074cb993b118..49fa62f9f5c2 100644 --- a/icu4c/source/test/intltest/itformat.cpp +++ b/icu4c/source/test/intltest/itformat.cpp @@ -288,8 +288,10 @@ void IntlTestFormat::runIndexedTest( int32_t index, UBool exec, const char* &nam callTest(*test, par); } break; +#if !UCONFIG_NO_NORMALIZATION #if !UCONFIG_NO_MF2 TESTCLASS(60,TestMessageFormat2); +#endif #endif default: name = ""; break; //needed to end loop } diff --git a/icu4c/source/test/intltest/messageformat2test.cpp b/icu4c/source/test/intltest/messageformat2test.cpp index 68a10191318a..cd3ae788f6c2 100644 --- a/icu4c/source/test/intltest/messageformat2test.cpp +++ b/icu4c/source/test/intltest/messageformat2test.cpp @@ -2,6 +2,8 @@ #include "unicode/utypes.h" +#if !UCONFIG_NO_NORMALIZATION + #if !UCONFIG_NO_FORMATTING #if !UCONFIG_NO_MF2 @@ -400,3 +402,4 @@ TestCase::Builder::~Builder() {} #endif /* #if !UCONFIG_NO_FORMATTING */ +#endif /* #if !UCONFIG_NO_NORMALIZATION */ diff --git a/icu4c/source/test/intltest/messageformat2test.h b/icu4c/source/test/intltest/messageformat2test.h index 6ac2f1584e19..9fd5d253831d 100644 --- a/icu4c/source/test/intltest/messageformat2test.h +++ b/icu4c/source/test/intltest/messageformat2test.h @@ -7,6 +7,8 @@ #include "unicode/rep.h" #include "unicode/utypes.h" +#if !UCONFIG_NO_NORMALIZATION + #if !UCONFIG_NO_FORMATTING #if !UCONFIG_NO_MF2 @@ -188,4 +190,6 @@ U_NAMESPACE_END #endif /* #if !UCONFIG_NO_FORMATTING */ +#endif /* #if !UCONFIG_NO_NORMALIZATION */ + #endif diff --git a/icu4c/source/test/intltest/messageformat2test_custom.cpp b/icu4c/source/test/intltest/messageformat2test_custom.cpp index b498be791ca9..90210e0ed061 100644 --- a/icu4c/source/test/intltest/messageformat2test_custom.cpp +++ b/icu4c/source/test/intltest/messageformat2test_custom.cpp @@ -2,6 +2,8 @@ #include "unicode/utypes.h" +#if !UCONFIG_NO_NORMALIZATION + #if !UCONFIG_NO_FORMATTING #if !UCONFIG_NO_MF2 @@ -730,3 +732,5 @@ void TestMessageFormat2::testMessageRefFormatter(IcuTestErrorCode& errorCode) { #endif /* #if !UCONFIG_NO_MF2 */ #endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif /* #if !UCONFIG_NO_NORMALIZATION */ diff --git a/icu4c/source/test/intltest/messageformat2test_icu.cpp b/icu4c/source/test/intltest/messageformat2test_icu.cpp index d1dfa389ca8c..57361e3ccf44 100644 --- a/icu4c/source/test/intltest/messageformat2test_icu.cpp +++ b/icu4c/source/test/intltest/messageformat2test_icu.cpp @@ -2,6 +2,8 @@ #include "unicode/utypes.h" +#if !UCONFIG_NO_NORMALIZATION + #if !UCONFIG_NO_FORMATTING #if !UCONFIG_NO_MF2 @@ -157,3 +159,5 @@ void TestMessageFormat2::messageFormat1Tests() { #endif /* #if !UCONFIG_NO_MF2 */ #endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif /* #if !UCONFIG_NO_NORMALIZATION */ diff --git a/icu4c/source/test/intltest/messageformat2test_read_json.cpp b/icu4c/source/test/intltest/messageformat2test_read_json.cpp index ddf93da632ce..43dbe1b3db01 100644 --- a/icu4c/source/test/intltest/messageformat2test_read_json.cpp +++ b/icu4c/source/test/intltest/messageformat2test_read_json.cpp @@ -2,6 +2,8 @@ #include "unicode/utypes.h" +#if !UCONFIG_NO_NORMALIZATION + #if !UCONFIG_NO_FORMATTING #if !UCONFIG_NO_MF2 @@ -309,6 +311,9 @@ void TestMessageFormat2::jsonTestsFromFiles(IcuTestErrorCode& errorCode) { runTestsFromJsonFile(*this, "spec/functions/time.json", errorCode); // Other tests (non-spec) + // TODO: Delete this file after https://github.com/unicode-org/message-format-wg/pull/904 + // lands and the tests here are updated from the spec repo + runTestsFromJsonFile(*this, "normalization.json", errorCode); runTestsFromJsonFile(*this, "more-functions.json", errorCode); runTestsFromJsonFile(*this, "valid-tests.json", errorCode); runTestsFromJsonFile(*this, "resolution-errors.json", errorCode); @@ -358,3 +363,5 @@ void TestMessageFormat2::jsonTestsFromFiles(IcuTestErrorCode& errorCode) { #endif /* #if !UCONFIG_NO_MF2 */ #endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif /* #if !UCONFIG_NO_NORMALIZATION */ diff --git a/icu4c/source/test/intltest/messageformat2test_utils.h b/icu4c/source/test/intltest/messageformat2test_utils.h index c4ad251c7f48..336211d6e0a3 100644 --- a/icu4c/source/test/intltest/messageformat2test_utils.h +++ b/icu4c/source/test/intltest/messageformat2test_utils.h @@ -6,6 +6,8 @@ #include "unicode/utypes.h" +#if !UCONFIG_NO_NORMALIZATION + #if !UCONFIG_NO_FORMATTING #if !UCONFIG_NO_MF2 @@ -344,4 +346,6 @@ U_NAMESPACE_END #endif /* #if !UCONFIG_NO_FORMATTING */ +#endif /* #if !UCONFIG_NO_NORMALIZATION */ + #endif diff --git a/testdata/message2/normalization.json b/testdata/message2/normalization.json new file mode 100644 index 000000000000..283e3dac8a82 --- /dev/null +++ b/testdata/message2/normalization.json @@ -0,0 +1,67 @@ +{ + "$schema": "https://raw.githubusercontent.com/unicode-org/message-format-wg/main/test/schemas/v0/tests.schema.json", + "scenario": "Syntax", + "description": "Test cases that do not depend on any registry definitions.", + "defaultTestProperties": { + "locale": "en-US" + }, + "tests": [ + { + "description": "NFC: text is not normalized", + "src": "\u1E0A\u0323", + "exp": "\u1E0A\u0323" + }, + { + "description": "NFC: variables are compared to each other as-if normalized; decl is non-normalized, use is", + "src": ".local $\u0044\u0323\u0307 = {foo} {{{$\u1E0c\u0307}}}", + "exp": "foo" + }, + { + "description": "NFC: variables are compared to each other as-if normalized; decl is normalized, use isn't", + "src": ".local $\u1E0c\u0307 = {foo} {{{$\u0044\u0323\u0307}}}", + "exp": "foo" + }, + { + "description": "NFC: variables are compared to each other as-if normalized; decl is normalized, use isn't", + "src": ".input {$\u1E0c\u0307} {{{$\u0044\u0323\u0307}}}", + "params": [{"name": "\u1E0c\u0307", "value": "foo"}], + "exp": "foo" + }, + { + "description": "NFC: variables are compared to each other as-if normalized; decl is non-normalized, use is", + "src": ".input {$\u0044\u0323\u0307} {{{$\u1E0c\u0307}}}", + "params": [{"name": "\u0044\u0323\u0307", "value": "foo"}], + "exp": "foo" + }, + { + "description": "NFC: variables are compared to each other as-if normalized; decl is non-normalized, use is; reordering", + "src": ".local $\u0044\u0307\u0323 = {foo} {{{$\u1E0c\u0307}}}", + "exp": "foo" + }, + { + "description": "NFC: variables are compared to each other as-if normalized; decl is non-normalized, use is; special case mapping", + "src": ".local $\u0041\u030A\u0301 = {foo} {{{$\u01FA}}}", + "exp": "foo" + }, + { + "description": "NFC: keys are normalized", + "src": ".local $x = {\u1E0C\u0307 :string} .match {$x} \u1E0A\u0323 {{Right}} * {{Wrong}}", + "exp": "Right" + }, + { + "description": "NFC: keys are normalized (unquoted)", + "src": ".local $x = {\u1E0A\u0323 :string} .match {$x} \u1E0A\u0323 {{Not normalized}} \u1E0C\u0307 {{Normalized}} * {{Wrong}}", + "expErrors": [{"type": "duplicate-variant"}] + }, + { + "description": "NFC: keys are normalized (quoted)", + "src": ".local $x = {\u1E0A\u0323 :string} .match {$x} |\u1E0A\u0323| {{Not normalized}} |\u1E0C\u0307| {{Normalized}} * {{Wrong}}", + "expErrors": [{"type": "duplicate-variant"}] + }, + { + "description": "NFC: keys are normalized (mixed)", + "src": ".local $x = {\u1E0A\u0323 :string} .match {$x} \u1E0A\u0323 {{Not normalized}} |\u1E0C\u0307| {{Normalized}} * {{Wrong}}", + "expErrors": [{"type": "duplicate-variant"}] + } +] +}