From 076ee465a03f5f7db3fed90fe78b3c24351da76d Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Wed, 25 Sep 2024 16:43:05 -0700 Subject: [PATCH] ICU-22908 MF2: Finish updating spec tests and implement required test functions Implement :test:format, :test:select, and :test:function, which are required by the new `pattern-selection.json` tests. Change the internal value representation in the formatter in order to support some of the test cases (binding the results of selectors to a variable). --- icu4c/source/common/unicode/utypes.h | 3 +- icu4c/source/common/utypes.cpp | 3 +- icu4c/source/i18n/messageformat2.cpp | 407 ++++++------------ .../source/i18n/messageformat2_evaluation.cpp | 248 ++++++++++- icu4c/source/i18n/messageformat2_evaluation.h | 70 +-- .../source/i18n/messageformat2_formatter.cpp | 13 +- .../i18n/messageformat2_function_registry.cpp | 311 ++++++++++++- ...essageformat2_function_registry_internal.h | 54 +++ icu4c/source/i18n/unicode/messageformat2.h | 47 +- .../i18n/unicode/messageformat2_formattable.h | 9 +- icu4c/source/test/depstest/depstest.py | 3 + testdata/message2/duplicate-declarations.json | 18 +- .../message2/icu-test-previous-release.json | 16 +- testdata/message2/tricky-declarations.json | 2 - 14 files changed, 808 insertions(+), 396 deletions(-) diff --git a/icu4c/source/common/unicode/utypes.h b/icu4c/source/common/unicode/utypes.h index 0151ebd47015..ecdee5164316 100644 --- a/icu4c/source/common/unicode/utypes.h +++ b/icu4c/source/common/unicode/utypes.h @@ -598,12 +598,13 @@ typedef enum UErrorCode { U_MF_DUPLICATE_DECLARATION_ERROR, /**< The same variable is declared in more than one .local or .input declaration. @internal ICU 75 technology preview @deprecated This API is for technology preview only. */ U_MF_OPERAND_MISMATCH_ERROR, /**< An operand provided to a function does not have the required form for that function @internal ICU 75 technology preview @deprecated This API is for technology preview only. */ U_MF_DUPLICATE_VARIANT_ERROR, /**< A message includes a variant with the same key list as another variant. @internal ICU 76 technology preview @deprecated This API is for technology preview only. */ + U_MF_BAD_OPTION, /**< An option value provided to a function does not have the required form for that option. @internal ICU 77 technology preview @deprecated This API is for technology preview only. */ #ifndef U_HIDE_DEPRECATED_API /** * One more than the highest normal formatting API error code. * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. */ - U_FMT_PARSE_ERROR_LIMIT = 0x10120, + U_FMT_PARSE_ERROR_LIMIT = 0x10121, #endif // U_HIDE_DEPRECATED_API /* diff --git a/icu4c/source/common/utypes.cpp b/icu4c/source/common/utypes.cpp index 4602314147f1..4d4c1f81b5e6 100644 --- a/icu4c/source/common/utypes.cpp +++ b/icu4c/source/common/utypes.cpp @@ -140,7 +140,8 @@ _uFmtErrorName[U_FMT_PARSE_ERROR_LIMIT - U_FMT_PARSE_ERROR_START] = { "U_MF_MISSING_SELECTOR_ANNOTATION_ERROR", "U_MF_DUPLICATE_DECLARATION_ERROR", "U_MF_OPERAND_MISMATCH_ERROR", - "U_MF_DUPLICATE_VARIANT_ERROR" + "U_MF_DUPLICATE_VARIANT_ERROR", + "U_MF_BAD_OPTION" }; static const char * const diff --git a/icu4c/source/i18n/messageformat2.cpp b/icu4c/source/i18n/messageformat2.cpp index 9de6088a51ce..3b993a71f296 100644 --- a/icu4c/source/i18n/messageformat2.cpp +++ b/icu4c/source/i18n/messageformat2.cpp @@ -55,16 +55,16 @@ static Formattable evalLiteral(const Literal& lit) { return FormattedPlaceholder(evalLiteral(lit), lit.quoted()); } -[[nodiscard]] FormattedPlaceholder MessageFormatter::formatOperand(const Environment& env, - const Operand& rand, - MessageContext& context, - UErrorCode &status) const { +[[nodiscard]] InternalValue* MessageFormatter::formatOperand(const Environment& env, + const Operand& rand, + MessageContext& context, + UErrorCode &status) const { if (U_FAILURE(status)) { return {}; } if (rand.isNull()) { - return FormattedPlaceholder(); + return create(InternalValue(FormattedPlaceholder()), status); } if (rand.isVariable()) { // Check if it's local or global @@ -96,12 +96,12 @@ static Formattable evalLiteral(const Literal& lit) { // https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#fallback-resolution UnicodeString str(DOLLAR); str += var; - return FormattedPlaceholder(str); + return create(InternalValue(FormattedPlaceholder(str)), status); } - return result; + return create(InternalValue(std::move(result)), status); } else { U_ASSERT(rand.isLiteral()); - return formatLiteral(rand.asLiteral()); + return create(InternalValue(formatLiteral(rand.asLiteral())), status); } } @@ -122,28 +122,32 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const O // Options are fully evaluated before calling the function // Format the operand - FormattedPlaceholder rhsVal = formatOperand(env, v, context, status); + LocalPointer rhsVal(formatOperand(env, v, context, status)); if (U_FAILURE(status)) { return {}; } - if (!rhsVal.isFallback()) { - resolvedOpt.adoptInstead(create(ResolvedFunctionOption(k, rhsVal.asFormattable()), status)); - if (U_FAILURE(status)) { - return {}; - } - optionsVector->adoptElement(resolvedOpt.orphan(), status); + // Note: this means option values are "eagerly" evaluated. + // Currently, options don't have options. This will be addressed by the + // full FormattedPlaceholder redesign. + FormattedPlaceholder optValue = rhsVal->forceFormatting(context.getErrors(), status); + resolvedOpt.adoptInstead(create + (ResolvedFunctionOption(k, + optValue.asFormattable()), + status)); + if (U_FAILURE(status)) { + return {}; } + optionsVector->adoptElement(resolvedOpt.orphan(), status); } - return FunctionOptions(std::move(*optionsVector), status); } // Overload that dispatches on argument type. Syntax doesn't provide for options in this case. -[[nodiscard]] FormattedPlaceholder MessageFormatter::evalFormatterCall(FormattedPlaceholder&& argument, - MessageContext& context, - UErrorCode& status) const { +[[nodiscard]] InternalValue* MessageFormatter::evalFunctionCall(FormattedPlaceholder&& argument, + MessageContext& context, + UErrorCode& status) const { if (U_FAILURE(status)) { - return {}; + return nullptr; } // These cases should have been checked for already @@ -161,11 +165,11 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const O // No formatter for this type -- follow default behavior break; } - return evalFormatterCall(functionName, - std::move(argument), - FunctionOptions(), - context, - status); + return evalFunctionCall(functionName, + create(std::move(argument), status), + FunctionOptions(), + context, + status); } default: { // TODO: The array case isn't handled yet; not sure whether it's desirable @@ -175,104 +179,76 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const O } // No formatter for this type, or it's a primitive type (which will be formatted later) // -- just return the argument itself - return std::move(argument); + return create(std::move(argument), status); } // Overload that dispatches on function name -[[nodiscard]] FormattedPlaceholder MessageFormatter::evalFormatterCall(const FunctionName& functionName, - FormattedPlaceholder&& argument, - FunctionOptions&& options, - MessageContext& context, - UErrorCode& status) const { +// Adopts `arg` +[[nodiscard]] InternalValue* MessageFormatter::evalFunctionCall(const FunctionName& functionName, + InternalValue* arg_, + FunctionOptions&& options, + MessageContext& context, + UErrorCode& status) const { if (U_FAILURE(status)) { return {}; } - DynamicErrors& errs = context.getErrors(); - - UnicodeString fallback(COLON); - fallback += functionName; - if (!argument.isNullOperand()) { - fallback = argument.fallback; - } + LocalPointer arg(arg_); + // Look up the formatter or selector + LocalPointer formatterImpl(nullptr); + LocalPointer selectorImpl(nullptr); if (isFormatter(functionName)) { - LocalPointer formatterImpl(getFormatter(functionName, status)); - if (U_FAILURE(status)) { - if (status == U_MF_FORMATTING_ERROR) { - errs.setFormattingError(functionName, status); - status = U_ZERO_ERROR; - return {}; - } - if (status == U_MF_UNKNOWN_FUNCTION_ERROR) { - errs.setUnknownFunction(functionName, status); - status = U_ZERO_ERROR; - return {}; - } - // Other errors are non-recoverable - return {}; - } - U_ASSERT(formatterImpl != nullptr); - - UErrorCode savedStatus = status; - FormattedPlaceholder result = formatterImpl->format(std::move(argument), std::move(options), status); - // Update errors - if (savedStatus != status) { - if (U_FAILURE(status)) { - if (status == U_MF_OPERAND_MISMATCH_ERROR) { - status = U_ZERO_ERROR; - errs.setOperandMismatchError(functionName, status); - } else { - status = U_ZERO_ERROR; - // Convey any error generated by the formatter - // as a formatting error, except for operand mismatch errors - errs.setFormattingError(functionName, status); - } - return FormattedPlaceholder(fallback); - } else { - // Ignore warnings - status = savedStatus; - } - } - // Ignore the output if any errors occurred - if (errs.hasFormattingError()) { - return FormattedPlaceholder(fallback); - } - return result; + formatterImpl.adoptInstead(getFormatter(functionName, status)); + U_ASSERT(U_SUCCESS(status)); } - // No formatter with this name -- set error if (isSelector(functionName)) { - errs.setFormattingError(functionName, status); - } else { - errs.setUnknownFunction(functionName, status); + selectorImpl.adoptInstead(getSelector(context, functionName, status)); + U_ASSERT(U_SUCCESS(status)); + } + if (formatterImpl == nullptr && selectorImpl == nullptr) { + // Unknown function error + context.getErrors().setUnknownFunction(functionName, status); + + if (arg->hasNullOperand()) { + // Non-selector used as selector; an error would have been recorded earlier + UnicodeString fallback(COLON); + fallback += functionName; + return new InternalValue(FormattedPlaceholder(fallback)); + } else { + return new InternalValue(FormattedPlaceholder(arg->getFallback())); + } } - return FormattedPlaceholder(fallback); + return new InternalValue(arg.orphan(), + std::move(options), + functionName, + formatterImpl.isValid() ? formatterImpl.orphan() : nullptr, + selectorImpl.isValid() ? selectorImpl.orphan() : nullptr); } // Formats an expression using `globalEnv` for the values of variables -[[nodiscard]] FormattedPlaceholder MessageFormatter::formatExpression(const Environment& globalEnv, - const Expression& expr, - MessageContext& context, - UErrorCode &status) const { +[[nodiscard]] InternalValue* MessageFormatter::formatExpression(const Environment& globalEnv, + const Expression& expr, + MessageContext& context, + UErrorCode &status) const { if (U_FAILURE(status)) { return {}; } const Operand& rand = expr.getOperand(); // Format the operand (formatOperand handles the case of a null operand) - FormattedPlaceholder randVal = formatOperand(globalEnv, rand, context, status); + LocalPointer randVal(formatOperand(globalEnv, rand, context, status)); - // Don't call the function on error values - if (randVal.isFallback()) { - return randVal; - } + FormattedPlaceholder maybeRand = randVal->takeArgument(status); - if (!expr.isFunctionCall()) { + if (!expr.isFunctionCall() && U_SUCCESS(status)) { // Dispatch based on type of `randVal` - return evalFormatterCall(std::move(randVal), - context, - status); - } else { + if (maybeRand.isFallback()) { + return randVal.orphan(); + } + return evalFunctionCall(std::move(maybeRand), context, status); + } else if (expr.isFunctionCall()) { + status = U_ZERO_ERROR; const Operator* rator = expr.getOperator(status); U_ASSERT(U_SUCCESS(status)); const FunctionName& functionName = rator->getFunctionName(); @@ -281,19 +257,14 @@ FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const O FunctionOptions resolvedOptions = resolveOptions(globalEnv, options, context, status); // Call the formatter function - // The fallback for a nullary function call is the function name - UnicodeString fallback; - if (rand.isNull()) { - fallback = UnicodeString(COLON); - fallback += functionName; - } else { - fallback = randVal.fallback; - } - return evalFormatterCall(functionName, - std::move(randVal), - std::move(resolvedOptions), - context, - status); + return evalFunctionCall(functionName, + randVal.orphan(), + std::move(resolvedOptions), + context, + status); + } else { + status = U_ZERO_ERROR; + return randVal.orphan(); } } @@ -309,11 +280,13 @@ void MessageFormatter::formatPattern(MessageContext& context, const Environment& // Markup is ignored } else { // Format the expression - FormattedPlaceholder partVal = formatExpression(globalEnv, part.contents(), context, status); - // Force full evaluation, e.g. applying default formatters to + LocalPointer partVal( + formatExpression(globalEnv, part.contents(), context, status)); + FormattedPlaceholder partResult = partVal->forceFormatting(context.getErrors(), + status); + // Force full evaluation, e.g. applying default formatters to // unformatted input (or formatting numbers as strings) - UnicodeString partResult = partVal.formatToString(locale, status); - result += partResult; + result += partResult.formatToString(locale, status); // Handle formatting errors. `formatToString()` can't take a context and thus can't // register an error directly if (status == U_MF_FORMATTING_ERROR) { @@ -342,8 +315,8 @@ void MessageFormatter::resolveSelectors(MessageContext& context, const Environme // 2. For each expression exp of the message's selectors for (int32_t i = 0; i < dataModel.numSelectors(); i++) { // 2i. Let rv be the resolved value of exp. - ResolvedSelector rv = formatSelector(env, selectors[i], context, status); - if (rv.hasSelector()) { + LocalPointer rv(formatOperand(env, Operand(selectors[i]), context, status)); + if (rv->canSelect()) { // 2ii. If selection is supported for rv: // (True if this code has been reached) } else { @@ -352,17 +325,17 @@ void MessageFormatter::resolveSelectors(MessageContext& context, const Environme // Append nomatch as the last element of the list res. // Emit a Selection Error. // (Note: in this case, rv, being a fallback, serves as `nomatch`) - #if U_DEBUG - const DynamicErrors& err = context.getErrors(); - U_ASSERT(err.hasError()); - U_ASSERT(rv.argument().isFallback()); - #endif + DynamicErrors& err = context.getErrors(); + err.setSelectorError(rv->getFunctionName(), status); + rv.adoptInstead(new InternalValue(FormattedPlaceholder(rv->getFallback()))); + if (!rv.isValid()) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } } // 2ii(a). Append rv as the last element of the list res. // (Also fulfills 2iii) - LocalPointer v(create(std::move(rv), status)); - CHECK_ERROR(status); - res.adoptElement(v.orphan(), status); + res.adoptElement(rv.orphan(), status); } } @@ -370,18 +343,17 @@ void MessageFormatter::resolveSelectors(MessageContext& context, const Environme // `keys` and `matches` are vectors of strings void MessageFormatter::matchSelectorKeys(const UVector& keys, MessageContext& context, - ResolvedSelector&& rv, + InternalValue* rv, // Does not adopt `rv` UVector& keysOut, UErrorCode& status) const { CHECK_ERROR(status); - if (!rv.hasSelector()) { + if (U_FAILURE(status)) { // Return an empty list of matches + status = U_ZERO_ERROR; return; } - auto selectorImpl = rv.getSelector(); - U_ASSERT(selectorImpl != nullptr); UErrorCode savedStatus = status; // Convert `keys` to an array @@ -408,15 +380,17 @@ void MessageFormatter::matchSelectorKeys(const UVector& keys, int32_t prefsLen = 0; // Call the selector - selectorImpl->selectKey(rv.takeArgument(), rv.takeOptions(), - adoptedKeys.getAlias(), keysLen, adoptedPrefs.getAlias(), prefsLen, - status); + FunctionName name = rv->getFunctionName(); + rv->forceSelection(context.getErrors(), + adoptedKeys.getAlias(), keysLen, + adoptedPrefs.getAlias(), prefsLen, + status); // Update errors if (savedStatus != status) { if (U_FAILURE(status)) { status = U_ZERO_ERROR; - context.getErrors().setSelectorError(rv.getSelectorName(), status); + context.getErrors().setSelectorError(name, status); } else { // Ignore warnings status = savedStatus; @@ -479,7 +453,7 @@ void MessageFormatter::resolvePreferences(MessageContext& context, UVector& res, } // 2iii. Let `rv` be the resolved value at index `i` of `res`. U_ASSERT(i < res.size()); - ResolvedSelector rv = std::move(*(static_cast(res[i]))); + InternalValue* rv = static_cast(res[i]); // 2iv. Let matches be the result of calling the method MatchSelectorKeys(rv, keys) LocalPointer matches(createUVector(status)); matchSelectorKeys(*keys, context, std::move(rv), *matches, status); @@ -612,135 +586,13 @@ void MessageFormatter::sortVariants(const UVector& pref, UVector& vars, UErrorCo // 7. Select the pattern of `var` } - -// Evaluate the operand -ResolvedSelector MessageFormatter::resolveVariables(const Environment& env, - const Operand& rand, - MessageContext& context, - UErrorCode &status) const { - if (U_FAILURE(status)) { - return {}; - } - - if (rand.isNull()) { - return ResolvedSelector(FormattedPlaceholder()); - } - - if (rand.isLiteral()) { - return ResolvedSelector(formatLiteral(rand.asLiteral())); - } - - // Must be variable - return resolveVariables(env, rand.asVariable(), context, status); -} - -ResolvedSelector MessageFormatter::resolveVariables(const Environment& env, - const VariableName& var, - MessageContext& context, - UErrorCode &status) const { - // Resolve the variable - if (env.has(var)) { - const Closure& referent = env.lookup(var); - // Resolve the referent - return resolveVariables(referent.getEnv(), referent.getExpr(), context, status); - } - // Either this is a global var or an unbound var -- - // either way, it can't be bound to a function call. - // Check globals - FormattedPlaceholder val = evalArgument(var, context, status); - if (status == U_ILLEGAL_ARGUMENT_ERROR) { - status = U_ZERO_ERROR; - // Unresolved variable -- could be a previous warning. Nothing to resolve - U_ASSERT(context.getErrors().hasUnresolvedVariableError()); - return ResolvedSelector(FormattedPlaceholder(var)); - } - // Pass through other errors - return ResolvedSelector(std::move(val)); -} - -// Evaluate the expression except for not performing the top-level function call -// (which is expected to be a selector, but may not be, in error cases) -ResolvedSelector MessageFormatter::resolveVariables(const Environment& env, - const Expression& expr, - MessageContext& context, - UErrorCode &status) const { - if (U_FAILURE(status)) { - return {}; - } - - // Function call -- resolve the operand and options - if (expr.isFunctionCall()) { - const Operator* rator = expr.getOperator(status); - U_ASSERT(U_SUCCESS(status)); - // Already checked that rator is non-reserved - const FunctionName& selectorName = rator->getFunctionName(); - if (isSelector(selectorName)) { - auto selector = getSelector(context, selectorName, status); - if (U_SUCCESS(status)) { - FunctionOptions resolvedOptions = resolveOptions(env, rator->getOptionsInternal(), context, status); - // Operand may be the null argument, but resolveVariables() handles that - FormattedPlaceholder argument = formatOperand(env, expr.getOperand(), context, status); - return ResolvedSelector(selectorName, selector, std::move(resolvedOptions), std::move(argument)); - } - } else if (isFormatter(selectorName)) { - context.getErrors().setSelectorError(selectorName, status); - } else { - context.getErrors().setUnknownFunction(selectorName, status); - } - // Non-selector used as selector; an error would have been recorded earlier - UnicodeString fallback(COLON); - fallback += selectorName; - if (!expr.getOperand().isNull()) { - fallback = formatOperand(env, expr.getOperand(), context, status).fallback; - } - return ResolvedSelector(FormattedPlaceholder(fallback)); - } else { - // Might be a variable reference, so expand one more level of variable - return resolveVariables(env, expr.getOperand(), context, status); - } -} - -ResolvedSelector MessageFormatter::formatSelector(const Environment& globalEnv, - const VariableName& var, - MessageContext& context, - UErrorCode &status) const { - if (U_FAILURE(status)) { - return {}; - } - - // Resolve expression to determine if it's a function call - ResolvedSelector exprResult = resolveVariables(globalEnv, var, context, status); - - DynamicErrors& err = context.getErrors(); - - // If there is a selector, then `resolveVariables()` recorded it in the context - if (exprResult.hasSelector()) { - // Check if there was an error - if (exprResult.argument().isFallback()) { - // Use a null expression if it's a syntax or data model warning; - // create a valid (non-fallback) formatted placeholder from the - // fallback string otherwise - if (err.hasSyntaxError() || err.hasDataModelError()) { - return ResolvedSelector(FormattedPlaceholder()); // Null operand - } else { - return ResolvedSelector(exprResult.takeArgument()); - } - } - return exprResult; - } - - // No selector was found; error should already have been set - U_ASSERT(err.hasMissingSelectorAnnotationError() || err.hasUnknownFunctionError() || err.hasSelectorError()); - return ResolvedSelector(FormattedPlaceholder(exprResult.argument().fallback)); -} - void MessageFormatter::formatSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UnicodeString& result) const { CHECK_ERROR(status); // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#pattern-selection // Resolve Selectors - // res is a vector of FormattedPlaceholders + // res is a vector of InternalValues LocalPointer res(createUVector(status)); CHECK_ERROR(status); resolveSelectors(context, env, status, *res); @@ -781,29 +633,38 @@ void MessageFormatter::formatSelectors(MessageContext& context, const Environmen UnicodeString MessageFormatter::formatToString(const MessageArguments& arguments, UErrorCode &status) { EMPTY_ON_ERROR(status); - // Create a new environment that will store closures for all local variables Environment* env = Environment::create(status); + // Create a new context with the given arguments and the `errors` structure MessageContext context(arguments, *errors, status); - - // Check for unresolved variable errors - checkDeclarations(context, env, status); - LocalPointer globalEnv(env); - UnicodeString result; - if (dataModel.hasPattern()) { - formatPattern(context, *globalEnv, dataModel.getPattern(), status, result); - } else { - // Check for errors/warnings -- if so, then the result of pattern selection is the fallback value - // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#pattern-selection - const DynamicErrors& err = context.getErrors(); - if (err.hasSyntaxError() || err.hasDataModelError()) { - result += REPLACEMENT; + + if (!(errors->hasSyntaxError() || errors->hasDataModelError())) { + // Create a new environment that will store closures for all local variables + // Check for unresolved variable errors + // checkDeclarations needs a reference to the pointer to the environment + // since it uses its `env` argument as an out-parameter. So it needs to be + // temporarily not a LocalPointer... + Environment* env(Environment::create(status)); + checkDeclarations(context, env, status); + // ...and then it's adopted to avoid leaks + LocalPointer globalEnv(env); + + if (dataModel.hasPattern()) { + formatPattern(context, *globalEnv, dataModel.getPattern(), status, result); } else { - formatSelectors(context, *globalEnv, status, result); + // Check for errors/warnings -- if so, then the result of pattern selection is the fallback value + // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#pattern-selection + const DynamicErrors& err = context.getErrors(); + if (err.hasSyntaxError() || err.hasDataModelError()) { + result += REPLACEMENT; + } else { + formatSelectors(context, *globalEnv, status, result); + } } } + // Update status according to all errors seen while formatting if (signalErrors) { context.checkErrors(status); diff --git a/icu4c/source/i18n/messageformat2_evaluation.cpp b/icu4c/source/i18n/messageformat2_evaluation.cpp index 3ba54c6b389c..fcccbf5ae5e7 100644 --- a/icu4c/source/i18n/messageformat2_evaluation.cpp +++ b/icu4c/source/i18n/messageformat2_evaluation.cpp @@ -91,34 +91,53 @@ FunctionOptions::FunctionOptions(FunctionOptions&& other) { FunctionOptions::~FunctionOptions() { if (options != nullptr) { delete[] options; + options = nullptr; } } -// ResolvedSelector -// ---------------- - -ResolvedSelector::ResolvedSelector(const FunctionName& fn, - Selector* sel, - FunctionOptions&& opts, - FormattedPlaceholder&& val) - : selectorName(fn), selector(sel), options(std::move(opts)), value(std::move(val)) { - U_ASSERT(sel != nullptr); + +static bool containsOption(const UVector& opts, const ResolvedFunctionOption& opt) { + for (int32_t i = 0; i < opts.size(); i++) { + if (static_cast(opts[i])->getName() + == opt.getName()) { + return true; + } + } + return false; } -ResolvedSelector::ResolvedSelector(FormattedPlaceholder&& val) : value(std::move(val)) {} +// Options in `this` take precedence +// `this` can't be used after mergeOptions is called +FunctionOptions FunctionOptions::mergeOptions(FunctionOptions&& other, + UErrorCode& status) { + UVector mergedOptions(status); + mergedOptions.setDeleter(uprv_deleteUObject); -ResolvedSelector& ResolvedSelector::operator=(ResolvedSelector&& other) noexcept { - selectorName = std::move(other.selectorName); - selector.adoptInstead(other.selector.orphan()); - options = std::move(other.options); - value = std::move(other.value); - return *this; -} + if (U_FAILURE(status)) { + return {}; + } -ResolvedSelector::ResolvedSelector(ResolvedSelector&& other) { - *this = std::move(other); -} + // Create a new vector consisting of the options from this `FunctionOptions` + for (int32_t i = 0; i < functionOptionsLen; i++) { + mergedOptions.adoptElement(create(std::move(options[i]), status), + status); + } -ResolvedSelector::~ResolvedSelector() {} + // Add each option from `other` that doesn't appear in this `FunctionOptions` + for (int i = 0; i < other.functionOptionsLen; i++) { + // Note: this is quadratic in the length of `options` + if (!containsOption(mergedOptions, other.options[i])) { + mergedOptions.adoptElement(create(std::move(other.options[i]), + status), + status); + } + } + + delete[] options; + options = nullptr; + functionOptionsLen = 0; + + return FunctionOptions(std::move(mergedOptions), status); +} // PrioritizedVariant // ------------------ @@ -204,6 +223,193 @@ PrioritizedVariant::~PrioritizedVariant() {} MessageContext::~MessageContext() {} + // InternalValue + // ------------- + + bool InternalValue::isFallback() const { + return std::holds_alternative(argument) + && std::get_if(&argument)->isFallback(); + } + + bool InternalValue::hasNullOperand() const { + return std::holds_alternative(argument) + && std::get_if(&argument)->isNullOperand(); + } + + FormattedPlaceholder InternalValue::takeArgument(UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return {}; + } + + if (std::holds_alternative(argument)) { + return std::move(*std::get_if(&argument)); + } + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return {}; + } + + const UnicodeString& InternalValue::getFallback() const { + if (std::holds_alternative(argument)) { + return std::get_if(&argument)->getFallback(); + } + return (*std::get_if(&argument))->getFallback(); + } + + const Selector* InternalValue::getSelector(UErrorCode& errorCode) const { + if (U_FAILURE(errorCode)) { + return nullptr; + } + + if (selector == nullptr) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + } + return selector; + } + + InternalValue::InternalValue(FormattedPlaceholder&& arg) { + argument = std::move(arg); + selector = nullptr; + formatter = nullptr; + } + + InternalValue::InternalValue(InternalValue* operand, + FunctionOptions&& opts, + const FunctionName& functionName, + const Formatter* f, + const Selector* s) { + argument = operand; + options = std::move(opts); + name = functionName; + selector = s; + formatter = f; + U_ASSERT(selector != nullptr || formatter != nullptr); + } + + // `this` cannot be used after calling this method + void InternalValue::forceSelection(DynamicErrors& errs, + const UnicodeString* keys, + int32_t keysLen, + UnicodeString* prefs, + int32_t& prefsLen, + UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return; + } + + if (!canSelect()) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + // Find the argument and complete set of options by traversing `argument` + FunctionOptions opts; + InternalValue* p = this; + FunctionName selectorName = name; + while (std::holds_alternative(p->argument)) { + if (p->name != selectorName) { + // Can only compose calls to the same selector + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + // First argument to mergeOptions takes precedence + opts = opts.mergeOptions(std::move(p->options), errorCode); + if (U_FAILURE(errorCode)) { + return; + } + InternalValue* next = *std::get_if(&p->argument); + p = next; + } + FormattedPlaceholder arg = std::move(*std::get_if(&p->argument)); + + selector->selectKey(std::move(arg), std::move(opts), + keys, keysLen, + prefs, prefsLen, errorCode); + if (U_FAILURE(errorCode)) { + errorCode = U_ZERO_ERROR; + errs.setSelectorError(selectorName, errorCode); + } + } + + FormattedPlaceholder InternalValue::forceFormatting(DynamicErrors& errs, UErrorCode& errorCode) { + if (U_FAILURE(errorCode)) { + return {}; + } + + if (formatter == nullptr && selector == nullptr) { + U_ASSERT(std::holds_alternative(argument)); + return std::move(*std::get_if(&argument)); + } + if (formatter == nullptr) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return {}; + } + + FormattedPlaceholder arg; + + if (std::holds_alternative(argument)) { + arg = std::move(*std::get_if(&argument)); + } else { + arg = (*std::get_if(&argument))->forceFormatting(errs, + errorCode); + } + + if (U_FAILURE(errorCode)) { + return {}; + } + + // The fallback for a nullary function call is the function name + UnicodeString fallback; + if (arg.isNullOperand()) { + fallback = u":"; + fallback += name; + } else { + fallback = arg.getFallback(); + } + + // Call the function with the argument + FormattedPlaceholder result = formatter->format(std::move(arg), std::move(options), errorCode); + if (U_FAILURE(errorCode)) { + if (errorCode == U_MF_OPERAND_MISMATCH_ERROR) { + errorCode = U_ZERO_ERROR; + errs.setOperandMismatchError(name, errorCode); + } else { + errorCode = U_ZERO_ERROR; + // Convey any error generated by the formatter + // as a formatting error, except for operand mismatch errors + errs.setFormattingError(name, errorCode); + } + } + // Ignore the output if any error occurred + if (errs.hasFormattingError()) { + return FormattedPlaceholder(fallback); + } + + return result; + } + + InternalValue& InternalValue::operator=(InternalValue&& other) noexcept { + argument = std::move(other.argument); + other.argument = nullptr; + options = std::move(other.options); + name = other.name; + selector = other.selector; + formatter = other.formatter; + other.selector = nullptr; + other.formatter = nullptr; + + return *this; + } + + InternalValue::~InternalValue() { + delete selector; + selector = nullptr; + delete formatter; + formatter = nullptr; + if (std::holds_alternative(argument)) { + delete *std::get_if(&argument); + argument = nullptr; + } + } + } // namespace message2 U_NAMESPACE_END diff --git a/icu4c/source/i18n/messageformat2_evaluation.h b/icu4c/source/i18n/messageformat2_evaluation.h index ae8b4a08a3bd..fcb30bc3e638 100644 --- a/icu4c/source/i18n/messageformat2_evaluation.h +++ b/icu4c/source/i18n/messageformat2_evaluation.h @@ -64,38 +64,6 @@ namespace message2 { return 1; } - // Encapsulates a value to be scrutinized by a `match` with its resolved - // options and the name of the selector - class ResolvedSelector : public UObject { - public: - ResolvedSelector() {} - ResolvedSelector(const FunctionName& fn, - Selector* selector, - FunctionOptions&& options, - FormattedPlaceholder&& value); - // Used either for errors, or when selector isn't yet known - explicit ResolvedSelector(FormattedPlaceholder&& value); - bool hasSelector() const { return selector.isValid(); } - const FormattedPlaceholder& argument() const { return value; } - FormattedPlaceholder&& takeArgument() { return std::move(value); } - const Selector* getSelector() { - U_ASSERT(selector.isValid()); - return selector.getAlias(); - } - FunctionOptions&& takeOptions() { - return std::move(options); - } - const FunctionName& getSelectorName() const { return selectorName; } - virtual ~ResolvedSelector(); - ResolvedSelector& operator=(ResolvedSelector&&) noexcept; - ResolvedSelector(ResolvedSelector&&); - private: - FunctionName selectorName; // For error reporting - LocalPointer selector; - FunctionOptions options; - FormattedPlaceholder value; - }; // class ResolvedSelector - // Closures and environments // ------------------------- @@ -199,6 +167,44 @@ namespace message2 { }; // class MessageContext + // InternalValue + // ---------------- + + class InternalValue : public UObject { + public: + const FunctionName& getFunctionName() const { return name; } + bool canSelect() const { return selector != nullptr; } + const Selector* getSelector(UErrorCode&) const; + FormattedPlaceholder forceFormatting(DynamicErrors& errs, + UErrorCode& errorCode); + void forceSelection(DynamicErrors& errs, + const UnicodeString* keys, + int32_t keysLen, + UnicodeString* prefs, + int32_t& prefsLen, + UErrorCode& errorCode); + // Needs to be deep-copyable and movable + virtual ~InternalValue(); + InternalValue(FormattedPlaceholder&&); + // Formatter and selector may be null + InternalValue(InternalValue*, FunctionOptions&&, const FunctionName&, const Formatter*, + const Selector*); + const UnicodeString& getFallback() const; + bool isFallback() const; + bool hasNullOperand() const; + // Can't be used anymore after calling this + FormattedPlaceholder takeArgument(UErrorCode& errorCode); + InternalValue(InternalValue&& other) { *this = std::move(other); } + InternalValue& operator=(InternalValue&& other) noexcept; + private: + // InternalValue is owned (if present) + std::variant argument; + FunctionOptions options; + FunctionName name; + const Selector* selector; // May be null + const Formatter* formatter; // May be null, but one or the other should be non-null unless argument is a FormattedPlaceholder + }; // class InternalValue + } // namespace message2 U_NAMESPACE_END diff --git a/icu4c/source/i18n/messageformat2_formatter.cpp b/icu4c/source/i18n/messageformat2_formatter.cpp index 82f2191ba1a6..cba0c1599237 100644 --- a/icu4c/source/i18n/messageformat2_formatter.cpp +++ b/icu4c/source/i18n/messageformat2_formatter.cpp @@ -152,9 +152,13 @@ namespace message2 { .adoptFormatter(FunctionName(UnicodeString("time")), time, success) .adoptFormatter(FunctionName(UnicodeString("number")), number, success) .adoptFormatter(FunctionName(UnicodeString("integer")), integer, success) + .adoptFormatter(FunctionName(UnicodeString("test:function")), new StandardFunctions::TestFormatFactory(), success) + .adoptFormatter(FunctionName(UnicodeString("test:format")), new StandardFunctions::TestFormatFactory(), success) .adoptSelector(FunctionName(UnicodeString("number")), new StandardFunctions::PluralFactory(UPLURAL_TYPE_CARDINAL), success) .adoptSelector(FunctionName(UnicodeString("integer")), new StandardFunctions::PluralFactory(StandardFunctions::PluralFactory::integer()), success) - .adoptSelector(FunctionName(UnicodeString("string")), new StandardFunctions::TextFactory(), success); + .adoptSelector(FunctionName(UnicodeString("string")), new StandardFunctions::TextFactory(), success) + .adoptSelector(FunctionName(UnicodeString("test:function")), new StandardFunctions::TestSelectFactory(), success) + .adoptSelector(FunctionName(UnicodeString("test:select")), new StandardFunctions::TestSelectFactory(), success); CHECK_ERROR(success); standardMFFunctionRegistry = standardFunctionsBuilder.build(); CHECK_ERROR(success); @@ -279,8 +283,11 @@ namespace message2 { return formatter; } - bool MessageFormatter::getDefaultFormatterNameByType(const UnicodeString& type, FunctionName& name) const { - U_ASSERT(hasCustomMFFunctionRegistry()); + bool MessageFormatter::getDefaultFormatterNameByType(const UnicodeString& type, + FunctionName& name) const { + if (!hasCustomMFFunctionRegistry()) { + return false; + } const MFFunctionRegistry& reg = getCustomMFFunctionRegistry(); return reg.getDefaultFormatterNameByType(type, name); } diff --git a/icu4c/source/i18n/messageformat2_function_registry.cpp b/icu4c/source/i18n/messageformat2_function_registry.cpp index b4c33544dc92..5724f8234269 100644 --- a/icu4c/source/i18n/messageformat2_function_registry.cpp +++ b/icu4c/source/i18n/messageformat2_function_registry.cpp @@ -160,9 +160,13 @@ void MFFunctionRegistry::checkStandard() const { checkFormatter("time"); checkFormatter("number"); checkFormatter("integer"); + checkFormatter("test:function"); + checkFormatter("test:format"); checkSelector("number"); checkSelector("integer"); checkSelector("string"); + checkSelector("test:function"); + checkSelector("test:select"); } // Formatter/selector helpers @@ -426,14 +430,14 @@ static FormattedPlaceholder notANumber(const FormattedPlaceholder& input) { return FormattedPlaceholder(input, FormattedValue(UnicodeString("NaN"))); } -static double parseNumberLiteral(const FormattedPlaceholder& input, UErrorCode& errorCode) { +static double parseNumberLiteral(const Formattable& input, UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { return {}; } // Copying string to avoid GCC dangling-reference warning // (although the reference is safe) - UnicodeString inputStr = input.asFormattable().getString(errorCode); + UnicodeString inputStr = input.getString(errorCode); // Precondition: `input`'s source Formattable has type string if (U_FAILURE(errorCode)) { return {}; @@ -465,8 +469,42 @@ static double parseNumberLiteral(const FormattedPlaceholder& input, UErrorCode& return result; } +static UChar32 digitToChar(int32_t val, UErrorCode errorCode) { + if (U_FAILURE(errorCode)) { + return '0'; + } + if (val < 0 || val > 9) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + } + switch(val) { + case 0: + return '0'; + case 1: + return '1'; + case 2: + return '2'; + case 3: + return '3'; + case 4: + return '4'; + case 5: + return '5'; + case 6: + return '6'; + case 7: + return '7'; + case 8: + return '8'; + case 9: + return '9'; + default: + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return '0'; + } +} + static FormattedPlaceholder tryParsingNumberLiteral(const number::LocalizedNumberFormatter& nf, const FormattedPlaceholder& input, UErrorCode& errorCode) { - double numberValue = parseNumberLiteral(input, errorCode); + double numberValue = parseNumberLiteral(input.asFormattable(), errorCode); if (U_FAILURE(errorCode)) { return notANumber(input); } @@ -1237,6 +1275,273 @@ void StandardFunctions::TextSelector::selectKey(FormattedPlaceholder&& toFormat, StandardFunctions::TextFactory::~TextFactory() {} StandardFunctions::TextSelector::~TextSelector() {} +// ------------ TestFormatFactory + +Formatter* StandardFunctions::TestFormatFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) { + NULL_ON_ERROR(errorCode); + + // Results are not locale-dependent + (void) locale; + + Formatter* result = new TestFormat(); + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return result; +} + +StandardFunctions::TestFormatFactory::~TestFormatFactory() {} +StandardFunctions::TestFormat::~TestFormat() {} + +// Extract numeric value from a Formattable or, if it's a string, +// parse it as a number according to the MF2 `number-literal` grammar production +double formattableToNumber(const Formattable& arg, UErrorCode& status) { + if (U_FAILURE(status)) { + return 0; + } + + double result = 0; + + switch (arg.getType()) { + case UFMT_DOUBLE: { + result = arg.getDouble(status); + U_ASSERT(U_SUCCESS(status)); + break; + } + case UFMT_LONG: { + result = (double) arg.getLong(status); + U_ASSERT(U_SUCCESS(status)); + break; + } + case UFMT_INT64: { + result = (double) arg.getInt64(status); + U_ASSERT(U_SUCCESS(status)); + break; + } + case UFMT_STRING: { + // Try to parse the string as a number + result = parseNumberLiteral(arg, status); + if (U_FAILURE(status)) { + status = U_MF_OPERAND_MISMATCH_ERROR; + } + break; + } + default: { + // Other types can't be parsed as a number + status = U_MF_OPERAND_MISMATCH_ERROR; + break; + } + } + return result; +} + + +/* static */ void StandardFunctions::TestFormat::testFunctionParameters(const FormattedPlaceholder& arg, + const FunctionOptions& options, + int32_t& decimalPlaces, + bool& failsFormat, + bool& failsSelect, + double& input, + UErrorCode& status) { + CHECK_ERROR(status); + + // 1. Let DecimalPlaces be 0. + decimalPlaces = 0; + + // 2. Let FailsFormat be false. + failsFormat = false; + + // 3. Let FailsSelect be false. + failsSelect = false; + + // 4. Let arg be the resolved value of the expression operand. + // (already true) + + // Step 5 omitted because composition isn't fully implemented yet + // 6. Else if arg is a numerical value or a string matching the number-literal production, then + input = formattableToNumber(arg.asFormattable(), status); + if (U_FAILURE(status)) { + // 7. Else, + // 7i. Emit "bad-input" Resolution Error. + status = U_MF_OPERAND_MISMATCH_ERROR; + // 7ii. Use a fallback value as the resolved value of the expression. + // Further steps of this algorithm are not followed. + } + // 8. If the decimalPlaces option is set, then + Formattable opt; + if (options.getFunctionOption(UnicodeString("decimalPlaces"), opt)) { + // 8i. If its value resolves to a numerical integer value 0 or 1 + // or their corresponding string representations '0' or '1', then + double decimalPlacesInput = formattableToNumber(opt, status); + if (U_SUCCESS(status)) { + if (decimalPlacesInput == 0 || decimalPlacesInput == 1) { + // 8ia. Set DecimalPlaces to be the numerical value of the option. + decimalPlaces = decimalPlacesInput; + } + } + // 8ii. Else if its value is not an unresolved value set by option resolution, + else { + // 8iia. Emit "bad-option" Resolution Error. + status = U_MF_BAD_OPTION; + // 8iib. Use a fallback value as the resolved value of the expression. + } + } + // 9. If the fails option is set, then + Formattable failsOpt; + if (options.getFunctionOption(UnicodeString("fails"), failsOpt)) { + UnicodeString failsString = failsOpt.getString(status); + if (U_SUCCESS(status)) { + // 9i. If its value resolves to the string 'always', then + if (failsString == u"always") { + // 9ia. Set FailsFormat to be true + failsFormat = true; + // 9ib. Set FailsSelect to be true. + failsSelect = true; + } + // 9ii. Else if its value resolves to the string "format", then + else if (failsString == u"format") { + // 9ia. Set FailsFormat to be true + failsFormat = true; + } + // 9iii. Else if its value resolves to the string "select", then + else if (failsString == u"select") { + // 9iiia. Set FailsSelect to be true. + failsSelect = true; + } + // 9iv. Else if its value does not resolve to the string "never", then + else if (failsString != u"never") { + // 9iv(a). Emit "bad-option" Resolution Error. + status = U_MF_BAD_OPTION; + } + } else { + // 9iv. again + status = U_MF_BAD_OPTION; + } + } +} + +FormattedPlaceholder StandardFunctions::TestFormat::format(FormattedPlaceholder&& arg, + FunctionOptions&& options, + UErrorCode& status) const{ + + int32_t decimalPlaces; + bool failsFormat; + bool failsSelect; + double input; + + testFunctionParameters(arg, options, decimalPlaces, + failsFormat, failsSelect, input, status); + if (U_FAILURE(status)) { + return FormattedPlaceholder(arg.getFallback()); + } + + // If FailsFormat is true, attempting to format the placeholder to any + // formatting target will fail. + if (failsFormat) { + status = U_MF_FORMATTING_ERROR; + return FormattedPlaceholder(arg.getFallback()); + } + UnicodeString result; + // When :test:function is used as a formatter, a placeholder resolving to a value + // with a :test:function expression is formatted as a concatenation of the following parts: + // 1. If Input is less than 0, the character - U+002D Hyphen-Minus. + if (input < 0) { + result += HYPHEN; + } + // 2. The truncated absolute integer value of Input, i.e. floor(abs(Input)), formatted as a + // sequence of decimal digit characters (U+0030...U+0039). + char buffer[256]; + bool ignore; + int ignoreLen; + int ignorePoint; + double_conversion::DoubleToStringConverter::DoubleToAscii(floor(abs(input)), + double_conversion::DoubleToStringConverter::DtoaMode::SHORTEST, + 0, + buffer, + 256, + &ignore, + &ignoreLen, + &ignorePoint); + result += UnicodeString(buffer); + // 3. If DecimalPlaces is 1, then + if (decimalPlaces == 1) { + // 3i. The character . U+002E Full Stop. + result += u"."; + // 3ii. The single decimal digit character representing the value + // floor((abs(Input) - floor(abs(Input))) * 10) + int32_t val = floor((abs(input) - floor(abs(input)) * 10)); + result += digitToChar(val, status); + U_ASSERT(U_SUCCESS(status)); + } + return FormattedPlaceholder(result); +} + +// ------------ TestSelectFactory + +StandardFunctions::TestSelectFactory::~TestSelectFactory() {} +StandardFunctions::TestSelect::~TestSelect() {} + +Selector* StandardFunctions::TestSelectFactory::createSelector(const Locale& locale, + UErrorCode& errorCode) const { + NULL_ON_ERROR(errorCode); + + // Results are not locale-dependent + (void) locale; + + Selector* result = new TestSelect(); + if (result == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + return result; +} + +void StandardFunctions::TestSelect::selectKey(FormattedPlaceholder&& val, + FunctionOptions&& options, + const UnicodeString* keys, + int32_t keysLen, + UnicodeString* prefs, + int32_t& prefsLen, + UErrorCode& status) const { + int32_t decimalPlaces; + bool failsFormat; + bool failsSelect; + double input; + + TestFormat::testFunctionParameters(val, options, decimalPlaces, + failsFormat, failsSelect, input, status); + + if (U_FAILURE(status)) { + return; + } + + if (failsSelect) { + status = U_MF_SELECTOR_ERROR; + return; + } + + // If the Input is 1 and DecimalPlaces is 1, the method will return some slice + // of the list « '1.0', '1' », depending on whether those values are included in keys. + bool include1point0 = false; + bool include1 = false; + if (input == 1 && decimalPlaces == 1) { + include1point0 = true; + include1 = true; + } else if (input == 1 && decimalPlaces == 0) { + include1 = true; + } + + // If the Input is 1 and DecimalPlaces is 0, the method will return the list « '1' » if + // keys includes '1', or an empty list otherwise. + // If the Input is any other value, the method will return an empty list. + for (int32_t i = 0; i < keysLen; i++) { + if ((keys[i] == u"1" && include1) + || (keys[i] == u"1.0" && include1point0)) { + prefs[prefsLen] = keys[i]; + prefsLen++; + } + } +} + } // namespace message2 U_NAMESPACE_END diff --git a/icu4c/source/i18n/messageformat2_function_registry_internal.h b/icu4c/source/i18n/messageformat2_function_registry_internal.h index 46845976384e..9599b67bb2ba 100644 --- a/icu4c/source/i18n/messageformat2_function_registry_internal.h +++ b/icu4c/source/i18n/messageformat2_function_registry_internal.h @@ -211,6 +211,60 @@ namespace message2 { TextSelector(const Locale& l) : locale(l) {} }; + + // See https://github.com/unicode-org/message-format-wg/blob/main/test/README.md + class TestFormatFactory : public FormatterFactory { + public: + Formatter* createFormatter(const Locale& locale, UErrorCode& status) override; + TestFormatFactory() {} + virtual ~TestFormatFactory(); + }; + + class TestSelect; + + class TestFormat : public Formatter { + public: + FormattedPlaceholder format(FormattedPlaceholder&& toFormat, FunctionOptions&& options, UErrorCode& status) const override; + virtual ~TestFormat(); + + private: + friend class TestFormatFactory; + friend class TestSelect; + TestFormat() {} + static void testFunctionParameters(const FormattedPlaceholder& arg, + const FunctionOptions& options, + int32_t& decimalPlaces, + bool& failsFormat, + bool& failsSelect, + double& input, + UErrorCode& status); + + }; + + // See https://github.com/unicode-org/message-format-wg/blob/main/test/README.md + class TestSelectFactory : public SelectorFactory { + public: + Selector* createSelector(const Locale& locale, UErrorCode& status) const override; + TestSelectFactory() {} + virtual ~TestSelectFactory(); + }; + + class TestSelect : public Selector { + public: + void selectKey(FormattedPlaceholder&& val, + FunctionOptions&& options, + const UnicodeString* keys, + int32_t keysLen, + UnicodeString* prefs, + int32_t& prefsLen, + UErrorCode& status) const override; + virtual ~TestSelect(); + + private: + friend class TestSelectFactory; + TestSelect() {} + }; + }; extern void formatDateWithDefaults(const Locale& locale, UDate date, UnicodeString&, UErrorCode& errorCode); diff --git a/icu4c/source/i18n/unicode/messageformat2.h b/icu4c/source/i18n/unicode/messageformat2.h index ea0401e6d9ed..926d14318d17 100644 --- a/icu4c/source/i18n/unicode/messageformat2.h +++ b/icu4c/source/i18n/unicode/messageformat2.h @@ -33,8 +33,8 @@ namespace message2 { class Environment; class MessageContext; - class ResolvedSelector; class StaticErrors; + class InternalValue; /** *

MessageFormatter is a Technical Preview API implementing MessageFormat 2.0. @@ -339,19 +339,6 @@ namespace message2 { // Do not define default assignment operator const MessageFormatter &operator=(const MessageFormatter &) = delete; - ResolvedSelector resolveVariables(const Environment& env, - const data_model::VariableName&, - MessageContext&, - UErrorCode &) const; - ResolvedSelector resolveVariables(const Environment& env, - const data_model::Operand&, - MessageContext&, - UErrorCode &) const; - ResolvedSelector resolveVariables(const Environment& env, - const data_model::Expression&, - MessageContext&, - UErrorCode &) const; - // Selection methods // Takes a vector of FormattedPlaceholders @@ -361,7 +348,7 @@ namespace message2 { // Takes a vector of vectors of strings (input) and a vector of PrioritizedVariants (input/output) void sortVariants(const UVector&, UVector&, UErrorCode&) const; // Takes a vector of strings (input) and a vector of strings (output) - void matchSelectorKeys(const UVector&, MessageContext&, ResolvedSelector&& rv, UVector&, UErrorCode&) const; + void matchSelectorKeys(const UVector&, MessageContext&, InternalValue* rv, UVector&, UErrorCode&) const; // Takes a vector of FormattedPlaceholders (input), // and a vector of vectors of strings (output) void resolvePreferences(MessageContext&, UVector&, UVector&, UErrorCode&) const; @@ -372,26 +359,24 @@ namespace message2 { UnicodeString normalizeNFC(const UnicodeString&) const; [[nodiscard]] FormattedPlaceholder formatLiteral(const data_model::Literal&) const; void formatPattern(MessageContext&, const Environment&, const data_model::Pattern&, UErrorCode&, UnicodeString&) const; - // Formats a call to a formatting function + // Evaluates a function call // Dispatches on argument type - [[nodiscard]] FormattedPlaceholder evalFormatterCall(FormattedPlaceholder&& argument, - MessageContext& context, - UErrorCode& status) const; + [[nodiscard]] InternalValue* evalFunctionCall(FormattedPlaceholder&& argument, + MessageContext& context, + UErrorCode& status) const; // Dispatches on function name - [[nodiscard]] FormattedPlaceholder evalFormatterCall(const FunctionName& functionName, - FormattedPlaceholder&& argument, - FunctionOptions&& options, - MessageContext& context, - UErrorCode& status) const; - // Formats a variableName that appears as a selector - ResolvedSelector formatSelector(const Environment& env, - const data_model::VariableName&, - MessageContext&, - UErrorCode&) const; + [[nodiscard]] InternalValue* evalFunctionCall(const FunctionName& functionName, + InternalValue* argument, + FunctionOptions&& options, + MessageContext& context, + UErrorCode& status) const; // Formats an expression that appears in a pattern or as the definition of a local variable - [[nodiscard]] FormattedPlaceholder formatExpression(const Environment&, const data_model::Expression&, MessageContext&, UErrorCode&) const; + [[nodiscard]] InternalValue* formatExpression(const Environment&, + const data_model::Expression&, + MessageContext&, + UErrorCode&) const; [[nodiscard]] FunctionOptions resolveOptions(const Environment& env, const OptionMap&, MessageContext&, UErrorCode&) const; - [[nodiscard]] FormattedPlaceholder formatOperand(const Environment&, const data_model::Operand&, MessageContext&, UErrorCode&) const; + [[nodiscard]] InternalValue* formatOperand(const Environment&, const data_model::Operand&, MessageContext&, UErrorCode&) const; [[nodiscard]] FormattedPlaceholder evalArgument(const data_model::VariableName&, MessageContext&, UErrorCode&) const; void formatSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UnicodeString& result) const; diff --git a/icu4c/source/i18n/unicode/messageformat2_formattable.h b/icu4c/source/i18n/unicode/messageformat2_formattable.h index 53c500dac2f6..d7f4130f493b 100644 --- a/icu4c/source/i18n/unicode/messageformat2_formattable.h +++ b/icu4c/source/i18n/unicode/messageformat2_formattable.h @@ -551,6 +551,7 @@ class U_I18N_API FunctionOptions : public UObject { */ FunctionOptions& operator=(const FunctionOptions&) = delete; private: + friend class InternalValue; friend class MessageFormatter; friend class StandardFunctions; @@ -568,12 +569,10 @@ class U_I18N_API FunctionOptions : public UObject { // that code in the header because it would have to call internal Hashtable methods. ResolvedFunctionOption* options; int32_t functionOptionsLen = 0; -}; // class FunctionOptions - - // TODO doc comments - // Encapsulates either a formatted string or formatted number; - // more output types could be added in the future. + // Returns a new FunctionOptions + FunctionOptions mergeOptions(FunctionOptions&& other, UErrorCode&); +}; // class FunctionOptions /** * A `FormattedValue` represents the result of formatting a `message2::Formattable`. diff --git a/icu4c/source/test/depstest/depstest.py b/icu4c/source/test/depstest/depstest.py index fba45a079815..f993308fbd38 100755 --- a/icu4c/source/test/depstest/depstest.py +++ b/icu4c/source/test/depstest/depstest.py @@ -123,6 +123,9 @@ def _ReadLibrary(root_path, library_name): ("i18n/messageformat2_data_model.o", "typeinfo for std::exception"), ("i18n/messageformat2_data_model.o", "vtable for std::exception"), ("i18n/messageformat2_data_model.o", "std::exception::~exception()"), + ("i18n/messageformat2_evaluation.o", "typeinfo for std::exception"), + ("i18n/messageformat2_evaluation.o", "vtable for std::exception"), + ("i18n/messageformat2_evaluation.o", "std::exception::~exception()"), ("i18n/messageformat2_formattable.o", "typeinfo for std::exception"), ("i18n/messageformat2_formattable.o", "vtable for std::exception"), ("i18n/messageformat2_formattable.o", "std::exception::~exception()"), diff --git a/testdata/message2/duplicate-declarations.json b/testdata/message2/duplicate-declarations.json index cd3acc1576d3..b744365f51f1 100644 --- a/testdata/message2/duplicate-declarations.json +++ b/testdata/message2/duplicate-declarations.json @@ -12,32 +12,26 @@ "tests": [ { "src": ".local $foo = {$foo} .local $foo = {42} {{bar {$foo}}}", - "params": [{ "name": "foo", "value": "foo" }], - "exp": "bar 42" + "params": [{ "name": "foo", "value": "foo" }] }, { "src": ".local $foo = {42} .local $foo = {42} {{bar {$foo}}}", - "params": [{ "name": "foo", "value": "foo" }], - "exp": "bar 42" + "params": [{ "name": "foo", "value": "foo" }] }, { "src": ".local $foo = {:unknown} .local $foo = {42} {{bar {$foo}}}", - "params": [{ "name": "foo", "value": "foo" }], - "exp": "bar 42" + "params": [{ "name": "foo", "value": "foo" }] }, { - "src": ".local $x = {42} .local $y = {$x} .local $x = {13} {{{$x} {$y}}}", - "exp": "13 42" + "src": ".local $x = {42} .local $y = {$x} .local $x = {13} {{{$x} {$y}}}" }, { "src": ".local $foo = {$foo} {{bar {$foo}}}", - "params": [{ "name": "foo", "value": "foo" }], - "exp": "bar foo" + "params": [{ "name": "foo", "value": "foo" }] }, { "src": ".local $foo = {$bar} .local $bar = {$baz} {{bar {$foo}}}", - "params": [{ "name": "baz", "value": "foo" }], - "exp": "bar {$bar}" + "params": [{ "name": "baz", "value": "foo" }] } ] } diff --git a/testdata/message2/icu-test-previous-release.json b/testdata/message2/icu-test-previous-release.json index 74fc41fdfcea..5f16c8587666 100644 --- a/testdata/message2/icu-test-previous-release.json +++ b/testdata/message2/icu-test-previous-release.json @@ -104,9 +104,7 @@ }, { "src": "empty { }", - "exp": "empty ", - "expErrors": [{ "type": "syntax-error" }], - "ignoreCpp": "Fallback is unclear. See https://github.com/unicode-org/message-format-wg/issues/703" + "expErrors": [{ "type": "syntax-error" }] }, { "src": "bad {:}", @@ -115,9 +113,7 @@ }, { "src": "{bad {$placeholder option}}", - "exp": "bad {$placeholder}", - "expErrors": [{ "type": "syntax-error"}, { "type": "unresolved-variable" }], - "ignoreCpp": "Fallback is unclear. See https://github.com/unicode-org/message-format-wg/issues/703" + "expErrors": [{ "type": "syntax-error"}, { "type": "unresolved-variable" }] }, { "src": ".local $f = {|foo| :string} .match $f *{{foo}}", @@ -125,15 +121,11 @@ }, { "src": ".input {$foo :string} .match $foo * * {{foo}}", - "exp": "foo", - "expErrors": [{ "type": "variant-key-mismatch" }, { "type": "unresolved-variable" }], - "ignoreCpp": "Fallback is unclear. See https://github.com/unicode-org/message-format-wg/issues/735" + "expErrors": [{ "type": "variant-key-mismatch" }, { "type": "unresolved-variable" }] }, { "src": ".input {$foo :string} .input {$bar :string} .match $foo $bar * {{foo}}", - "exp": "foo", - "expErrors": [{ "type": "variant-key-mismatch" }, { "type": "unresolved-variable" }], - "ignoreCpp": "Fallback is unclear. See https://github.com/unicode-org/message-format-wg/issues/735" + "expErrors": [{ "type": "variant-key-mismatch" }, { "type": "unresolved-variable" }] } ] } diff --git a/testdata/message2/tricky-declarations.json b/testdata/message2/tricky-declarations.json index 3fded666e633..eb7b5ac90add 100644 --- a/testdata/message2/tricky-declarations.json +++ b/testdata/message2/tricky-declarations.json @@ -6,12 +6,10 @@ }, "tests": [ { "src": ".input {$var :number minimumFractionDigits=$var2} .input {$var2 :number minimumFractionDigits=5} {{{$var} {$var2}}}", - "exp": "1.000 3.00000", "params": [{ "name": "var", "value": 1}, {"name": "var2", "value": 3 }], "expErrors": [{ "type": "duplicate-declaration" }] }, { "src": ".local $var = {$var2} .local $var2 = {1} {{{$var} {$var2}}}", - "exp": "5 1", "params": [{ "name": "var2", "value": 5 }], "expErrors": [{ "type": "duplicate-declaration" }] }