diff --git a/icu4c/source/common/locid.cpp b/icu4c/source/common/locid.cpp index ff9ad3a4e3c4..4a73f5592052 100644 --- a/icu4c/source/common/locid.cpp +++ b/icu4c/source/common/locid.cpp @@ -2633,59 +2633,66 @@ Locale::getUnicodeKeywordValue(StringPiece keywordName, } void -Locale::setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status) -{ - if (U_FAILURE(status)) { +Locale::setKeywordValue(StringPiece keywordName, + StringPiece keywordValue, + UErrorCode& status) { + if (U_FAILURE(status)) { return; } + if (keywordName.empty()) { + status = U_ILLEGAL_ARGUMENT_ERROR; return; } if (status == U_STRING_NOT_TERMINATED_WARNING) { status = U_ZERO_ERROR; } - int32_t bufferLength = - uprv_max(static_cast(uprv_strlen(fullName) + 1), ULOC_FULLNAME_CAPACITY); - int32_t newLength = uloc_setKeywordValue(keywordName, keywordValue, fullName, - bufferLength, &status) + 1; - U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING); - /* Handle the case the current buffer is not enough to hold the new id */ - if (status == U_BUFFER_OVERFLOW_ERROR) { - U_ASSERT(newLength > bufferLength); - char* newFullName = static_cast(uprv_malloc(newLength)); - if (newFullName == nullptr) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - uprv_strcpy(newFullName, fullName); - if (fullName != fullNameBuffer) { - if (baseName == fullName) { - baseName = newFullName; // baseName should not point to freed memory. + + int32_t length = static_cast(uprv_strlen(fullName)); + int32_t capacity = fullName == fullNameBuffer ? ULOC_FULLNAME_CAPACITY : length + 1; + + const char* start = locale_getKeywordsStart(fullName); + int32_t offset = start == nullptr ? length : start - fullName; + + for (;;) { + // Remove -1 from the capacity so that this function can guarantee NUL termination. + CheckedArrayByteSink sink(fullName + offset, capacity - offset - 1); + + int32_t reslen = ulocimp_setKeywordValue( + {fullName + offset, static_cast(length - offset)}, + keywordName, + keywordValue, + sink, + status); + + if (status == U_BUFFER_OVERFLOW_ERROR) { + capacity = reslen + offset + 1; + char* newFullName = static_cast(uprv_malloc(capacity)); + if (newFullName == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; } - // if full Name is already on the heap, need to free it. - uprv_free(fullName); + uprv_memcpy(newFullName, fullName, length + 1); + if (fullName != fullNameBuffer) { + if (baseName == fullName) { + baseName = newFullName; // baseName should not point to freed memory. + } + // if fullName is already on the heap, need to free it. + uprv_free(fullName); + } + fullName = newFullName; + status = U_ZERO_ERROR; + continue; } - fullName = newFullName; - status = U_ZERO_ERROR; - uloc_setKeywordValue(keywordName, keywordValue, fullName, newLength, &status); - U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING); - } else { - U_ASSERT(newLength <= bufferLength); + + if (U_FAILURE(status)) { return; } + u_terminateChars(fullName, capacity, reslen + offset, &status); + break; } - if (U_SUCCESS(status) && baseName == fullName) { + + if (baseName == fullName) { // May have added the first keyword, meaning that the fullName is no longer also the baseName. initBaseName(status); } } -void -Locale::setKeywordValue(StringPiece keywordName, - StringPiece keywordValue, - UErrorCode& status) { - if (U_FAILURE(status)) { return; } - // TODO: Remove the need for a const char* to a NUL terminated buffer. - const CharString keywordName_nul(keywordName, status); - const CharString keywordValue_nul(keywordValue, status); - setKeywordValue(keywordName_nul.data(), keywordValue_nul.data(), status); -} - void Locale::setUnicodeKeywordValue(StringPiece keywordName, StringPiece keywordValue, diff --git a/icu4c/source/common/uloc.cpp b/icu4c/source/common/uloc.cpp index 6ce799442ef8..51887c97c3e1 100644 --- a/icu4c/source/common/uloc.cpp +++ b/icu4c/source/common/uloc.cpp @@ -872,6 +872,11 @@ uloc_setKeywordValue(const char* keywordName, { if (U_FAILURE(*status)) { return 0; } + if (keywordName == nullptr || *keywordName == 0) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + if (bufferCapacity <= 1) { *status = U_ILLEGAL_ARGUMENT_ERROR; return 0; @@ -890,7 +895,11 @@ uloc_setKeywordValue(const char* keywordName, CheckedArrayByteSink sink(keywords == nullptr ? buffer + bufLen : keywords, bufferCapacity - baseLen - 1); int32_t reslen = ulocimp_setKeywordValue( - keywords, keywordName, keywordValue, sink, *status); + keywords == nullptr ? std::string_view() : keywords, + keywordName, + keywordValue == nullptr ? std::string_view() : keywordValue, + sink, + *status); if (U_FAILURE(*status)) { return *status == U_BUFFER_OVERFLOW_ERROR ? reslen + baseLen : 0; @@ -905,24 +914,29 @@ uloc_setKeywordValue(const char* keywordName, } U_EXPORT void -ulocimp_setKeywordValue(const char* keywordName, - const char* keywordValue, +ulocimp_setKeywordValue(std::string_view keywordName, + std::string_view keywordValue, CharString& localeID, UErrorCode& status) { if (U_FAILURE(status)) { return; } - // This is safe because CharString::truncate() doesn't actually erase any - // data, but simply sets the position for where new data will be written. - const char* keywords = locale_getKeywordsStart(localeID.data()); - if (keywords != nullptr) localeID.truncate(keywords - localeID.data()); + std::string_view keywords; + if (const char* start = locale_getKeywordsStart(localeID.data()); start != nullptr) { + // This is safe because CharString::truncate() doesn't actually erase any + // data, but simply sets the position for where new data will be written. + int32_t size = start - localeID.data(); + keywords = localeID.toStringPiece(); + keywords.remove_prefix(size); + localeID.truncate(size); + } CharStringByteSink sink(&localeID); ulocimp_setKeywordValue(keywords, keywordName, keywordValue, sink, status); } U_EXPORT int32_t -ulocimp_setKeywordValue(const char* keywords, - const char* keywordName, - const char* keywordValue, +ulocimp_setKeywordValue(std::string_view keywords, + std::string_view keywordName, + std::string_view keywordValue, ByteSink& sink, UErrorCode& status) { @@ -931,9 +945,6 @@ ulocimp_setKeywordValue(const char* keywords, /* TODO: sorting. removal. */ int32_t needLen = 0; int32_t rc; - const char* nextSeparator = nullptr; - const char* nextEqualsign = nullptr; - const char* keywordStart = nullptr; CharString updatedKeysAndValues; bool handledInputKeyAndValue = false; char keyValuePrefix = '@'; @@ -941,7 +952,7 @@ ulocimp_setKeywordValue(const char* keywords, if (status == U_STRING_NOT_TERMINATED_WARNING) { status = U_ZERO_ERROR; } - if (keywordName == nullptr || keywordName[0] == 0) { + if (keywordName.empty()) { status = U_ILLEGAL_ARGUMENT_ERROR; return 0; } @@ -951,21 +962,19 @@ ulocimp_setKeywordValue(const char* keywords, } CharString canonKeywordValue; - if(keywordValue) { - while (*keywordValue != 0) { - if (!UPRV_ISALPHANUM(*keywordValue) && !UPRV_OK_VALUE_PUNCTUATION(*keywordValue)) { - status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */ - return 0; - } - /* Should we force lowercase in value to set? */ - canonKeywordValue.append(*keywordValue++, status); + for (char c : keywordValue) { + if (!UPRV_ISALPHANUM(c) && !UPRV_OK_VALUE_PUNCTUATION(c)) { + status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */ + return 0; } + /* Should we force lowercase in value to set? */ + canonKeywordValue.append(c, status); } if (U_FAILURE(status)) { return 0; } - if (keywords == nullptr || keywords[1] == '\0') { + if (keywords.size() <= 1) { if (canonKeywordValue.isEmpty()) { /* no keywords = nothing to remove */ U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING); return 0; @@ -991,23 +1000,20 @@ ulocimp_setKeywordValue(const char* keywords, return needLen; } /* end shortcut - no @ */ - keywordStart = keywords; /* search for keyword */ - while(keywordStart) { - const char* keyValueTail; - + for (size_t keywordStart = 0; keywordStart != std::string_view::npos;) { keywordStart++; /* skip @ or ; */ - nextEqualsign = uprv_strchr(keywordStart, '='); - if (!nextEqualsign) { + size_t nextEqualsign = keywords.find('=', keywordStart); + if (nextEqualsign == std::string_view::npos) { status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */ return 0; } /* strip leading & trailing spaces (TC decided to tolerate these) */ - while(*keywordStart == ' ') { + while (keywordStart < keywords.size() && keywords[keywordStart] == ' ') { keywordStart++; } - keyValueTail = nextEqualsign; - while (keyValueTail > keywordStart && *(keyValueTail-1) == ' ') { + size_t keyValueTail = nextEqualsign; + while (keyValueTail > keywordStart && keywords[keyValueTail - 1] == ' ') { keyValueTail--; } /* now keyValueTail points to first char after the keyName */ @@ -1018,26 +1024,26 @@ ulocimp_setKeywordValue(const char* keywords, } CharString localeKeywordName; while (keywordStart < keyValueTail) { - if (!UPRV_ISALPHANUM(*keywordStart)) { + if (!UPRV_ISALPHANUM(keywords[keywordStart])) { status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */ return 0; } - localeKeywordName.append(uprv_tolower(*keywordStart++), status); + localeKeywordName.append(uprv_tolower(keywords[keywordStart++]), status); } if (U_FAILURE(status)) { return 0; } - nextSeparator = uprv_strchr(nextEqualsign, ';'); + size_t nextSeparator = keywords.find(';', nextEqualsign); /* start processing the value part */ nextEqualsign++; /* skip '=' */ /* First strip leading & trailing spaces (TC decided to tolerate these) */ - while(*nextEqualsign == ' ') { + while (nextEqualsign < keywords.size() && keywords[nextEqualsign] == ' ') { nextEqualsign++; } - keyValueTail = (nextSeparator)? nextSeparator: nextEqualsign + uprv_strlen(nextEqualsign); - while(keyValueTail > nextEqualsign && *(keyValueTail-1) == ' ') { + keyValueTail = nextSeparator == std::string_view::npos ? keywords.size() : nextSeparator; + while (keyValueTail > nextEqualsign && keywords[keyValueTail - 1] == ' ') { keyValueTail--; } if (nextEqualsign == keyValueTail) { @@ -1072,9 +1078,10 @@ ulocimp_setKeywordValue(const char* keywords, keyValuePrefix = ';'; /* for any subsequent key-value pair */ updatedKeysAndValues.append(localeKeywordName, status); updatedKeysAndValues.append('=', status); - updatedKeysAndValues.append(nextEqualsign, static_cast(keyValueTail-nextEqualsign), status); + updatedKeysAndValues.append(keywords.data() + nextEqualsign, + static_cast(keyValueTail - nextEqualsign), status); } - if (!nextSeparator && !canonKeywordValue.isEmpty() && !handledInputKeyAndValue) { + if (nextSeparator == std::string_view::npos && !canonKeywordValue.isEmpty() && !handledInputKeyAndValue) { /* append new entry at the end, it sorts later than existing entries */ updatedKeysAndValues.append(keyValuePrefix, status); /* skip keyValuePrefix update, no subsequent key-value pair */ @@ -1098,7 +1105,7 @@ ulocimp_setKeywordValue(const char* keywords, /* if input key/value specified removal of a keyword not present in locale, or * there was an error in CharString.append, leave original locale alone. */ U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING); - return static_cast(uprv_strlen(keywords)); + return static_cast(keywords.size()); } needLen = updatedKeysAndValues.length(); @@ -2225,7 +2232,7 @@ uloc_getLCID(const char* localeID) CharString collVal = ulocimp_getKeywordValue(localeID, "collation", status); if (U_SUCCESS(status) && !collVal.isEmpty()) { CharString tmpLocaleID = ulocimp_getBaseName(localeID, status); - ulocimp_setKeywordValue("collation", collVal.data(), tmpLocaleID, status); + ulocimp_setKeywordValue("collation", collVal.toStringPiece(), tmpLocaleID, status); if (U_SUCCESS(status)) { return uprv_convertToLCID(langID.data(), tmpLocaleID.data(), &status); } diff --git a/icu4c/source/common/ulocimp.h b/icu4c/source/common/ulocimp.h index 536b6a6de60c..3b4f107da71c 100644 --- a/icu4c/source/common/ulocimp.h +++ b/icu4c/source/common/ulocimp.h @@ -131,15 +131,15 @@ U_EXPORT icu::CharString ulocimp_getVariant(const char* localeID, UErrorCode& status); U_EXPORT void -ulocimp_setKeywordValue(const char* keywordName, - const char* keywordValue, +ulocimp_setKeywordValue(std::string_view keywordName, + std::string_view keywordValue, icu::CharString& localeID, UErrorCode& status); U_EXPORT int32_t -ulocimp_setKeywordValue(const char* keywords, - const char* keywordName, - const char* keywordValue, +ulocimp_setKeywordValue(std::string_view keywords, + std::string_view keywordName, + std::string_view keywordValue, icu::ByteSink& sink, UErrorCode& status); diff --git a/icu4c/source/common/unicode/locid.h b/icu4c/source/common/unicode/locid.h index 60282d623dc9..e1afd598cf9b 100644 --- a/icu4c/source/common/unicode/locid.h +++ b/icu4c/source/common/unicode/locid.h @@ -727,7 +727,9 @@ class U_COMMON_API Locale : public UObject { * * @stable ICU 49 */ - void setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status); + void setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status) { + setKeywordValue(StringPiece{keywordName}, StringPiece{keywordValue}, status); + } /** * Sets or removes the value for a keyword.