Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ICU-22879 Provide collator based predicates for use with STL algorithms #3149

Merged
merged 2 commits into from
Sep 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions docs/userguide/collation/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,12 @@ value, such as `ucol_greater`, `ucol_greaterOrEqual`, `ucol_equal` (in C)
`Collator::greater`, `Collator::greaterOrEqual`, `Collator::equal` (in C++) and
`Collator.equals` (in Java).

As of ICU 76 there are also C++ convenience functions and templates that create
standard-library-compliant comparison function objects. These use a collator to
perform comparisons (instead of using the comparison operators on the strings
being compared), such as `Collator::less()` for a C++ API `Collator` or
`icu::header::collator::less` (constructed from a `UCollator*`) for a C API `UCollator`.

### Examples

**C:**
Expand Down Expand Up @@ -238,6 +244,48 @@ delete coll;
}
```

**C++:** (as of ICU 76)

```c++
icu::ErrorCode status;
icu::Locale locale = icu::Locale::forLanguageTag("sv", status);
icu::LocalPointer<icu::Collator> collator(icu::Collator::createInstance(locale, status), status);
status.assertSuccess(); // Override ErrorCode::handleFailure() to handle failure.

std::vector<std::string> utf8{
"Arnold", "Øystein", "Ingrid", "Åke", "Olof", "İsmail", "Örjan",
};

std::sort(utf8.begin(), utf8.end(), collator->less());

std::vector<icu::UnicodeString> utf16{
u"Arnold", u"Øystein", u"Ingrid", u"Åke", u"Olof", u"İsmail", u"Örjan",
};

std::sort(utf16.begin(), utf16.end(), collator->less());
```

**C++:** (calling the ICU C API, as of ICU 76)
markusicu marked this conversation as resolved.
Show resolved Hide resolved

```c++
UErrorCode status = U_ZERO_ERROR;
icu::LocalUCollatorPointer ucollator(ucol_open("sv", &status));
assert(U_SUCCESS(status));
assert(ucollator.isValid());

std::vector<std::string> utf8{
"Arnold", "Øystein", "Ingrid", "Åke", "Olof", "İsmail", "Örjan",
};

std::sort(utf8.begin(), utf8.end(), icu::header::collator::less(ucollator.getAlias()));

std::vector<std::u16string> utf16{
u"Arnold", u"Øystein", u"Ingrid", u"Åke", u"Olof", u"İsmail", u"Örjan",
};

std::sort(utf16.begin(), utf16.end(), icu::header::collator::less(ucollator.getAlias()));
```

**Java:**

```java
Expand Down
93 changes: 93 additions & 0 deletions icu4c/source/i18n/unicode/coll.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,18 @@

#if !UCONFIG_NO_COLLATION

#include <functional>
#include <string_view>
#include <type_traits>

#include "unicode/char16ptr.h"
#include "unicode/uobject.h"
#include "unicode/ucol.h"
#include "unicode/unorm.h"
#include "unicode/locid.h"
#include "unicode/uniset.h"
#include "unicode/umisc.h"
#include "unicode/unistr.h"
#include "unicode/uiter.h"
#include "unicode/stringpiece.h"

Expand Down Expand Up @@ -588,6 +594,52 @@ class U_I18N_API Collator : public UObject {
*/
UBool equals(const UnicodeString& source, const UnicodeString& target) const;

#ifndef U_HIDE_DRAFT_API

/**
* Creates a comparison function object that uses this collator.
* Like <code>std::equal_to</code> but uses the collator instead of <code>operator==</code>.
* @draft ICU 76
*/
inline auto equal_to() const { return Predicate<std::equal_to, UCOL_EQUAL>(*this); }

/**
* Creates a comparison function object that uses this collator.
* Like <code>std::greater</code> but uses the collator instead of <code>operator&gt;</code>.
* @draft ICU 76
*/
inline auto greater() const { return Predicate<std::equal_to, UCOL_GREATER>(*this); }

/**
* Creates a comparison function object that uses this collator.
* Like <code>std::less</code> but uses the collator instead of <code>operator&lt;</code>.
* @draft ICU 76
*/
inline auto less() const { return Predicate<std::equal_to, UCOL_LESS>(*this); }

/**
* Creates a comparison function object that uses this collator.
* Like <code>std::not_equal_to</code> but uses the collator instead of <code>operator!=</code>.
* @draft ICU 76
*/
inline auto not_equal_to() const { return Predicate<std::not_equal_to, UCOL_EQUAL>(*this); }

/**
* Creates a comparison function object that uses this collator.
* Like <code>std::greater_equal</code> but uses the collator instead of <code>operator&gt;=</code>.
* @draft ICU 76
*/
inline auto greater_equal() const { return Predicate<std::not_equal_to, UCOL_LESS>(*this); }

/**
* Creates a comparison function object that uses this collator.
* Like <code>std::less_equal</code> but uses the collator instead of <code>operator&lt;=</code>.
* @draft ICU 76
*/
inline auto less_equal() const { return Predicate<std::not_equal_to, UCOL_GREATER>(*this); }

#endif // U_HIDE_DRAFT_API

#ifndef U_FORCE_HIDE_DEPRECATED_API
/**
* Determines the minimum strength that will be used in comparison or
Expand Down Expand Up @@ -1210,6 +1262,47 @@ class U_I18N_API Collator : public UObject {
friend class ICUCollatorService;
static Collator* makeInstance(const Locale& desiredLocale,
UErrorCode& status);

#ifndef U_HIDE_DRAFT_API
/**
 * Comparison function object backed by a Collator.
 *
 * Each call collates the two arguments and then applies <code>Compare</code>
 * (for example <code>std::equal_to</code> or <code>std::not_equal_to</code>) to the
 * collation outcome and the fixed <code>result</code> template argument.
 *
 * The object keeps only a reference to the collator it was created from.
 * The UErrorCode filled in by the collator is local to each call and is never
 * inspected, so failures are not reported to the caller.
 * @internal
 */
template <template <typename...> typename Compare, UCollationResult result>
class Predicate {
  public:
    /** Binds the predicate to @p parent; only a reference is stored. */
    explicit Predicate(const Collator& parent) : collator(parent) {}

    /**
     * Compares two UTF-16 string-like values.
     * Participates in overload resolution only when both argument types
     * satisfy ConvertibleToU16StringView.
     */
    template <
        typename T, typename U,
        typename = std::enable_if_t<ConvertibleToU16StringView<T> && ConvertibleToU16StringView<U>>>
    bool operator()(const T& lhs, const U& rhs) const {
        UErrorCode errorCode = U_ZERO_ERROR;
        const UCollationResult order = collator.compare(
            UnicodeString::readOnlyAlias(lhs),
            UnicodeString::readOnlyAlias(rhs),
            errorCode);
        return compare(order, result);
    }

    /** Compares two strings through Collator::compareUTF8(). */
    bool operator()(std::string_view lhs, std::string_view rhs) const {
        UErrorCode errorCode = U_ZERO_ERROR;
        const UCollationResult order = collator.compareUTF8(lhs, rhs, errorCode);
        return compare(order, result);
    }

#if defined(__cpp_char8_t)
    /** Compares two char8_t strings through Collator::compareUTF8(). */
    bool operator()(std::u8string_view lhs, std::u8string_view rhs) const {
        UErrorCode errorCode = U_ZERO_ERROR;
        const UCollationResult order = collator.compareUTF8(lhs, rhs, errorCode);
        return compare(order, result);
    }
#endif

  private:
    // Collator used for every comparison; held by reference, not owned.
    const Collator& collator;
    // Stateless comparator applied to (collation outcome, result).
    static constexpr Compare<UCollationResult> compare{};
};
#endif // U_HIDE_DRAFT_API
};

#if !UCONFIG_NO_SERVICE
Expand Down
124 changes: 124 additions & 0 deletions icu4c/source/i18n/unicode/ucol.h
Original file line number Diff line number Diff line change
Expand Up @@ -1519,6 +1519,130 @@ ucol_openBinary(const uint8_t *bin, int32_t length,
const UCollator *base,
UErrorCode *status);

#if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API

#include <functional>
#include <string_view>
#include <type_traits>

#include "unicode/char16ptr.h"
#include "unicode/stringpiece.h"
#include "unicode/unistr.h"

namespace U_HEADER_ONLY_NAMESPACE {

#ifndef U_HIDE_DRAFT_API

namespace collator {

namespace internal {

/**
 * Comparison function object backed by a C API UCollator.
 *
 * Each call collates the two arguments and then applies <code>Compare</code>
 * (for example <code>std::equal_to</code> or <code>std::not_equal_to</code>) to the
 * collation outcome and the fixed <code>result</code> template argument.
 *
 * The object stores the UCollator pointer as given; nothing in this class
 * closes it.
 * @internal
 */
template <template <typename...> typename Compare, UCollationResult result>
class Predicate {
  public:
    /** @internal */
    explicit Predicate(const UCollator* ucol) : collator(ucol) {}

    /**
     * Compares two UTF-16 string-like values via ucol_strcoll().
     * Participates in overload resolution only when both argument types
     * satisfy ConvertibleToU16StringView.
     * @internal
     */
    template <
        typename T, typename U,
        typename = std::enable_if_t<ConvertibleToU16StringView<T> && ConvertibleToU16StringView<U>>>
    bool operator()(const T& lhs, const U& rhs) const {
        return match(UnicodeString::readOnlyAlias(lhs), UnicodeString::readOnlyAlias(rhs));
    }

    /**
     * Compares two strings via ucol_strcollUTF8().
     * @internal
     */
    bool operator()(std::string_view lhs, std::string_view rhs) const {
        return match(lhs, rhs);
    }

#if defined(__cpp_char8_t)
    /**
     * Compares two char8_t strings via ucol_strcollUTF8().
     * @internal
     */
    bool operator()(std::u8string_view lhs, std::u8string_view rhs) const {
        return match(lhs, rhs);
    }
#endif

  private:
    // UTF-16 path: collate the two buffers, then test the outcome against `result`.
    bool match(UnicodeString lhs, UnicodeString rhs) const {
        const UCollationResult order = ucol_strcoll(
            collator,
            toUCharPtr(lhs.getBuffer()), lhs.length(),
            toUCharPtr(rhs.getBuffer()), rhs.length());
        return compare(order, result);
    }

    // StringPiece path: same test, using ucol_strcollUTF8().
    // The error code is local to the call and is never inspected.
    bool match(StringPiece lhs, StringPiece rhs) const {
        UErrorCode errorCode = U_ZERO_ERROR;
        const UCollationResult order = ucol_strcollUTF8(
            collator,
            lhs.data(), lhs.length(),
            rhs.data(), rhs.length(),
            &errorCode);
        return compare(order, result);
    }

    // Collator used for every comparison; stored as passed to the constructor.
    const UCollator* const collator;
    // Stateless comparator applied to (collation outcome, result).
    static constexpr Compare<UCollationResult> compare{};
};

} // namespace internal

/**
 * Function object type for performing comparisons using a UCollator.
 * Like <code>std::equal_to</code> but uses the collator instead of <code>operator==</code>.
 * Construct it from a <code>const UCollator*</code>; a call returns true when the
 * collation result for the two strings is UCOL_EQUAL.
 * @draft ICU 76
 */
using equal_to = internal::Predicate<std::equal_to, UCOL_EQUAL>;

/**
 * Function object type for performing comparisons using a UCollator.
 * Like <code>std::greater</code> but uses the collator instead of <code>operator&gt;</code>.
 * Construct it from a <code>const UCollator*</code>; a call returns true when the
 * collation result for (lhs, rhs) is UCOL_GREATER.
 * @draft ICU 76
 */
using greater = internal::Predicate<std::equal_to, UCOL_GREATER>;

/**
 * Function object type for performing comparisons using a UCollator.
 * Like <code>std::less</code> but uses the collator instead of <code>operator&lt;</code>.
 * Construct it from a <code>const UCollator*</code>; a call returns true when the
 * collation result for (lhs, rhs) is UCOL_LESS.
 * @draft ICU 76
 */
using less = internal::Predicate<std::equal_to, UCOL_LESS>;

/**
 * Function object type for performing comparisons using a UCollator.
 * Like <code>std::not_equal_to</code> but uses the collator instead of <code>operator!=</code>.
 * Construct it from a <code>const UCollator*</code>; a call returns true when the
 * collation result for the two strings is anything other than UCOL_EQUAL.
 * @draft ICU 76
 */
using not_equal_to = internal::Predicate<std::not_equal_to, UCOL_EQUAL>;

/**
 * Function object type for performing comparisons using a UCollator.
 * Like <code>std::greater_equal</code> but uses the collator instead of <code>operator&gt;=</code>.
 * Construct it from a <code>const UCollator*</code>; a call returns true when the
 * collation result for (lhs, rhs) is anything other than UCOL_LESS.
 * @draft ICU 76
 */
using greater_equal = internal::Predicate<std::not_equal_to, UCOL_LESS>;

/**
 * Function object type for performing comparisons using a UCollator.
 * Like <code>std::less_equal</code> but uses the collator instead of <code>operator&lt;=</code>.
 * Construct it from a <code>const UCollator*</code>; a call returns true when the
 * collation result for (lhs, rhs) is anything other than UCOL_GREATER.
 * @draft ICU 76
 */
using less_equal = internal::Predicate<std::not_equal_to, UCOL_GREATER>;

} // namespace collator

#endif // U_HIDE_DRAFT_API

} // namespace U_HEADER_ONLY_NAMESPACE

#endif // U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API

#endif /* #if !UCONFIG_NO_COLLATION */

Expand Down
Loading