From 663e0dd459deee60a47962325b885721671092b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Julian=20M=C3=BCller?= Date: Sat, 3 May 2025 20:20:55 +0200 Subject: [PATCH 1/4] `<locale>`: make `collate::hash` return the same hash for strings collating the same --- stl/inc/locale | 4 ++-- .../tests/GH_005236_collate_facet/test.cpp | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/stl/inc/locale b/stl/inc/locale index cc08f4d4ee..d493db8278 100644 --- a/stl/inc/locale +++ b/stl/inc/locale @@ -183,8 +183,8 @@ protected: _NODISCARD virtual long __CLR_OR_THIS_CALL do_hash(const _Elem* _First, const _Elem* _Last) const { // compute hash code for [_First, _Last) - _Adl_verify_range(_First, _Last); - return static_cast<long>(_Hash_array_representation(_First, static_cast<size_t>(_Last - _First))); + const auto _Sortkey = collate::do_transform(_First, _Last); + return static_cast<long>(_Hash_array_representation(_Sortkey.data(), _Sortkey.size())); } private: diff --git a/tests/std/tests/GH_005236_collate_facet/test.cpp b/tests/std/tests/GH_005236_collate_facet/test.cpp index 4938f5cc1a..c50403b0d0 100644 --- a/tests/std/tests/GH_005236_collate_facet/test.cpp +++ b/tests/std/tests/GH_005236_collate_facet/test.cpp @@ -96,6 +96,24 @@ void test_gh_5210() { #endif // !defined(SKIP_COLLATE_TRANSFORM_TESTS) } +void test_gh_5212_compare_hash(const collate<wchar_t>& coll, wstring string1, wstring string2) { + assert(coll.hash(string1.data(), string1.data() + string1.size()) + == coll.hash(string2.data(), string2.data() + string2.size())); +} + +// GH-5212: std::collate_byname<_Elem>::hash() yields different hashes for strings that collate the same +void test_gh_5212() { + const locale loc("de-DE_phoneb"); + const auto& coll = use_facet<collate<wchar_t>>(loc); + + // sharp s collates like "ss" + test_gh_5212_compare_hash(coll, L"Strasse", L"Stra\u00DFe"); // U+00DF LATIN SMALL LETTER SHARP S + // umlaut a collates like "ae" + test_gh_5212_compare_hash(coll, L"Kaetzchen", L"K\u00E4tzchen"); // U+00E4 LATIN
SMALL LETTER A WITH DIAERESIS + // umlaut A collates like "AE" + test_gh_5212_compare_hash(coll, L"AErmel", L"\u00C4rmel"); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS +} + // GH-5236 "std::collate does not respect collation order when compiled with /MD(d) /Zc:wchar_t-" void test_gh_5236() { const wchar_t Ue = L'\u00DC'; // U+00DC LATIN CAPITAL LETTER U WITH DIARESIS @@ -117,5 +135,6 @@ void test_gh_5236() { int main() { test_gh_5210(); + test_gh_5212(); test_gh_5236(); } From a6ef62edeb98117f2c9824b1147536ec0e147bae Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Sat, 3 May 2025 16:45:46 -0700 Subject: [PATCH 2/4] Take `const wstring&`. --- tests/std/tests/GH_005236_collate_facet/test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/std/tests/GH_005236_collate_facet/test.cpp b/tests/std/tests/GH_005236_collate_facet/test.cpp index c50403b0d0..bd9ea184af 100644 --- a/tests/std/tests/GH_005236_collate_facet/test.cpp +++ b/tests/std/tests/GH_005236_collate_facet/test.cpp @@ -96,7 +96,7 @@ void test_gh_5210() { #endif // !defined(SKIP_COLLATE_TRANSFORM_TESTS) } -void test_gh_5212_compare_hash(const collate<wchar_t>& coll, wstring string1, wstring string2) { +void test_gh_5212_compare_hash(const collate<wchar_t>& coll, const wstring& string1, const wstring& string2) { assert(coll.hash(string1.data(), string1.data() + string1.size()) == coll.hash(string2.data(), string2.data() + string2.size())); } From ecd336252f8686162bc9de740b5874172f203778 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Sat, 3 May 2025 16:54:28 -0700 Subject: [PATCH 3/4] Guard `coll.hash()` with `SKIP_COLLATE_TRANSFORM_TESTS`.
--- tests/std/tests/GH_005236_collate_facet/test.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/std/tests/GH_005236_collate_facet/test.cpp b/tests/std/tests/GH_005236_collate_facet/test.cpp index bd9ea184af..c1fb2af398 100644 --- a/tests/std/tests/GH_005236_collate_facet/test.cpp +++ b/tests/std/tests/GH_005236_collate_facet/test.cpp @@ -96,6 +96,10 @@ void test_gh_5210() { #endif // !defined(SKIP_COLLATE_TRANSFORM_TESTS) } +// GH-5469 fixed this by making collate::do_hash() call collate::do_transform() +#ifdef SKIP_COLLATE_TRANSFORM_TESTS +void test_gh_5212() {} +#else // ^^^ defined(SKIP_COLLATE_TRANSFORM_TESTS) / !defined(SKIP_COLLATE_TRANSFORM_TESTS) vvv void test_gh_5212_compare_hash(const collate<wchar_t>& coll, const wstring& string1, const wstring& string2) { assert(coll.hash(string1.data(), string1.data() + string1.size()) == coll.hash(string2.data(), string2.data() + string2.size())); @@ -113,6 +117,7 @@ void test_gh_5212() { // umlaut A collates like "AE" test_gh_5212_compare_hash(coll, L"AErmel", L"\u00C4rmel"); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS } +#endif // ^^^ !defined(SKIP_COLLATE_TRANSFORM_TESTS) ^^^ // GH-5236 "std::collate does not respect collation order when compiled with /MD(d) /Zc:wchar_t-" void test_gh_5236() { From e19ccde07c09d4778fc4c08661e60372aa50c141 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Sun, 4 May 2025 03:57:39 -0700 Subject: [PATCH 4/4] Revert "Guard `coll.hash()` with `SKIP_COLLATE_TRANSFORM_TESTS`." This reverts commit ecd336252f8686162bc9de740b5874172f203778.
--- tests/std/tests/GH_005236_collate_facet/test.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/std/tests/GH_005236_collate_facet/test.cpp b/tests/std/tests/GH_005236_collate_facet/test.cpp index c1fb2af398..bd9ea184af 100644 --- a/tests/std/tests/GH_005236_collate_facet/test.cpp +++ b/tests/std/tests/GH_005236_collate_facet/test.cpp @@ -96,10 +96,6 @@ void test_gh_5210() { #endif // !defined(SKIP_COLLATE_TRANSFORM_TESTS) } -// GH-5469 fixed this by making collate::do_hash() call collate::do_transform() -#ifdef SKIP_COLLATE_TRANSFORM_TESTS -void test_gh_5212() {} -#else // ^^^ defined(SKIP_COLLATE_TRANSFORM_TESTS) / !defined(SKIP_COLLATE_TRANSFORM_TESTS) vvv void test_gh_5212_compare_hash(const collate<wchar_t>& coll, const wstring& string1, const wstring& string2) { assert(coll.hash(string1.data(), string1.data() + string1.size()) == coll.hash(string2.data(), string2.data() + string2.size())); @@ -117,7 +113,6 @@ void test_gh_5212() { // umlaut A collates like "AE" test_gh_5212_compare_hash(coll, L"AErmel", L"\u00C4rmel"); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS } -#endif // ^^^ !defined(SKIP_COLLATE_TRANSFORM_TESTS) ^^^ // GH-5236 "std::collate does not respect collation order when compiled with /MD(d) /Zc:wchar_t-" void test_gh_5236() {