Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ICU-23017 Add long conversion functionality to ICU4C #3339

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions icu4c/source/i18n/i18n.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@
<ClCompile Include="iso8601cal.cpp" />
<ClCompile Include="japancal.cpp" />
<ClCompile Include="listformatter.cpp" />
<ClCompile Include="long-conversion-string-to-long.cpp" />
<ClCompile Include="ulistformatter.cpp" />
<ClCompile Include="measfmt.cpp" />
<ClCompile Include="measunit.cpp" />
Expand Down Expand Up @@ -398,6 +399,7 @@
<ClInclude Include="islamcal.h" />
<ClInclude Include="iso8601cal.h" />
<ClInclude Include="japancal.h" />
<ClInclude Include="long-conversion-string-to-long.h" />
<ClInclude Include="measunit_impl.h" />
<ClInclude Include="msgfmt_impl.h" />
<ClInclude Include="nfrlist.h" />
Expand Down
6 changes: 6 additions & 0 deletions icu4c/source/i18n/i18n.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,9 @@
<ClCompile Include="listformatter.cpp">
<Filter>formatting</Filter>
</ClCompile>
<ClCompile Include="long-conversion-string-to-long.cpp">
<Filter>formatting</Filter>
</ClCompile>
<ClCompile Include="ulistformatter.cpp">
<Filter>formatting</Filter>
</ClCompile>
Expand Down Expand Up @@ -902,6 +905,9 @@
<ClInclude Include="japancal.h">
<Filter>formatting</Filter>
</ClInclude>
<ClInclude Include="long-conversion-string-to-long.h">
<Filter>formatting</Filter>
</ClInclude>
<ClInclude Include="measunit_impl.h">
<Filter>formatting</Filter>
</ClInclude>
Expand Down
2 changes: 2 additions & 0 deletions icu4c/source/i18n/i18n_uwp.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,7 @@
<ClCompile Include="iso8601cal.cpp" />
<ClCompile Include="japancal.cpp" />
<ClCompile Include="listformatter.cpp" />
<ClCompile Include="long-conversion-string-to-long.cpp" />
<ClCompile Include="ulistformatter.cpp" />
<ClCompile Include="measfmt.cpp" />
<ClCompile Include="measunit.cpp" />
Expand Down Expand Up @@ -629,6 +630,7 @@
<ClInclude Include="islamcal.h" />
<ClInclude Include="iso8601cal.h" />
<ClInclude Include="japancal.h" />
<ClInclude Include="long-conversion-string-to-long.h" />
<ClInclude Include="measunit_impl.h" />
<ClInclude Include="msgfmt_impl.h" />
<ClInclude Include="nfrlist.h" />
Expand Down
186 changes: 186 additions & 0 deletions icu4c/source/i18n/long-conversion-string-to-long.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
// © 2025 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
//
// From the long-conversion library. Original license:
//
// Copyright 2010 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ICU PATCH: ifdef around UCONFIG_NO_FORMATTING
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING

// ICU PATCH: Do not include std::locale.

#include <climits>
// #include <locale>
#include <cmath>

// ICU PATCH: Customize header file paths for ICU.

#include "long-conversion-string-to-long.h"
#include "unicode/uchar.h"

// ICU PATCH: Wrap in ICU namespace
U_NAMESPACE_BEGIN

// MSVC-only configuration: silences warning C4244 on newer compilers and defines a
// marker macro on older ones.
// NOTE(review): the comments below mention uc16, Advance and IsDecimalDigitForRadix,
// none of which appear in the visible part of this file, and VS2012_RADIXWARN is not
// referenced in the visible code either. This block looks inherited from another
// source file -- confirm it is still needed. The __pragma has no matching
// warning(push)/warning(pop) in this block, so C4244 presumably stays disabled for
// whatever follows in the translation unit -- verify.
#ifdef _MSC_VER
#if _MSC_VER >= 1900
// Fix MSVC >= 2015 (_MSC_VER == 1900) warning
// C4244: 'argument': conversion from 'const uc16' to 'char', possible loss of data
// against Advance and friends, when instantiated with **it as char, not uc16.
__pragma(warning(disable : 4244))
#endif
#if _MSC_VER <= 1700 // VS2012, see IsDecimalDigitForRadix warning fix, below
#define VS2012_RADIXWARN
#endif
#endif

namespace long_conversion {
namespace {

// Converts a plain decimal integer string to an int64_t.
//
// Accepted syntax: an optional leading '+' or '-' followed by one or more ASCII
// digits ('0'..'9'). No whitespace, grouping separators, or exponent are allowed.
//
// For example:
//    stringToLongPlain("12345") -> 12345
//    stringToLongPlain("0") -> 0
//    stringToLongPlain("+1") -> 1
//    stringToLongPlain("9223372036854775807") -> 9223372036854775807
//    stringToLongPlain("9223372036854775808" /* more than maximum */) -> Error
//    stringToLongPlain("-9223372036854775808") -> -9223372036854775808
//    stringToLongPlain("-9223372036854775809" /* less than minimum */) -> Error
//    stringToLongPlain("++1234560" /* representation error */) -> Error
//    stringToLongPlain("+" /* sign without digits */) -> Error
//    stringToLongPlain("") -> 0 (no error; the caller uses this for an absent exponent)
//
// @param number the text to parse.
// @param status in/out error code. If it already indicates a failure the function
//               returns 0 immediately. Set to U_ILLEGAL_ARGUMENT_ERROR on malformed
//               input or when the value does not fit in an int64_t.
// @return the parsed value, or 0 on error.
int64_t stringToLongPlain(StringPiece number, UErrorCode &status) {
    if (U_FAILURE(status)) {
        return 0;
    }

    const char *chars = number.data();
    const int32_t length = number.length();

    // Consume the optional sign.
    int32_t index = 0;
    bool negative = false;
    if (length > 0 && (chars[0] == '+' || chars[0] == '-')) {
        negative = (chars[0] == '-');
        index = 1;
        if (length == 1) {
            // A sign on its own is not a number.
            status = U_ILLEGAL_ARGUMENT_ERROR;
            return 0;
        }
    }

    // Largest magnitude that is still representable: 2^63 - 1 for non-negative
    // values, 2^63 for negative ones. Computed in unsigned arithmetic so that no
    // signed overflow can occur (and independent of the width of `long`).
    const uint64_t limit = static_cast<uint64_t>(INT64_MAX) + (negative ? 1u : 0u);

    uint64_t absolute = 0;
    for (; index < length; index++) {
        const char c = chars[index];
        if (c < '0' || c > '9') {
            // Anything that is not an ASCII digit is junk (including a second sign).
            status = U_ILLEGAL_ARGUMENT_ERROR;
            return 0;
        }

        const uint64_t digit = static_cast<uint64_t>(c - '0');
        // Exact integer overflow check:
        //   absolute * 10 + digit <= limit  <=>  absolute <= (limit - digit) / 10
        if (absolute > (limit - digit) / 10) {
            status = U_ILLEGAL_ARGUMENT_ERROR;
            return 0;
        }

        absolute = absolute * 10 + digit;
    }

    if (negative) {
        if (absolute > static_cast<uint64_t>(INT64_MAX)) {
            // The only magnitude above INT64_MAX that passes the limit check is 2^63.
            return INT64_MIN;
        }
        return -static_cast<int64_t>(absolute);
    }

    return static_cast<int64_t>(absolute);
}

// Computes 10 raised to the given exponent as an exact 64-bit integer.
//
// Only exponents in [0, 18] are supported: 10^18 is the largest power of ten that
// fits in an int64_t, and 10 raised to a negative exponent is not an integer, so it
// cannot be represented by the return type.
//
// @param exponent the power of ten to compute; must be in [0, 18].
// @param status   in/out error code. If it already indicates a failure the function
//                 returns 0 immediately. Set to U_ILLEGAL_ARGUMENT_ERROR when the
//                 exponent is outside the supported range.
// @return 10^exponent, or 0 on error.
int64_t power10(int64_t exponent, UErrorCode &status) {
    if (U_FAILURE(status)) {
        return 0;
    }

    if (exponent < 0 || exponent > 18) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    // At most 18 multiplications. A plain loop never forms an intermediate value
    // larger than the result, so no signed overflow is possible (unlike repeated
    // squaring, which squares the base once more than needed).
    int64_t result = 1;
    for (int64_t i = 0; i < exponent; i++) {
        result *= 10;
    }

    return result;
}

} // namespace

// Parses an integer written as `<mantissa>` or `<mantissa>(e|E)<exponent>`, where
// both parts are plain signed decimal integers, and returns mantissa * 10^exponent.
//
// The exponent must be in [-18, 18]. A negative exponent is accepted only when the
// result is still an exact integer (e.g. "1200e-2" -> 12, but "12e-2" is an error).
//
// @param number the text to parse.
// @param status in/out error code. If it already indicates a failure the function
//               returns 0 immediately. Set to U_ILLEGAL_ARGUMENT_ERROR when the
//               input is empty or malformed, when the mantissa or the exponent is
//               missing around an exponent marker, when the exponent is out of
//               range, or when the result is not an integer that fits in an int64_t.
// @return the parsed value, or 0 on error.
int64_t StringToLongConverter::stringToLong(StringPiece number, UErrorCode &status) const {
    if (U_FAILURE(status)) {
        return 0;
    }

    if (number.empty()) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    // Locate the first exponent marker, if any.
    const char *chars = number.data();
    const int32_t length = number.length();
    int32_t exponent_index = 0;
    while (exponent_index < length && chars[exponent_index] != 'e' &&
           chars[exponent_index] != 'E') {
        exponent_index++;
    }
    const bool hasExponent = exponent_index < length;

    // Reject an empty mantissa ("e5") and an empty exponent ("12e").
    if (exponent_index == 0 || (hasExponent && exponent_index == length - 1)) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    const int64_t base = stringToLongPlain(number.substr(0, exponent_index), status);
    const int64_t exponent =
        hasExponent ? stringToLongPlain(number.substr(exponent_index + 1), status) : 0;
    if (U_FAILURE(status)) {
        return 0;
    }

    // Range-check before negating: the exponent may be as small as INT64_MIN.
    if (exponent < -18 || exponent > 18) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    // power10 is only ever asked for a non-negative exponent; a negative exponent
    // is applied below as an exact division.
    const bool scaleDown = exponent < 0;
    const int64_t power = power10(scaleDown ? -exponent : exponent, status);
    if (U_FAILURE(status)) {
        return 0;
    }

    if (scaleDown) {
        if (base % power != 0) {
            // The value has a fractional part and is not representable as an integer.
            status = U_ILLEGAL_ARGUMENT_ERROR;
            return 0;
        }
        return base / power;
    }

    // power >= 1 here, so both divisions are well defined.
    if (base > INT64_MAX / power || base < INT64_MIN / power) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    return base * power;
}
} // namespace long_conversion

// ICU PATCH: Close ICU namespace
U_NAMESPACE_END
#endif // ICU PATCH: close #if !UCONFIG_NO_FORMATTING
83 changes: 83 additions & 0 deletions icu4c/source/i18n/long-conversion-string-to-long.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
// © 2025 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
//
// From the long-conversion library. Original license:
//
// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ICU PATCH: ifdef around UCONFIG_NO_FORMATTING
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING

#ifndef LONG_CONVERSION_STRING_TO_LONG_H_
#define LONG_CONVERSION_STRING_TO_LONG_H_

#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
#include "measunit_impl.h"
#include "resource.h"
#include "uarrsort.h"
#include "uassert.h"
#include "ucln_in.h"
#include "umutex.h"
#include "unicode/bytestrie.h"
#include "unicode/bytestriebuilder.h"
#include "unicode/localpointer.h"
#include "unicode/stringpiece.h"
#include "unicode/stringtriebuilder.h"
#include "unicode/ures.h"
#include "unicode/ustringtrie.h"
#include "uresimp.h"
#include "util.h"
#include <cstdlib>


// ICU PATCH: Wrap in ICU namespace
U_NAMESPACE_BEGIN

#include "unicode/stringpiece.h"


namespace long_conversion {

/**
 * Converter from textual integers to int64_t.
 *
 * The class declares no data members; all work is done by the const member
 * function stringToLong().
 */
class StringToLongConverter {
  public:
    /**
     * Parses the integer in `number`. The text is split at the first 'e' or 'E';
     * the part before it is the base and the part after it the power-of-ten
     * exponent.
     *
     * @param number the text to convert; an empty string is reported as an error.
     * @param status in/out ICU error code; set to U_ILLEGAL_ARGUMENT_ERROR when
     *               the text cannot be converted.
     * @return the converted value, or 0 when an error is reported.
     */
    int64_t stringToLong(StringPiece number, UErrorCode &status) const;

    StringToLongConverter() = default;
    ~StringToLongConverter() = default;
};

} // namespace long_conversion

// ICU PATCH: Close ICU namespace
U_NAMESPACE_END

#endif // LONG_CONVERSION_STRING_TO_LONG_H_
#endif // ICU PATCH: close #if !UCONFIG_NO_FORMATTING
1 change: 1 addition & 0 deletions icu4c/source/i18n/sources.txt
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ islamcal.cpp
iso8601cal.cpp
japancal.cpp
listformatter.cpp
long-conversion-string-to-long.cpp
measfmt.cpp
measunit.cpp
measunit_extra.cpp
Expand Down
5 changes: 5 additions & 0 deletions icu4c/source/test/depstest/dependencies.txt
Original file line number Diff line number Diff line change
Expand Up @@ -970,6 +970,11 @@ group: double_conversion
deps
platform

group: long_conversion
long-conversion-string-to-long.o
deps
platform

group: number_representation
number_decimalquantity.o string_segment.o number_utils.o
# TODO(ICU-21058) Move formatted_string_builder to its own unit.
Expand Down
Loading
Loading