Skip to content

Commit

Permalink
Fix parsing of numbers in floating point format to integers
Browse files Browse the repository at this point in the history
When parsing a string like "123.456" to an integer, the ICU backend would
first parse it greedily to a floating point value and then cast/truncate
it to an integer.
Set the ICU "parse integer only" flag (`setParseIntegerOnly`) when parsing
to an integral number, so that parsing stops in front of the decimal separator.
Care must be taken not to set that flag when parsing e.g. a currency or date
to an integer, where the truncation is intended.
  • Loading branch information
Flamefire committed Nov 10, 2024
1 parent ff91706 commit 0faf1cb
Show file tree
Hide file tree
Showing 7 changed files with 101 additions and 2 deletions.
2 changes: 2 additions & 0 deletions doc/changelog.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
/*!
\page changelog Changelog

- 1.88.0
- Fix parsing of numbers in floating point format to integers
- 1.86.0
- Make ICU implementation of `to_title` threadsafe
- Add allocator support to `utf_to_utf`
Expand Down
10 changes: 8 additions & 2 deletions src/boost/locale/icu/formatter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,9 @@ namespace boost { namespace locale { namespace impl_icu {
public:
typedef std::basic_string<CharType> string_type;

number_format(icu::NumberFormat& fmt, std::string codepage) : cvt_(codepage), icu_fmt_(fmt) {}
number_format(icu::NumberFormat& fmt, const std::string& codepage, bool isNumberOnly = false) :
cvt_(codepage), icu_fmt_(fmt), isNumberOnly_(isNumberOnly)
{}

string_type format(double value, size_t& code_points) const override { return do_format(value, code_points); }
string_type format(int64_t value, size_t& code_points) const override { return do_format(value, code_points); }
Expand Down Expand Up @@ -107,6 +109,9 @@ namespace boost { namespace locale { namespace impl_icu {
icu::ParsePosition pp;
icu::UnicodeString tmp = cvt_.icu(str.data(), str.data() + str.size());

// For the plain number parsing (no currency etc) parse "123.456" as 2 ints
// not a float later converted to int
icu_fmt_.setParseIntegerOnly(std::is_integral<ValueType>::value && isNumberOnly_);
icu_fmt_.parse(tmp, val, pp);

ValueType tmp_v;
Expand All @@ -122,6 +127,7 @@ namespace boost { namespace locale { namespace impl_icu {

icu_std_converter<CharType> cvt_;
icu::NumberFormat& icu_fmt_;
const bool isNumberOnly_;
};

template<typename CharType>
Expand Down Expand Up @@ -355,7 +361,7 @@ namespace boost { namespace locale { namespace impl_icu {
icu::NumberFormat& nf =
cache.number_format((how == std::ios_base::scientific) ? num_fmt_type::sci : num_fmt_type::number);
set_fraction_digits(nf, how, ios.precision());
return ptr_type(new number_format<CharType>(nf, encoding));
return ptr_type(new number_format<CharType>(nf, encoding, true));
}
case currency: {
icu::NumberFormat& nf = cache.number_format(
Expand Down
82 changes: 82 additions & 0 deletions test/formatting_common.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
//
// Copyright (c) 2024 Alexander Grund
//
// Distributed under the Boost Software License, Version 1.0.
// https://www.boost.org/LICENSE_1_0.txt

#include <boost/locale/formatting.hpp>
#include <boost/locale/generator.hpp>
#include <cstdint>
#include <limits>
#include <sstream>

#include "../src/boost/locale/util/foreach_char.hpp"
#include "boostLocale/test/tools.hpp"
#include "boostLocale/test/unit_test.hpp"

/// Extract several numbers that directly follow each other from one stream and
/// verify both the parsed values and the character at which each extraction stops.
///
/// \tparam CharType Character type of the input stream
/// \tparam IntType Integer type the values are extracted into
/// \param locale Locale imbued into the stream before anything is parsed
template<typename CharType, typename IntType>
void test_parse_multi_number_by_char(const std::locale& locale)
{
    using stream_type = std::basic_istringstream<CharType>;

    stream_type input;
    input.imbue(locale);
    input.str(ascii_to<CharType>("42.12,345"));
    input >> boost::locale::as::number;

    // Integer extraction has to stop in front of the '.',
    // which leaves "12,345" to be read as one (grouped) integer afterwards
    IntType parsed;
    TEST_REQUIRE(input >> parsed);
    TEST_EQ(parsed, IntType(42));
    TEST_EQ(static_cast<char>(input.get()), '.');
    TEST_REQUIRE(input >> parsed);
    TEST_EQ(parsed, IntType(12345));
    // Nothing is left, so a further extraction fails and the stream is at its end
    TEST_REQUIRE(!(input >> parsed));
    TEST(input.eof());

    // Floating point extraction consumes the fraction and stops in front of the ','
    input.str(ascii_to<CharType>("42.25,678"));
    input.clear();
    float parsedFloat;
    TEST_REQUIRE(input >> parsedFloat);
    TEST_EQ(parsedFloat, 42.25);
    TEST_EQ(static_cast<char>(input.get()), ',');
    TEST_REQUIRE(input >> parsed);
    TEST_EQ(parsed, IntType(678));
    TEST_REQUIRE(!(input >> parsed));
    TEST(input.eof());

    // A currency with a fractional part extracted into an integer is parsed completely,
    // only the resulting value gets truncated
    input.str(ascii_to<CharType>("USD1,234.55,67.89"));
    input.clear();
    // Still in plain number mode: the leading currency code makes the extraction fail
    TEST_REQUIRE(!(input >> parsed));
    input.clear();
    input >> boost::locale::as::currency;
    input >> boost::locale::as::currency_iso;
    if(!(input >> parsed)) // Parsing currencies not fully supported by WinAPI backend
        return;

    TEST_EQ(parsed, IntType(1234));
    TEST_EQ(static_cast<char>(input.get()), ',');
    // Back in number mode the integer extraction stops in front of ".89"
    TEST_REQUIRE(input >> boost::locale::as::number >> parsed);
    TEST_EQ(parsed, IntType(67));
    TEST(!input.eof());
}

/// Test that parsing multiple numbers without any spaces works as expected
void test_parse_multi_number()
{
const auto locale = boost::locale::generator{}("en_US.UTF-8");

#define BOOST_LOCALE_CALL_I(T, I) \
std::cout << "\t" #I << std::endl; \
test_parse_multi_number_by_char<T, I>(locale);

#define BOOST_LOCALE_CALL(T) \
std::cout << "test_parse_multi_number " #T << std::endl; \
BOOST_LOCALE_CALL_I(T, int16_t); \
BOOST_LOCALE_CALL_I(T, uint16_t); \
BOOST_LOCALE_CALL_I(T, int32_t); \
BOOST_LOCALE_CALL_I(T, uint32_t); \
BOOST_LOCALE_CALL_I(T, int64_t); \
BOOST_LOCALE_CALL_I(T, uint64_t);

BOOST_LOCALE_CALL(char);
BOOST_LOCALE_CALL(wchar_t);
#undef BOOST_LOCALE_CALL
#undef BOOST_LOCALE_CALL_I
}
3 changes: 3 additions & 0 deletions test/test_formatting.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

#include "boostLocale/test/tools.hpp"
#include "boostLocale/test/unit_test.hpp"
#include "formatting_common.hpp"

const std::string test_locale_name = "en_US";
std::string message_path = "./";
Expand Down Expand Up @@ -928,6 +929,8 @@ void test_main(int argc, char** argv)
test_manip<char32_t>();
test_format_class<char32_t>();
#endif

test_parse_multi_number();
}

// boostinspect:noascii
Expand Down
2 changes: 2 additions & 0 deletions test/test_posix_formatting.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#endif
#include "boostLocale/test/tools.hpp"
#include "boostLocale/test/unit_test.hpp"
#include "formatting_common.hpp"

#ifdef BOOST_LOCALE_NO_POSIX_BACKEND
// Dummy just to make it compile
Expand Down Expand Up @@ -185,6 +186,7 @@ void test_main(int /*argc*/, char** /*argv*/)
TEST(v == "12345,45" || v == "12 345,45" || v == "12.345,45");
}
}
test_parse_multi_number();
}

// boostinspect:noascii
2 changes: 2 additions & 0 deletions test/test_std_formatting.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#include "boostLocale/test/tools.hpp"
#include "boostLocale/test/unit_test.hpp"
#include "formatting_common.hpp"

template<typename CharType, typename RefCharType>
void test_by_char(const std::locale& l, const std::locale& lreal)
Expand Down Expand Up @@ -230,6 +231,7 @@ void test_main(int /*argc*/, char** /*argv*/)
}
}
}
test_parse_multi_number();
}

// boostinspect:noascii
2 changes: 2 additions & 0 deletions test/test_winapi_formatting.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "../src/boost/locale/win32/lcid.hpp"
#include "boostLocale/test/tools.hpp"
#include "boostLocale/test/unit_test.hpp"
#include "formatting_common.hpp"

template<typename CharType>
void test_by_char(const std::locale& l, std::string name, int lcid)
Expand Down Expand Up @@ -176,6 +177,7 @@ void test_main(int /*argc*/, char** /*argv*/)
test_by_char<wchar_t>(l, name, name_lcid.second);
}
}
test_parse_multi_number();
std::cout << "- Testing strftime" << std::endl;
test_date_time(gen("en_US.UTF-8"));
}
Expand Down

0 comments on commit 0faf1cb

Please sign in to comment.