Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

csv import date: Add dateformat "Locale" to pick current locale #2011

Merged
merged 1 commit into from
Sep 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 85 additions & 2 deletions libgnucash/engine/gnc-datetime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@
#include <boost/date_time/local_time/local_time.hpp>
#include <boost/locale.hpp>
#include <boost/regex.hpp>
#include <unicode/smpdtfmt.h>
#include <unicode/locid.h>
#include <unicode/udat.h>
#include <unicode/parsepos.h>
#include <unicode/calendar.h>
#include <libintl.h>
#include <locale.h>
#include <map>
Expand Down Expand Up @@ -70,6 +75,8 @@ static const TZ_Ptr utc_zone(new boost::local_time::posix_time_zone("UTC-0"));
void _set_tzp(TimeZoneProvider& tz);
void _reset_tzp();

static Date gregorian_date_from_locale_string (const std::string& str);

/* To ensure things aren't overly screwed up by setting the nanosecond clock for boost::date_time. Don't do it, though, it doesn't get us anything and slows down the date/time library. */
#ifndef BOOST_DATE_TIME_HAS_NANOSECONDS
static constexpr auto ticks_per_second = INT64_C(1000000);
Expand All @@ -78,14 +85,15 @@ static constexpr auto ticks_per_second = INT64_C(1000000000);
#endif

/* Vector of date formats understood by gnucash and the corresponding regex
* and/or string->gregorian_date function used to parse each from an
* external source.
* Note: while the format names are using a "-" as separator, the
* regexes will accept any of "-/.' " and will also work for dates
* without separators.
*/
const std::vector<GncDateFormat> GncDate::c_formats ({
GncDateFormat {
N_("y-m-d"),
boost::gregorian::from_string,
"(?:" // either y-m-d
"(?<YEAR>[0-9]+)[-/.' ]+"
"(?<MONTH>[0-9]+)[-/.' ]+"
Expand All @@ -98,6 +106,7 @@ const std::vector<GncDateFormat> GncDate::c_formats ({
},
GncDateFormat {
N_("d-m-y"),
boost::gregorian::from_uk_string,
"(?:" // either d-m-y
"(?<DAY>[0-9]+)[-/.' ]+"
"(?<MONTH>[0-9]+)[-/.' ]+"
Expand All @@ -110,6 +119,7 @@ const std::vector<GncDateFormat> GncDate::c_formats ({
},
GncDateFormat {
N_("m-d-y"),
boost::gregorian::from_us_string,
"(?:" // either m-d-y
"(?<MONTH>[0-9]+)[-/.' ]+"
"(?<DAY>[0-9]+)[-/.' ]+"
Expand Down Expand Up @@ -145,7 +155,8 @@ const std::vector<GncDateFormat> GncDate::c_formats ({
"(?<DAY>[0-9]{2})"
"(?<YEAR>[0-9]+)?"
")"
}
},
GncDateFormat { N_("Locale"), gregorian_date_from_locale_string },
});

/** Private implementation of GncDateTime. See the documentation for that class.
Expand Down Expand Up @@ -607,6 +618,65 @@ GncDateTimeImpl::timestamp()
return str.substr(0, 8) + str.substr(9, 15);
}

/* Bundle of the ICU objects used to parse a date string according to the
 * locale. Both members start out null and are filled in by
 * get_icu_resources().
 */
struct ICUResources
{
/* Locale-specific date formatter; used to turn a string into a UDate. */
std::unique_ptr<icu::DateFormat> formatter;
/* Calendar used to split a UDate into year, month and day fields. */
std::unique_ptr<icu::Calendar> calendar;
};

/** Return the cached ICU date formatter and calendar, creating them on
 * first use.
 *
 * The ICU locale is derived from the current LC_TIME setting as reported
 * by setlocale(); everything from the first '.' onward (e.g. a codeset
 * suffix such as ".UTF-8") is stripped before the name is canonicalized.
 * If setlocale() returns null the default-constructed icu::Locale is used.
 *
 * The two objects are created in locals and only stored in the cache once
 * both exist, so a failure never leaves a half-initialized cache behind:
 * the next call simply retries.
 *
 * NOTE(review): only the stand-alone calendar is made non-lenient here;
 * the formatter keeps its own leniency setting. Confirm whether
 * formatter->setLenient(false) was intended as well.
 *
 * @return Reference to the cached resources; both members are non-null.
 * @throws std::invalid_argument if the formatter or calendar cannot be
 * created.
 */
static ICUResources&
get_icu_resources()
{
    static ICUResources rv;

    if (rv.formatter && rv.calendar)
        return rv;

    icu::Locale locale;
    if (auto lc_time_locale = setlocale (LC_TIME, nullptr))
    {
        std::string localeStr(lc_time_locale);
        if (size_t dotPos = localeStr.find('.'); dotPos != std::string::npos)
            localeStr = localeStr.substr(0, dotPos);

        locale = icu::Locale::createCanonical (localeStr.c_str());
    }

    std::unique_ptr<icu::DateFormat> formatter
        (icu::DateFormat::createDateInstance(icu::DateFormat::kDefault, locale));
    if (!formatter)
        throw std::invalid_argument("Cannot create date formatter.");

    UErrorCode status = U_ZERO_ERROR;
    std::unique_ptr<icu::Calendar> calendar
        (icu::Calendar::createInstance(locale, status));
    if (U_FAILURE(status) || !calendar)
        throw std::invalid_argument("Cannot create calendar instance.");

    calendar->setLenient(false);

    // Publish both objects together now that nothing else can throw.
    rv.formatter.swap(formatter);
    rv.calendar.swap(calendar);

    return rv;
}

/** Parse a date written in the current locale's default date style.
 *
 * The whole string must be consumed by the ICU formatter; a partial match
 * followed by trailing text is treated as a failure.
 *
 * @param str The date text, UTF-8 encoded.
 * @return The year, month and day of the parsed date.
 * @throws std::invalid_argument if the ICU resources are unavailable, the
 * string cannot be parsed, or the calendar cannot be set or read.
 */
static Date
gregorian_date_from_locale_string (const std::string& str)
{
    ICUResources& resources = get_icu_resources();
    if (!resources.formatter || !resources.calendar)
        throw std::invalid_argument ("ICU date resources are not available");

    icu::UnicodeString input = icu::UnicodeString::fromUTF8(str);
    icu::ParsePosition parsePos;
    UDate date = resources.formatter->parse(input, parsePos);
    if (parsePos.getErrorIndex() != -1 || parsePos.getIndex() != input.length())
        throw std::invalid_argument ("Cannot parse string");

    UErrorCode status = U_ZERO_ERROR;
    resources.calendar->setTime(date, status);
    if (U_FAILURE(status))
        throw std::invalid_argument ("Cannot set calendar time");

    // Read all three fields, then check status once; any failed read
    // leaves a failure code behind in status.
    const auto year = resources.calendar->get(UCAL_YEAR, status);
    // UCAL_MONTH counts from 0, Date expects months counted from 1.
    const auto month = resources.calendar->get(UCAL_MONTH, status) + 1;
    const auto day = resources.calendar->get(UCAL_DATE, status);
    if (U_FAILURE(status))
        throw std::invalid_argument ("Cannot read calendar fields");

    return Date (year, month, day);
}

/* Member function definitions for GncDateImpl.
*/
GncDateImpl::GncDateImpl(const std::string str, const std::string fmt) :
Expand All @@ -617,6 +687,19 @@ GncDateImpl::GncDateImpl(const std::string str, const std::string fmt) :
if (iter == GncDate::c_formats.cend())
throw std::invalid_argument(N_("Unknown date format specifier passed as argument."));

if (iter->m_str_to_date)
{
try
{
m_greg = (*iter->m_str_to_date)(str);
return;
}
catch (...) {} // with any string->date exception, try regex
}

if (iter->m_re.empty())
throw std::invalid_argument ("No regex pattern available");

boost::regex r(iter->m_re);
boost::smatch what;
if(!boost::regex_search(str, what, r)) // regex didn't find a match
Expand Down
11 changes: 11 additions & 0 deletions libgnucash/engine/gnc-datetime.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@
#include <functional>
#include <memory>
#include <optional>
#include <string>
#include <utility>
#include <vector>

#include <boost/date_time/gregorian/gregorian.hpp>

typedef struct
{
Expand Down Expand Up @@ -172,6 +176,8 @@ class GncDateTime
* GncDate::c_formats class variable and work with those.
*/

using StringToDate = std::function<boost::gregorian::date(const std::string&)>;

class GncDateFormat
{
public:
Expand All @@ -182,13 +188,18 @@ class GncDateFormat
*/
GncDateFormat (const char* fmt, const char* re) :
    m_fmt(fmt), m_re(re) {}
/** Construct a GncDateFormat with a string->date function and a regular
 * expression.
 * @param fmt String representing the format.
 * @param str_to_date Function converting a string to a
 * boost::gregorian::date; it is moved into the object.
 * @param re Regular expression associated with the format string.
 */
GncDateFormat (const char* fmt, StringToDate str_to_date, const char* re) :
    m_fmt(fmt), m_re(re), m_str_to_date(std::move(str_to_date)) {}
/** Construct a GncDateFormat with only a string->date function; the
 * regular expression is left empty.
 * @param fmt String representing the format.
 * @param str_to_date Function converting a string to a
 * boost::gregorian::date; it is moved into the object.
 */
GncDateFormat (const char* fmt, StringToDate str_to_date) :
    m_fmt(fmt), m_str_to_date(std::move(str_to_date)) {}
/** A string representing the format. */
const std::string m_fmt;
private:
/** Regular expression associated with the format string. This is to be
* used only internally by the gnc-datetime code.
*/
const std::string m_re;
std::optional<StringToDate> m_str_to_date;

friend class GncDateImpl;
};
Expand Down
38 changes: 38 additions & 0 deletions libgnucash/engine/test/gtest-gnc-datetime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,12 @@ TEST(gnc_date_constructors, test_str_format_constructor)
{ "y-m-d", "1985.3.12", 1985, 3, 12},
{ "y-m-d", "3'6'8", 2003, 6, 8},
{ "y-m-d", "20130801", 2013, 8, 1},
{ "y-m-d", "2013 Aug 1", 2013, 8, 1},
{ "y-m-d", "2013 Aug 01",2013, 8, 1},
{ "y-m-d", "2013 August 01", 2013, 8, 1},
{ "y-m-d", "2013-August-1", 2013, 8, 1},
{ "y-m-d", "2009/Nov/04",2009, 11, 4},
{ "y-m-d","1985.Mar.12", 1985, 3, 12},
{ "d-m-y", "01-08-2013", 2013, 8, 1},
{ "d-m-y", "01-8-2013", 2013, 8, 1},
{ "d-m-y", "1-08-2013", 2013, 8, 1},
Expand All @@ -101,6 +107,9 @@ TEST(gnc_date_constructors, test_str_format_constructor)
{ "d-m-y", "12.3.1985", 1985, 3, 12},
{ "d-m-y", "8'6'3", 2003, 6, 8},
{ "d-m-y", "01082013", 2013, 8, 1},
{ "d-m-y", "1 Aug 2013", 2013, 8, 1},
{ "d-m-y", "1 Sep 2013", 2013, 9, 1},
{ "d-m-y", "1 September 2013", 2013, 9, 1},
{ "m-d-y", "08-01-2013", 2013, 8, 1},
{ "m-d-y", "8-01-2013", 2013, 8, 1},
{ "m-d-y", "08-1-2013", 2013, 8, 1},
Expand All @@ -113,6 +122,8 @@ TEST(gnc_date_constructors, test_str_format_constructor)
{ "m-d-y", "3.12.1985", 1985, 3, 12},
{ "m-d-y", "6'8'3", 2003, 6, 8},
{ "m-d-y", "08012013", 2013, 8, 1},
{ "m-d-y", "November 4, 2009", 2009, 11, 4},
{ "m-d-y", "Nov 4, 2009", 2009, 11, 4},
{ "d-m", "01-08", curr_year, 8, 1},
{ "d-m", "01-8", curr_year, 8, 1},
{ "d-m", "1-08", curr_year, 8, 1},
Expand All @@ -130,6 +141,29 @@ TEST(gnc_date_constructors, test_str_format_constructor)
{ "m-d", "6'8", curr_year, 6, 8},
{ "m-d", "0801", curr_year, 8, 1},

// invalid dates
{ "d-m-y", "0 Aug 2013", -1, -1, -1},
christopherlam marked this conversation as resolved.
Show resolved Hide resolved
{ "d-m-y", "31 Sep 2013", -1, -1, -1},
{ "d-m-y", "31 September 2013", -1, -1, -1},
{ "d-m-y", "31/11/2009", -2, -2, -2},
{ "d-m-y", "34.3.1985", -2, -2, -2},
{ "m-d-y", "November 41, 2009", -1, -1, -1},
{ "m-d-y", "Nov 31, 2009", -1, -1, -1},
{ "y-m-d", "2013 Aug 0", -1, -1, -1},
{ "y-m-d", "2013 Feb 30", -1, -1, -1},
{ "y-m-d", "2013 August 0", -1, -1, -1},
{ "y-m-d", "2013-June-31", -1, -1, -1},
{ "y-m-d", "2009/Nov/0", -1, -1, -1},
{ "y-m-d", "1985.Mar.32", -1, -1, -1},

// 2-digit dates are not parsable with months as words
{ "d-m-y", "1 Sep 13", -1, -1, -1},
{ "d-m-y", "1 September 13", -1, -1, -1},
{ "m-d-y", "November 4, 24", -1, -1, -1},
{ "m-d-y", "Nov 4, 23", -1, -1, -1},
{ "m-d-y", "Nov 29, 24", -1, -1, -1},
{ "y-m-d", "13-June-11", -1, -1, -1},

// ambiguous date formats
// current parser doesn't know how to disambiguate
// and hence refuses to parse
Expand Down Expand Up @@ -186,6 +220,10 @@ TEST(gnc_date_constructors, test_str_format_constructor)
{
got_year = got_month = got_day = -1;
}
catch (const std::out_of_range&)
{
got_year = got_month = got_day = -2;
}

EXPECT_TRUE ((got_year == test_dates[i].exp_year) &&
(got_month == test_dates[i].exp_month) &&
Expand Down
Loading