From c395b13bd4fbcedea766b235fdc6e70f9d6e66f0 Mon Sep 17 00:00:00 2001 From: Christopher Lam Date: Sun, 1 Sep 2024 16:08:55 +0800 Subject: [PATCH 1/6] GncDateFormat -> locales part2: save/restore: date-format -> locales --- .../csv-imp/assistant-csv-price-import.cpp | 19 ++- .../csv-imp/assistant-csv-trans-import.cpp | 20 ++- .../csv-imp/gnc-imp-props-price.cpp | 2 +- .../csv-imp/gnc-imp-props-price.hpp | 6 +- .../csv-imp/gnc-imp-props-tx.cpp | 8 +- .../csv-imp/gnc-imp-props-tx.hpp | 14 +- .../csv-imp/gnc-imp-settings-csv.cpp | 10 +- .../csv-imp/gnc-imp-settings-csv.hpp | 4 +- .../csv-imp/gnc-import-price.cpp | 10 +- .../csv-imp/gnc-import-price.hpp | 4 +- .../import-export/csv-imp/gnc-import-tx.cpp | 14 +- .../import-export/csv-imp/gnc-import-tx.hpp | 4 +- libgnucash/core-utils/CMakeLists.txt | 1 + libgnucash/core-utils/gnc-locale-utils.cpp | 12 ++ libgnucash/core-utils/gnc-locale-utils.hpp | 3 + libgnucash/core-utils/test/CMakeLists.txt | 1 + libgnucash/engine/gnc-datetime.cpp | 144 +++++------------- libgnucash/engine/gnc-datetime.hpp | 50 +----- 18 files changed, 122 insertions(+), 204 deletions(-) diff --git a/gnucash/import-export/csv-imp/assistant-csv-price-import.cpp b/gnucash/import-export/csv-imp/assistant-csv-price-import.cpp index ddb0a2a1e40..59308bf800e 100644 --- a/gnucash/import-export/csv-imp/assistant-csv-price-import.cpp +++ b/gnucash/import-export/csv-imp/assistant-csv-price-import.cpp @@ -35,6 +35,7 @@ #include #include +#include "gnc-locale-utils.hpp" #include "gnc-ui.h" #include "gnc-uri-utils.h" #include "gnc-ui-util.h" @@ -659,8 +660,8 @@ CsvImpPriceAssist::CsvImpPriceAssist () /* Add in the date format combo box and hook it up to an event handler. */ date_format_combo = GTK_COMBO_BOX_TEXT(gtk_combo_box_text_new()); - for (auto& date_fmt : GncDate::c_formats) - gtk_combo_box_text_append_text (date_format_combo, _(date_fmt.m_fmt.c_str())); + for (auto locale : gnc_get_available_locales()) + gtk_combo_box_text_append_text (date_format_combo, _(locale.c_str())); gtk_combo_box_set_active (GTK_COMBO_BOX(date_format_combo), 0); g_signal_connect (G_OBJECT(date_format_combo), "changed", G_CALLBACK(csv_price_imp_preview_date_fmt_sel_cb), this); @@ -1159,7 +1160,11 @@ CsvImpPriceAssist::preview_update_encoding (const char* encoding) void CsvImpPriceAssist::preview_update_date_format () { - price_imp->date_format (gtk_combo_box_get_active (GTK_COMBO_BOX(date_format_combo))); + if (char *text = gtk_combo_box_text_get_active_text(date_format_combo)) + { + price_imp->date_locale (text); + g_free (text); + } preview_refresh_table (); } @@ -1764,8 +1769,12 @@ CsvImpPriceAssist::preview_refresh () (price_imp->file_format() != GncImpFileFormat::CSV)); // This section deals with the combo's and character encoding - gtk_combo_box_set_active (GTK_COMBO_BOX(date_format_combo), - price_imp->date_format()); + auto locales = gnc_get_available_locales(); + auto locale_it = std::find (locales.begin(), locales.end(), price_imp->date_locale()); + if (locale_it != locales.end()) + gtk_combo_box_set_active (GTK_COMBO_BOX(date_format_combo), + std::distance (locales.begin(), locale_it)); + gtk_combo_box_set_active (GTK_COMBO_BOX(currency_format_combo), price_imp->currency_format()); go_charmap_sel_set_encoding (encselector, price_imp->encoding().c_str()); diff --git a/gnucash/import-export/csv-imp/assistant-csv-trans-import.cpp b/gnucash/import-export/csv-imp/assistant-csv-trans-import.cpp index d1cb2480eaa..b441dda9cfa 100644 --- a/gnucash/import-export/csv-imp/assistant-csv-trans-import.cpp +++ b/gnucash/import-export/csv-imp/assistant-csv-trans-import.cpp @@ -43,6 +43,7 @@ #include "gnc-ui-util.h" #include "dialog-utils.h" +#include "gnc-locale-utils.hpp" #include "gnc-component-manager.h" #include "gnc-state.h" @@ -602,8 +603,9 @@ CsvImpTransAssist::CsvImpTransAssist () /* Add in the date format combo box and hook it up to an event handler. */ date_format_combo = GTK_COMBO_BOX_TEXT(gtk_combo_box_text_new()); - for (auto& date_fmt : GncDate::c_formats) - gtk_combo_box_text_append_text (date_format_combo, _(date_fmt.m_fmt.c_str())); + for (auto locale : gnc_get_available_locales()) + gtk_combo_box_text_append_text (date_format_combo, _(locale.c_str())); + gtk_combo_box_set_active (GTK_COMBO_BOX(date_format_combo), 0); g_signal_connect (G_OBJECT(date_format_combo), "changed", G_CALLBACK(csv_tximp_preview_date_fmt_sel_cb), this); @@ -1137,7 +1139,11 @@ CsvImpTransAssist::preview_update_encoding (const char* encoding) void CsvImpTransAssist::preview_update_date_format () { - tx_imp->date_format (gtk_combo_box_get_active (GTK_COMBO_BOX(date_format_combo))); + if (char *text = gtk_combo_box_text_get_active_text(date_format_combo)) + { + tx_imp->date_locale (text); + g_free (text); + } preview_refresh_table (); } @@ -1690,8 +1696,12 @@ CsvImpTransAssist::preview_refresh () (tx_imp->file_format() != GncImpFileFormat::CSV)); // Set Date & Currency Format and Character encoding - gtk_combo_box_set_active (GTK_COMBO_BOX(date_format_combo), - tx_imp->date_format()); + auto locales = gnc_get_available_locales(); + auto locale_it = std::find (locales.begin(), locales.end(), tx_imp->date_locale()); + if (locale_it != locales.end()) + gtk_combo_box_set_active (GTK_COMBO_BOX(date_format_combo), + std::distance (locales.begin(), locale_it)); + gtk_combo_box_set_active (GTK_COMBO_BOX(currency_format_combo), tx_imp->currency_format()); go_charmap_sel_set_encoding (encselector, tx_imp->encoding().c_str()); diff --git a/gnucash/import-export/csv-imp/gnc-imp-props-price.cpp b/gnucash/import-export/csv-imp/gnc-imp-props-price.cpp index 666b4098ea3..edac5c5b988 100644 --- a/gnucash/import-export/csv-imp/gnc-imp-props-price.cpp +++ b/gnucash/import-export/csv-imp/gnc-imp-props-price.cpp @@ -162,7 +162,7 @@ void GncImportPrice::set (GncPricePropType prop_type, const std::string& value, { case GncPricePropType::DATE: m_date.reset(); - m_date = GncDate(value, GncDate::c_formats[m_date_format].m_fmt); // Throws if parsing fails + m_date = GncDate(value, m_date_locale); // Throws if parsing fails break; case GncPricePropType::AMOUNT: diff --git a/gnucash/import-export/csv-imp/gnc-imp-props-price.hpp b/gnucash/import-export/csv-imp/gnc-imp-props-price.hpp index fb35cb39688..4e25409785e 100644 --- a/gnucash/import-export/csv-imp/gnc-imp-props-price.hpp +++ b/gnucash/import-export/csv-imp/gnc-imp-props-price.hpp @@ -83,11 +83,11 @@ GncNumeric parse_amount_price (const std::string &str, int currency_format); struct GncImportPrice { public: - GncImportPrice (int date_format, int currency_format) : m_date_format{date_format}, + GncImportPrice (std::string date_locale, int currency_format) : m_date_locale{date_locale}, m_currency_format{currency_format}{}; void set (GncPricePropType prop_type, const std::string& value, bool enable_test_empty); - void set_date_format (int date_format) { m_date_format = date_format ;} + void set_date_locale (std::string date_locale) { m_date_locale = date_locale ;} void set_currency_format (int currency_format) { m_currency_format = currency_format ;} void reset (GncPricePropType prop_type); std::string verify_essentials (void); @@ -102,7 +102,7 @@ struct GncImportPrice std::string errors(); private: - int m_date_format; + std::string m_date_locale; int m_currency_format; std::optional m_date; std::optional m_amount; diff --git a/gnucash/import-export/csv-imp/gnc-imp-props-tx.cpp b/gnucash/import-export/csv-imp/gnc-imp-props-tx.cpp index 49ab27ab358..2ab08ad24d3 100644 --- a/gnucash/import-export/csv-imp/gnc-imp-props-tx.cpp +++ b/gnucash/import-export/csv-imp/gnc-imp-props-tx.cpp @@ -239,7 +239,7 @@ void GncPreTrans::set (GncTransPropType prop_type, const std::string& value) case GncTransPropType::DATE: m_date.reset(); if (!value.empty()) - m_date = GncDate(value, GncDate::c_formats[m_date_format].m_fmt); // Throws if parsing fails + m_date = GncDate(value, m_date_locale); else if (!m_multi_split) throw std::invalid_argument ( (bl::format (std::string{_("Date field can not be empty if 'Multi-split' option is unset.\n")}) % @@ -527,15 +527,13 @@ void GncPreSplit::set (GncTransPropType prop_type, const std::string& value) case GncTransPropType::REC_DATE: m_rec_date.reset(); if (!value.empty()) - m_rec_date = GncDate (value, - GncDate::c_formats[m_date_format].m_fmt); // Throws if parsing fails + m_rec_date = GncDate (value, m_date_locale); // Throws if parsing fails break; case GncTransPropType::TREC_DATE: m_trec_date.reset(); if (!value.empty()) - m_trec_date = GncDate (value, - GncDate::c_formats[m_date_format].m_fmt); // Throws if parsing fails + m_trec_date = GncDate (value, m_date_locale); // Throws if parsing fails break; default: diff --git a/gnucash/import-export/csv-imp/gnc-imp-props-tx.hpp b/gnucash/import-export/csv-imp/gnc-imp-props-tx.hpp index 967950e755c..78c4dbc6fee 100644 --- a/gnucash/import-export/csv-imp/gnc-imp-props-tx.hpp +++ b/gnucash/import-export/csv-imp/gnc-imp-props-tx.hpp @@ -154,11 +154,11 @@ struct DraftTransaction class GncPreTrans { public: - GncPreTrans(int date_format, bool multi_split) - : m_date_format{date_format}, m_multi_split{multi_split}, m_currency{nullptr} {}; + GncPreTrans(const std::string date_locale, bool multi_split) + : m_date_locale{date_locale}, m_multi_split{multi_split}, m_currency{nullptr} {}; void set (GncTransPropType prop_type, const std::string& value); - void set_date_format (int date_format) { m_date_format = date_format ;} + void set_date_locale (const std::string date_locale) { m_date_locale = date_locale ;} void set_multi_split (bool multi_split) { m_multi_split = multi_split ;} void reset (GncTransPropType prop_type); StrVec verify_essentials (void); @@ -190,7 +190,7 @@ class GncPreTrans private: - int m_date_format; + std::string m_date_locale; bool m_multi_split; std::optional m_differ; std::optional m_date; @@ -221,12 +221,12 @@ class GncPreTrans class GncPreSplit { public: - GncPreSplit (int date_format, int currency_format) : m_date_format{date_format}, + GncPreSplit (const std::string date_locale, int currency_format) : m_date_locale{date_locale}, m_currency_format{currency_format} {}; void set (GncTransPropType prop_type, const std::string& value); void reset (GncTransPropType prop_type); void add (GncTransPropType prop_type, const std::string& value); - void set_date_format (int date_format) { m_date_format = date_format ;} + void set_date_locale (const std::string date_locale) { m_date_locale = date_locale ;} void set_currency_format (int currency_format) { m_currency_format = currency_format; } void set_pre_trans (std::shared_ptr pre_trans) { m_pre_trans = pre_trans; } std::shared_ptr get_pre_trans (void) { return m_pre_trans; } @@ -241,7 +241,7 @@ class GncPreSplit void UpdateCrossSplitCounters (); std::shared_ptr m_pre_trans; - int m_date_format; + std::string m_date_locale; int m_currency_format; std::optional m_action; std::optional m_account; diff --git a/gnucash/import-export/csv-imp/gnc-imp-settings-csv.cpp b/gnucash/import-export/csv-imp/gnc-imp-settings-csv.cpp index ae5e6f396f1..e04a0d9e6a2 100644 --- a/gnucash/import-export/csv-imp/gnc-imp-settings-csv.cpp +++ b/gnucash/import-export/csv-imp/gnc-imp-settings-csv.cpp @@ -34,6 +34,7 @@ #include #include +#include "gnc-locale-utils.hpp" #include "Account.h" #include "gnc-state.h" #include "gnc-ui-util.h" @@ -150,7 +151,7 @@ CsvImportSettings::load (void) if (key_char) g_free (key_char); - m_date_format = g_key_file_get_integer (keyfile, group.c_str(), CSV_DATE, &key_error); + m_date_locale = g_key_file_get_string (keyfile, group.c_str(), CSV_DATE, &key_error); m_load_error |= handle_load_error (&key_error, group); m_currency_format = g_key_file_get_integer (keyfile, group.c_str(), CSV_CURRENCY, &key_error); @@ -203,13 +204,12 @@ CsvImportSettings::save (void) (m_file_format == GncImpFileFormat::CSV) ? true : false); g_key_file_set_string (keyfile, group.c_str(), CSV_SEP, m_separators.c_str()); - g_key_file_set_integer (keyfile, group.c_str(), CSV_DATE, m_date_format); + g_key_file_set_string (keyfile, group.c_str(), CSV_DATE, m_date_locale.c_str()); std::ostringstream cmt_ss; cmt_ss << "Supported date formats: "; int fmt_num = 0; - std::for_each (GncDate::c_formats.cbegin(), GncDate::c_formats.cend(), - [&cmt_ss, &fmt_num](const GncDateFormat& fmt) - { cmt_ss << fmt_num++ << ": '" << fmt.m_fmt << "', "; }); + for (auto loc : gnc_get_available_locales()) + cmt_ss << fmt_num++ << ": '" << loc << "', "; auto cmt = cmt_ss.str().substr(0, static_cast(cmt_ss.tellp()) - 2); g_key_file_set_comment (keyfile, group.c_str(), CSV_DATE, cmt.c_str(), nullptr); g_key_file_set_integer (keyfile, group.c_str(), CSV_CURRENCY, m_currency_format); diff --git a/gnucash/import-export/csv-imp/gnc-imp-settings-csv.hpp b/gnucash/import-export/csv-imp/gnc-imp-settings-csv.hpp index 39b48e65ff8..c2f532fe5bb 100644 --- a/gnucash/import-export/csv-imp/gnc-imp-settings-csv.hpp +++ b/gnucash/import-export/csv-imp/gnc-imp-settings-csv.hpp @@ -50,7 +50,7 @@ enum SETTINGS_COL {SET_GROUP, SET_NAME}; struct CsvImportSettings { CsvImportSettings() : m_file_format (GncImpFileFormat::CSV), m_encoding {"UTF-8"}, - m_date_format {0}, m_currency_format {0}, + m_date_locale {"en_AU"}, m_currency_format {0}, m_skip_start_lines{0}, m_skip_end_lines{0}, m_skip_alt_lines (false), m_separators {","}, m_load_error {false} { } virtual ~CsvImportSettings() = default; @@ -75,7 +75,7 @@ void remove (void); std::string m_name; // Name given to this preset by the user GncImpFileFormat m_file_format; // CSV import Format std::string m_encoding; // File encoding -int m_date_format; // Date Active id +std::string m_date_locale; // Date Active id int m_currency_format; // Currency Active id uint32_t m_skip_start_lines; // Number of header rows to skip uint32_t m_skip_end_lines; // Number of footer rows to skip diff --git a/gnucash/import-export/csv-imp/gnc-import-price.cpp b/gnucash/import-export/csv-imp/gnc-import-price.cpp index ee3cb0b7140..98fc1d9d63f 100644 --- a/gnucash/import-export/csv-imp/gnc-import-price.cpp +++ b/gnucash/import-export/csv-imp/gnc-import-price.cpp @@ -213,15 +213,15 @@ void GncPriceImport::currency_format (int currency_format) } int GncPriceImport::currency_format () { return m_settings.m_currency_format; } -void GncPriceImport::date_format (int date_format) +void GncPriceImport::date_locale (std::string date_locale) { - m_settings.m_date_format = date_format; + m_settings.m_date_locale = date_locale; /* Reparse all date related columns */ std::vector dates = { GncPricePropType::DATE }; reset_formatted_column (dates); } -int GncPriceImport::date_format () { return m_settings.m_date_format; } +std::string GncPriceImport::date_locale () { return m_settings.m_date_locale; } /** Converts raw file data using a new encoding. This function must be * called after load_file only if load_file guessed @@ -385,7 +385,7 @@ void GncPriceImport::tokenize (bool guessColTypes) auto length = tokenized_line.size(); if (length > 0) m_parsed_lines.push_back (std::make_tuple (tokenized_line, std::string(), - std::make_shared(date_format(), currency_format()), + std::make_shared(date_locale(), currency_format()), false)); if (length > max_cols) max_cols = length; @@ -750,7 +750,7 @@ GncPriceImport::set_column_type_price (uint32_t position, GncPricePropType type, /* Reset date and currency formats for each price props object * to ensure column updates use the most recent one */ - std::get(*parsed_lines_it)->set_date_format (m_settings.m_date_format); + std::get(*parsed_lines_it)->set_date_locale (m_settings.m_date_locale); std::get(*parsed_lines_it)->set_currency_format (m_settings.m_currency_format); uint32_t row = parsed_lines_it - m_parsed_lines.begin(); diff --git a/gnucash/import-export/csv-imp/gnc-import-price.hpp b/gnucash/import-export/csv-imp/gnc-import-price.hpp index 4ada98ca9b0..0f705a3074f 100644 --- a/gnucash/import-export/csv-imp/gnc-import-price.hpp +++ b/gnucash/import-export/csv-imp/gnc-import-price.hpp @@ -100,8 +100,8 @@ class GncPriceImport void currency_format (int currency_format); int currency_format (); - void date_format (int date_format); - int date_format (); + void date_locale (std::string date_locale); + std::string date_locale (); void encoding (const std::string& encoding); std::string encoding (); diff --git a/gnucash/import-export/csv-imp/gnc-import-tx.cpp b/gnucash/import-export/csv-imp/gnc-import-tx.cpp index 6c2f2837792..242f1cf3b8d 100644 --- a/gnucash/import-export/csv-imp/gnc-import-tx.cpp +++ b/gnucash/import-export/csv-imp/gnc-import-tx.cpp @@ -228,9 +228,9 @@ void GncTxImport::currency_format (int currency_format) } int GncTxImport::currency_format () { return m_settings.m_currency_format; } -void GncTxImport::date_format (int date_format) +void GncTxImport::date_locale (std::string date_locale) { - m_settings.m_date_format = date_format; + m_settings.m_date_locale = date_locale; /* Reparse all date related columns */ std::vector dates = { GncTransPropType::DATE, @@ -238,7 +238,7 @@ void GncTxImport::date_format (int date_format) GncTransPropType::TREC_DATE}; reset_formatted_column (dates); } -int GncTxImport::date_format () { return m_settings.m_date_format; } +std::string GncTxImport::date_locale () { return m_settings.m_date_locale; } /** Converts raw file data using a new encoding. This function must be * called after load_file only if load_file guessed @@ -407,8 +407,8 @@ void GncTxImport::tokenize (bool guessColTypes) auto length = tokenized_line.size(); if (length > 0) { - auto pretrans = std::make_shared(date_format(), m_settings.m_multi_split); - auto presplit = std::make_shared(date_format(), currency_format()); + auto pretrans = std::make_shared(date_locale(), m_settings.m_multi_split); + auto presplit = std::make_shared(date_locale(), currency_format()); presplit->set_pre_trans (std::move (pretrans)); m_parsed_lines.push_back (std::make_tuple (tokenized_line, ErrMap(), presplit->get_pre_trans(), std::move (presplit), false)); @@ -781,7 +781,7 @@ void GncTxImport::update_pre_trans_props (parse_line_t& parsed_line, uint32_t co /* Reset date format for each trans props object * to ensure column updates use the most recent one */ - trans_props->set_date_format (m_settings.m_date_format); + trans_props->set_date_locale (m_settings.m_date_locale); trans_props->set_multi_split (m_settings.m_multi_split); if ((old_type > GncTransPropType::NONE) && (old_type <= GncTransPropType::TRANS_PROPS)) @@ -820,7 +820,7 @@ void GncTxImport::update_pre_split_props (parse_line_t& parsed_line, uint32_t co auto trans_props = std::get (parsed_line); /* Reset date format for each split props object * to ensure column updates use the most recent one */ - split_props->set_date_format (m_settings.m_date_format); + split_props->set_date_locale (m_settings.m_date_locale); if (m_settings.m_multi_split && trans_props->is_part_of( m_parent)) split_props->set_pre_trans (m_parent); else diff --git a/gnucash/import-export/csv-imp/gnc-import-tx.hpp b/gnucash/import-export/csv-imp/gnc-import-tx.hpp index f2bb4e2765e..595a48563b0 100644 --- a/gnucash/import-export/csv-imp/gnc-import-tx.hpp +++ b/gnucash/import-export/csv-imp/gnc-import-tx.hpp @@ -119,8 +119,8 @@ class GncTxImport void currency_format (int currency_format); int currency_format (); - void date_format (int date_format); - int date_format (); + void date_locale (std::string date_locale); + std::string date_locale (); void encoding (const std::string& encoding); std::string encoding (); diff --git a/libgnucash/core-utils/CMakeLists.txt b/libgnucash/core-utils/CMakeLists.txt index fc9e402abe4..9ea5d9412eb 100644 --- a/libgnucash/core-utils/CMakeLists.txt +++ b/libgnucash/core-utils/CMakeLists.txt @@ -52,6 +52,7 @@ target_link_libraries(gnc-core-utils PkgConfig::GLIB2 PRIVATE ${Boost_LIBRARIES} + ${ICU4C_I18N_LDFLAGS} ${GOBJECT_LDFLAGS} ${GTK_MAC_LDFLAGS} "$<$:${OSX_EXTRA_LIBRARIES}>") diff --git a/libgnucash/core-utils/gnc-locale-utils.cpp b/libgnucash/core-utils/gnc-locale-utils.cpp index c73a02dd597..210373ae868 100644 --- a/libgnucash/core-utils/gnc-locale-utils.cpp +++ b/libgnucash/core-utils/gnc-locale-utils.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include "gnc-locale-utils.hpp" #include @@ -115,3 +116,14 @@ gnc_get_boost_locale() } +std::vector +gnc_get_available_locales () +{ + std::vector rv; + auto num_locales{uloc_countAvailable()}; + rv.reserve (num_locales); + for (int32_t i = 0; i < num_locales; ++i) + if (auto localeID = uloc_getAvailable (i)) + rv.push_back (localeID); + return rv; +} diff --git a/libgnucash/core-utils/gnc-locale-utils.hpp b/libgnucash/core-utils/gnc-locale-utils.hpp index 1473d29a4ff..c33d28971bb 100644 --- a/libgnucash/core-utils/gnc-locale-utils.hpp +++ b/libgnucash/core-utils/gnc-locale-utils.hpp @@ -23,6 +23,7 @@ #define GNC_LOCALE_UTILS_HPP #include +#include #include /** Get the default application locale. @@ -63,4 +64,6 @@ void gnc_init_boost_locale(const std::string& messages_path); */ const std::locale& gnc_get_boost_locale(); +std::vector gnc_get_available_locales (); + #endif /* GNC_LOCALE_UTILS_HPP */ diff --git a/libgnucash/core-utils/test/CMakeLists.txt b/libgnucash/core-utils/test/CMakeLists.txt index 5d10837ff41..c48b9720888 100644 --- a/libgnucash/core-utils/test/CMakeLists.txt +++ b/libgnucash/core-utils/test/CMakeLists.txt @@ -36,6 +36,7 @@ set(gtest_core_utils_INCLUDES set(gtest_core_utils_LIBS PkgConfig::GLIB2 ${Boost_LIBRARIES} + ${ICU4C_I18N_LDFLAGS} ${GTHREAD_LDFLAGS} gtest) diff --git a/libgnucash/engine/gnc-datetime.cpp b/libgnucash/engine/gnc-datetime.cpp index 308ec24e4ef..f4e59fcf26a 100644 --- a/libgnucash/engine/gnc-datetime.cpp +++ b/libgnucash/engine/gnc-datetime.cpp @@ -45,6 +45,11 @@ #include #include "gnc-timezone.hpp" #include "gnc-datetime.hpp" +#include +#include +#include +#include +#include #define N_(string) string //So that xgettext will find it @@ -77,77 +82,6 @@ static constexpr auto ticks_per_second = INT64_C(1000000); static constexpr auto ticks_per_second = INT64_C(1000000000); #endif -/* Vector of date formats understood by gnucash and corresponding regex - * to parse each from an external source - * Note: while the format names are using a "-" as separator, the - * regexes will accept any of "-/.' " and will also work for dates - * without separators. - */ -const std::vector GncDate::c_formats ({ - GncDateFormat { - N_("y-m-d"), - "(?:" // either y-m-d - "(?[0-9]+)[-/.' ]+" - "(?[0-9]+)[-/.' ]+" - "(?[0-9]+)" - "|" // or CCYYMMDD - "(?[0-9]{4})" - "(?[0-9]{2})" - "(?[0-9]{2})" - ")" - }, - GncDateFormat { - N_("d-m-y"), - "(?:" // either d-m-y - "(?[0-9]+)[-/.' ]+" - "(?[0-9]+)[-/.' ]+" - "(?[0-9]+)" - "|" // or DDMMCCYY - "(?[0-9]{2})" - "(?[0-9]{2})" - "(?[0-9]{4})" - ")" - }, - GncDateFormat { - N_("m-d-y"), - "(?:" // either m-d-y - "(?[0-9]+)[-/.' ]+" - "(?[0-9]+)[-/.' ]+" - "(?[0-9]+)" - "|" // or MMDDCCYY - "(?[0-9]{2})" - "(?[0-9]{2})" - "(?[0-9]{4})" - ")" - }, - // Note year is still checked for in the regexes below - // This is to be able to raise an error if one is found for a yearless date format - GncDateFormat { - (N_("d-m")), - "(?:" // either d-m(-y) - "(?[0-9]+)[-/.' ]+" - "(?[0-9]+)(?:[-/.' ]+" - "(?[0-9]+))?" - "|" // or DDMM(CCYY) - "(?[0-9]{2})" - "(?[0-9]{2})" - "(?[0-9]+)?" - ")" - }, - GncDateFormat { - (N_("m-d")), - "(?:" // either m-d(-y) - "(?[0-9]+)[-/.' ]+" - "(?[0-9]+)(?:[-/.' ]+" - "(?[0-9]+))?" - "|" // or MMDD(CCYY) - "(?[0-9]{2})" - "(?[0-9]{2})" - "(?[0-9]+)?" - ")" - } -}); - /** Private implementation of GncDateTime. See the documentation for that class. */ static LDT @@ -609,42 +543,40 @@ GncDateTimeImpl::timestamp() /* Member function definitions for GncDateImpl. */ -GncDateImpl::GncDateImpl(const std::string str, const std::string fmt) : - m_greg(boost::gregorian::day_clock::local_day()) /* Temporarily initialized to today, will be used and adjusted in the code below */ -{ - auto iter = std::find_if(GncDate::c_formats.cbegin(), GncDate::c_formats.cend(), - [&fmt](const GncDateFormat& v){ return (v.m_fmt == fmt); } ); - if (iter == GncDate::c_formats.cend()) - throw std::invalid_argument(N_("Unknown date format specifier passed as argument.")); - - boost::regex r(iter->m_re); - boost::smatch what; - if(!boost::regex_search(str, what, r)) // regex didn't find a match - throw std::invalid_argument (N_("Value can't be parsed into a date using the selected date format.")); - - // Bail out if a year was found with a yearless format specifier - auto fmt_has_year = (fmt.find('y') != std::string::npos); - if (!fmt_has_year && (what.length("YEAR") != 0)) - throw std::invalid_argument (N_("Value appears to contain a year while the selected format forbids this.")); - - int year; - if (fmt_has_year) - { - /* The input dates have a year, so use that one */ - year = std::stoi (what.str("YEAR")); - - /* We assume two-digit years to be in the range 1969 - 2068. */ - if (year < 69) - year += 2000; - else if (year < 100) - year += 1900; - } - else /* The input dates have no year, so use current year */ - year = m_greg.year(); // Can use m_greg here as it was already initialized in the initializer list earlier +GncDateImpl::GncDateImpl(const std::string str, const std::string locale_str) : + /* Temporarily initialized to today, will be used and adjusted in the code below */ + m_greg(boost::gregorian::day_clock::local_day()) +{ + UErrorCode status = U_ZERO_ERROR; + + icu::Locale locale = icu::Locale::createCanonical (locale_str.c_str()); + std::unique_ptr formatter(icu::DateFormat::createDateInstance(icu::DateFormat::kDefault, locale)); + if (formatter == nullptr) + throw std::invalid_argument ("Cannot parse string"); + + icu::UnicodeString input = icu::UnicodeString::fromUTF8(str); + icu::ParsePosition parsePos; + + UDate date = formatter->parse(input, parsePos); + if (parsePos.getErrorIndex() != -1) + throw std::invalid_argument ("Cannot parse string"); + + std::unique_ptr calendar(icu::Calendar::createInstance(locale, status)); + if (U_FAILURE(status)) + throw std::invalid_argument ("Cannot parse string"); + + calendar->setTime(date, status); + if (U_FAILURE(status)) + throw std::invalid_argument ("Cannot parse string"); + + int32_t day = calendar->get(UCAL_DATE, status); + int32_t month = calendar->get(UCAL_MONTH, status) + 1; + int32_t year = calendar->get(UCAL_YEAR, status); + + if (U_FAILURE(status)) + throw std::invalid_argument ("Cannot parse string"); - m_greg = Date(year, - static_cast(std::stoi (what.str("MONTH"))), - std::stoi (what.str("DAY"))); + m_greg = Date(year, month, day); } gnc_ymd diff --git a/libgnucash/engine/gnc-datetime.hpp b/libgnucash/engine/gnc-datetime.hpp index 77a6039b10a..26f68d7e9f3 100644 --- a/libgnucash/engine/gnc-datetime.hpp +++ b/libgnucash/engine/gnc-datetime.hpp @@ -162,37 +162,6 @@ class GncDateTime std::unique_ptr m_impl; }; -/** GnuCash DateFormat class - * - * A helper class to represent a date format understood - * by the GncDate string/format constructor. Consumers - * of this header file are not supposed to create - * objects of this class themselves. Instead they - * can get a list of the understood formats from the - * GncDate::c_formats class variable and work with those. - */ - -class GncDateFormat -{ -public: - /** Construct a GncDateFormat with a given format and corresponding - * regular expression. This should only be used internally by the - * GncDate implementation. Consumers should never construct a GncDateFormat - * themselves! - */ - GncDateFormat (const char* fmt, const char* re) : - m_fmt(fmt), m_re(re) {} - /** A string representing the format. */ - const std::string m_fmt; -private: - /** Regular expression associated with the format string. This is to and - * only be used internally by the gnc-datetime code. - */ - const std::string m_re; - - friend class GncDateImpl; -}; - /** GnuCash Date class * * The represented date is limited to the period @@ -202,23 +171,6 @@ class GncDateFormat class GncDate { public: - /** A vector with all the date formats supported by the string constructor. - * The currently supported formats are: - * "y-m-d" (including yyyymmdd) - * "d-m-y" (including ddmmyyyy) - * "m-d-y" (including mmddyyyy) - * "d-m" (including ddmm) - * "m-d" (including mmdd) - * - * Notes: - * - while the format names are using a "-" as separator, the - * regexes will accept any of "-/.' " and will also work for dates - * without separators. - * - the format strings are marked for translation so it is possible - * to use a localized version of a format string using gettext. Example: - * gettext(GncDate::c_formats[0]) - */ - static const std::vector c_formats; /** Construct a GncDate representing the current day. */ GncDate(); @@ -252,7 +204,7 @@ class GncDate * (like month being 13, or day being 31 in February) * - fmt doesn't specify a year, yet a year was found in the string */ - GncDate(const std::string str, const std::string fmt); + GncDate(const std::string str, const std::string locale_str); /** Construct a GncDate from a GncDateImpl. */ GncDate(std::unique_ptr impl); From bf8a07a502a5db35977013a61cab8e7cfded4f85 Mon Sep 17 00:00:00 2001 From: Christopher Lam Date: Sun, 1 Sep 2024 16:19:50 +0800 Subject: [PATCH 2/6] [gnc-datetime.cpp] add debugging code --- libgnucash/engine/gnc-datetime.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/libgnucash/engine/gnc-datetime.cpp b/libgnucash/engine/gnc-datetime.cpp index f4e59fcf26a..9ac527dea74 100644 --- a/libgnucash/engine/gnc-datetime.cpp +++ b/libgnucash/engine/gnc-datetime.cpp @@ -549,6 +549,8 @@ GncDateImpl::GncDateImpl(const std::string str, const std::string locale_str) : { UErrorCode status = U_ZERO_ERROR; + std::cout << locale_str << '|' << str << ": "; + icu::Locale locale = icu::Locale::createCanonical (locale_str.c_str()); std::unique_ptr formatter(icu::DateFormat::createDateInstance(icu::DateFormat::kDefault, locale)); if (formatter == nullptr) @@ -559,23 +561,36 @@ GncDateImpl::GncDateImpl(const std::string str, const std::string locale_str) : UDate date = formatter->parse(input, parsePos); if (parsePos.getErrorIndex() != -1) + { + std::cout << "1\n"; throw std::invalid_argument ("Cannot parse string"); + } std::unique_ptr calendar(icu::Calendar::createInstance(locale, status)); if (U_FAILURE(status)) + { + std::cout << "2\n"; throw std::invalid_argument ("Cannot parse string"); + } calendar->setTime(date, status); if (U_FAILURE(status)) + { + std::cout << "3\n"; throw std::invalid_argument ("Cannot parse string"); + } int32_t day = calendar->get(UCAL_DATE, status); int32_t month = calendar->get(UCAL_MONTH, status) + 1; int32_t year = calendar->get(UCAL_YEAR, status); if (U_FAILURE(status)) + { + std::cout << "4\n"; throw std::invalid_argument ("Cannot parse string"); + } + std::cout << day << '/' << month << '/' << year << std::endl; m_greg = Date(year, month, day); } From 1ab2854085a14acc56a2b273a793645f66200e7e Mon Sep 17 00:00:00 2001 From: Christopher Lam Date: Sun, 1 Sep 2024 22:33:47 +0800 Subject: [PATCH 3/6] fixup! fix gnc-datetime tests --- libgnucash/engine/test/gtest-gnc-datetime.cpp | 148 ++++++++---------- 1 file changed, 68 insertions(+), 80 deletions(-) diff --git a/libgnucash/engine/test/gtest-gnc-datetime.cpp b/libgnucash/engine/test/gtest-gnc-datetime.cpp index 70b8b1a614f..53e2d1ef298 100644 --- a/libgnucash/engine/test/gtest-gnc-datetime.cpp +++ b/libgnucash/engine/test/gtest-gnc-datetime.cpp @@ -71,95 +71,83 @@ typedef struct TEST(gnc_date_constructors, test_str_format_constructor) { auto today = GncDate(); - auto today_ymd = today.year_month_day(); - auto curr_year = today_ymd.year; + // auto today_ymd = today.year_month_day(); + // auto curr_year = today_ymd.year; parse_date_data test_dates[] = { // supported combinations -/.' - { "y-m-d", "2013-08-01", 2013, 8, 1}, - { "y-m-d", "2013-8-01", 2013, 8, 1}, - { "y-m-d", "2013-08-1", 2013, 8, 1}, - { "y-m-d", "2013-8-1", 2013, 8, 1}, - { "y-m-d", "13-08-01", 2013, 8, 1}, - { "y-m-d", "13-8-01", 2013, 8, 1}, - { "y-m-d", "13-08-1", 2013, 8, 1}, - { "y-m-d", "13-8-1", 2013, 8, 1}, - { "y-m-d", "2009/11/04", 2009, 11, 4}, - { "y-m-d", "1985.3.12", 1985, 3, 12}, - { "y-m-d", "3'6'8", 2003, 6, 8}, - { "y-m-d", "20130801", 2013, 8, 1}, - { "d-m-y", "01-08-2013", 2013, 8, 1}, - { "d-m-y", "01-8-2013", 2013, 8, 1}, - { "d-m-y", "1-08-2013", 2013, 8, 1}, - { "d-m-y", "1-8-2013", 2013, 8, 1}, - { "d-m-y", "01-08-13", 2013, 8, 1}, - { "d-m-y", "01-8-13", 2013, 8, 1}, - { "d-m-y", "1-08-13", 2013, 8, 1}, - { "d-m-y", "1-8-13", 2013, 8, 1}, - { "d-m-y", "04/11/2009", 2009, 11, 4}, - { "d-m-y", "12.3.1985", 1985, 3, 12}, - { "d-m-y", "8'6'3", 2003, 6, 8}, - { "d-m-y", "01082013", 2013, 8, 1}, - { "m-d-y", "08-01-2013", 2013, 8, 1}, - { "m-d-y", "8-01-2013", 2013, 8, 1}, - { "m-d-y", "08-1-2013", 2013, 8, 1}, - { "m-d-y", "8-1-2013", 2013, 8, 1}, - { "m-d-y", "08-01-13", 2013, 8, 1}, - { "m-d-y", "8-01-13", 2013, 8, 1}, - { "m-d-y", "08-1-13", 2013, 8, 1}, - { "m-d-y", "8-1-13", 2013, 8, 1}, - { "m-d-y", "11/04/2009", 2009, 11, 4}, - { "m-d-y", "3.12.1985", 1985, 3, 12}, - { "m-d-y", "6'8'3", 2003, 6, 8}, - { "m-d-y", "08012013", 2013, 8, 1}, - { "d-m", "01-08", curr_year, 8, 1}, - { "d-m", "01-8", curr_year, 8, 1}, - { "d-m", "1-08", curr_year, 8, 1}, - { "d-m", "1-8", curr_year, 8, 1}, - { "d-m", "04/11", curr_year, 11, 4}, - { "d-m", "12.3", curr_year, 3, 12}, - { "d-m", "8'6", curr_year, 6, 8}, - { "d-m", "0108", curr_year, 8, 1}, - { "m-d", "08-01", curr_year, 8, 1}, - { "m-d", "8-01", curr_year, 8, 1}, - { "m-d", "08-1", curr_year, 8, 1}, - { "m-d", "8-1", curr_year, 8, 1}, - { "m-d", "11/04", curr_year, 11, 4}, - { "m-d", "3.12", curr_year, 3, 12}, - { "m-d", "6'8", curr_year, 6, 8}, - { "m-d", "0801", curr_year, 8, 1}, + { "en_GB", "01-08-2013", 2013, 8, 1}, + { "en_GB", "01-8-2013", 2013, 8, 1}, + { "en_GB", "1-08-2013", 2013, 8, 1}, + { "en_GB", "1-8-2013", 2013, 8, 1}, + { "en_GB", "01-08-13", 2013, 8, 1}, + { "en_GB", "01-8-13", 2013, 8, 1}, + { "en_GB", "1-08-13", 2013, 8, 1}, + { "en_GB", "1-8-13", 2013, 8, 1}, + { "en_GB", "04/11/2009", 2009, 11, 4}, + { "en_GB", "12.3.1985", 1985, 3, 12}, + // { "en_GB", "8'6'3", 2003, 6, 8}, + // { "en_GB", "01082013", 2013, 8, 1}, + { "en_US", "08-01-2013", 2013, 8, 1}, + { "en_US", "8-01-2013", 2013, 8, 1}, + { "en_US", "08-1-2013", 2013, 8, 1}, + { "en_US", "8-1-2013", 2013, 8, 1}, + { "en_US", "08-01-13", 2013, 8, 1}, + { "en_US", "8-01-13", 2013, 8, 1}, + { "en_US", "08-1-13", 2013, 8, 1}, + { "en_US", "8-1-13", 2013, 8, 1}, + { "en_US", "11/04/2009", 2009, 11, 4}, + { "en_US", "3.12.1985", 1985, 3, 12}, + // { "en_US", "6'8'3", 2003, 6, 8}, + // { "en_US", "08012013", 2013, 8, 1}, + // { "d-m", "01-08", curr_year, 8, 1}, + // { "d-m", "01-8", curr_year, 8, 1}, + // { "d-m", "1-08", curr_year, 8, 1}, + // { "d-m", "1-8", curr_year, 8, 1}, + // { "d-m", "04/11", curr_year, 11, 4}, + // { "d-m", "12.3", curr_year, 3, 12}, + // { "d-m", "8'6", curr_year, 6, 8}, + // { "d-m", "0108", curr_year, 8, 1}, + // { "m-d", "08-01", curr_year, 8, 1}, + // { "m-d", "8-01", curr_year, 8, 1}, + // { "m-d", "08-1", curr_year, 8, 1}, + // { "m-d", "8-1", curr_year, 8, 1}, + // { "m-d", "11/04", curr_year, 11, 4}, + // { "m-d", "3.12", curr_year, 3, 12}, + // { "m-d", "6'8", curr_year, 6, 8}, + // { "m-d", "0801", curr_year, 8, 1}, // ambiguous date formats // current parser doesn't know how to disambiguate // and hence refuses to parse // can possibly improved with a smarter parser - { "y-m-d", "130801", -1, -1, -1}, - { "d-m-y", "010813", -1, -1, -1}, - { "m-d-y", "080113", -1, -1, -1}, + // { "y-m-d", "130801", -1, -1, -1}, + { "en_GB", "010813", -1, -1, -1}, + { "en_US", "080113", -1, -1, -1}, // Combinations that don't make sense // but can still be entered by a user // Should ideally all result in refusal to parse... - { "y-m-d", "08-01", -1, -1, -1}, - { "y-m-d", "0801", -1, -1, -1}, - { "d-m-y", "01-08", -1, -1, -1}, - { "d-m-y", "0108", -1, -1, -1}, - { "m-d-y", "08-01", -1, -1, -1}, - { "m-d-y", "0801", -1, -1, -1}, - { "d-m", "01-08-2013", -1, -1, -1}, - { "d-m", "01-08-13", -1, -1, -1}, - { "d-m", "08-08-08", -1, -1, -1}, - { "d-m", "01082013", -1, -1, -1}, - { "d-m", "010813", -1, -1, -1}, - { "d-m", "20130108", -1, -1, -1}, - { "m-d", "08-01-2013", -1, -1, -1}, - { "m-d", "08-01-13", -1, -1, -1}, - { "m-d", "2013-08-01", -1, -1, -1}, - { "m-d", "09-08-01", -1, -1, -1}, - { "m-d", "08012013", -1, -1, -1}, - { "m-d", "080113", -1, -1, -1}, - { "m-d", "20130801", -1, -1, -1}, + // { "y-m-d", "08-01", -1, -1, -1}, + // { "y-m-d", "0801", -1, -1, -1}, + { "en_GB", "01-08", -1, -1, -1}, + { "en_GB", "0108", -1, -1, -1}, + { "en_US", "08-01", -1, -1, -1}, + { "en_US", "0801", -1, -1, -1}, + // { "d-m", "01-08-2013", -1, -1, -1}, + // { "d-m", "01-08-13", -1, -1, -1}, + // { "d-m", "08-08-08", -1, -1, -1}, + // { "d-m", "01082013", -1, -1, -1}, + // { "d-m", "010813", -1, -1, -1}, + // { "d-m", "20130108", -1, -1, -1}, + // { "m-d", "08-01-2013", -1, -1, -1}, + // { "m-d", "08-01-13", -1, -1, -1}, + // { "m-d", "2013-08-01", -1, -1, -1}, + // { "m-d", "09-08-01", -1, -1, -1}, + // { "m-d", "08012013", -1, -1, -1}, + // { "m-d", "080113", -1, -1, -1}, + // { "m-d", "20130801", -1, -1, -1}, // Unknown date format specifier should also trigger an exception { "y-d-m H:M:S", "20130801", -1, -1, -1}, @@ -445,11 +433,11 @@ TEST(gnc_datetime_constructors, test_create_in_transition) * savings time it ended at 23:59:59 and the next second was * 01:00:00 so that's when the day starts. */ - GncDate date0{"2018-11-03", "y-m-d"}; + GncDate date0{"03-11-2018", "en_GB"}; GncDateTime gncdt0{date0, DayPart::end}; EXPECT_EQ(gncdt0.format_zulu("%Y-%m-%d %H:%M:%S %Z"), "2018-11-04 02:59:59 UTC"); EXPECT_EQ(gncdt0.format("%Y-%m-%d %H:%M:%S %Z"), "2018-11-03 23:59:59 -03"); - GncDate date1{"2018-11-04", "y-m-d"}; + GncDate date1{"04-11-2018", "en_GB"}; GncDateTime gncdt1{date1, DayPart::start}; EXPECT_EQ(gncdt1.format_zulu("%Y-%m-%d %H:%M:%S %Z"), "2018-11-04 03:00:00 UTC"); EXPECT_EQ(gncdt1.format("%Y-%m-%d %H:%M:%S %Z"), "2018-11-04 01:00:00 -02"); @@ -457,7 +445,7 @@ TEST(gnc_datetime_constructors, test_create_in_transition) * std time, i.e. -03. Unfortunately sometimes boost::date_time * decides that it's still DST and returns the first one. */ - GncDate date2{"2018-02-17", "y-m-d"}; + GncDate date2{"17-02-2018", "en_GB"}; GncDateTime gncdt2{date2, DayPart::end}; if (gncdt2.offset() == -7200) { @@ -473,7 +461,7 @@ TEST(gnc_datetime_constructors, test_create_in_transition) * Savings. This test checks to ensure that GncTimeZone doesn't * try to project 2018's rule forward. */ - GncDate date3{"2019-11-01", "y-m-d"}; + GncDate date3{"01-11-2019", "en_GB"}; GncDateTime gncdt3{date3, DayPart::start}; EXPECT_EQ(gncdt3.format_zulu("%Y-%m-%d %H:%M:%S %Z"), "2019-11-01 03:00:00 UTC"); EXPECT_EQ(gncdt3.format("%Y-%m-%d %H:%M:%S %Z"), "2019-11-01 00:00:00 -03"); From 8ab8958718fd69ba9729d248b18c942e1d4954c9 Mon Sep 17 00:00:00 2001 From: Christopher Lam Date: Fri, 6 Sep 2024 07:54:28 +0800 Subject: [PATCH 4/6] cache icu formatter and calendar for each locale --- libgnucash/engine/gnc-datetime.cpp | 50 +++++++++++++++++------------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/libgnucash/engine/gnc-datetime.cpp b/libgnucash/engine/gnc-datetime.cpp index 9ac527dea74..24afbd0bfc5 100644 --- a/libgnucash/engine/gnc-datetime.cpp +++ b/libgnucash/engine/gnc-datetime.cpp @@ -541,54 +541,62 @@ GncDateTimeImpl::timestamp() return str.substr(0, 8) + str.substr(9, 15); } +using DateFormatPtr = std::shared_ptr; +using CalendarPtr = std::shared_ptr; + +static std::tuple +locale_to_formatter_and_calendar (const std::string locale_str) +{ + static std::map>> cache; + auto& tuple = cache[locale_str]; + if (!tuple) + { + auto locale = icu::Locale::createCanonical (locale_str.c_str()); + std::shared_ptr formatter(icu::DateFormat::createDateInstance(icu::DateFormat::kDefault, locale)); + if (formatter == nullptr) + throw std::invalid_argument ("Cannot parse string"); + + UErrorCode status = U_ZERO_ERROR; + std::shared_ptr calendar(icu::Calendar::createInstance(locale, status)); + if (U_FAILURE(status)) + throw std::invalid_argument ("Cannot parse string"); + + tuple = std::make_tuple(std::move(formatter), std::move(calendar)); + } + + return *tuple; +} + /* Member function definitions for GncDateImpl. */ GncDateImpl::GncDateImpl(const std::string str, const std::string locale_str) : /* Temporarily initialized to today, will be used and adjusted in the code below */ m_greg(boost::gregorian::day_clock::local_day()) { - UErrorCode status = U_ZERO_ERROR; - std::cout << locale_str << '|' << str << ": "; - icu::Locale locale = icu::Locale::createCanonical (locale_str.c_str()); - std::unique_ptr formatter(icu::DateFormat::createDateInstance(icu::DateFormat::kDefault, locale)); - if (formatter == nullptr) - throw std::invalid_argument ("Cannot parse string"); - + auto [formatter, calendar] = locale_to_formatter_and_calendar (locale_str); icu::UnicodeString input = icu::UnicodeString::fromUTF8(str); icu::ParsePosition parsePos; UDate date = formatter->parse(input, parsePos); if (parsePos.getErrorIndex() != -1) { - std::cout << "1\n"; - throw std::invalid_argument ("Cannot parse string"); - } - - std::unique_ptr calendar(icu::Calendar::createInstance(locale, status)); - if (U_FAILURE(status)) - { - std::cout << "2\n"; + std::cout << "cannot parse " << std::endl; throw std::invalid_argument ("Cannot parse string"); } + UErrorCode status = U_ZERO_ERROR; calendar->setTime(date, status); if (U_FAILURE(status)) - { - std::cout << "3\n"; throw std::invalid_argument ("Cannot parse string"); - } int32_t day = calendar->get(UCAL_DATE, status); int32_t month = calendar->get(UCAL_MONTH, status) + 1; int32_t year = calendar->get(UCAL_YEAR, status); if (U_FAILURE(status)) - { - std::cout << "4\n"; throw std::invalid_argument ("Cannot parse string"); - } std::cout << day << '/' << month << '/' << year << std::endl; m_greg = Date(year, month, day); From c16473329ddb51619114b00cf367de84e742e6ed Mon Sep 17 00:00:00 2001 From: Christopher Lam Date: Sat, 7 Sep 2024 20:25:20 +0800 Subject: [PATCH 5/6] [attempt] test 500 dates with all locales, filtering into successful parses test 500 dates in 797 locales filtered down to 796 locales in 3.37s --- .../csv-imp/gnc-tokenizer-csv.hpp | 2 ++ .../import-export/csv-imp/gnc-tokenizer.cpp | 22 +++++++++++++++ .../csv-imp/test/test-tokenizer.cpp | 28 +++++++++++++++++++ libgnucash/engine/gnc-datetime.cpp | 9 +++--- 4 files changed, 57 insertions(+), 4 deletions(-) diff --git a/gnucash/import-export/csv-imp/gnc-tokenizer-csv.hpp b/gnucash/import-export/csv-imp/gnc-tokenizer-csv.hpp index c60910b5a42..3f04037f543 100644 --- a/gnucash/import-export/csv-imp/gnc-tokenizer-csv.hpp +++ b/gnucash/import-export/csv-imp/gnc-tokenizer-csv.hpp @@ -59,4 +59,6 @@ class GncCsvTokenizer : public GncTokenizer std::string m_sep_str = ","; }; +void gnc_filter_locales (StrVec& candidate_locales, const StrVec dates); + #endif diff --git a/gnucash/import-export/csv-imp/gnc-tokenizer.cpp b/gnucash/import-export/csv-imp/gnc-tokenizer.cpp index 8dc6b450eba..2d2b9827777 100644 --- a/gnucash/import-export/csv-imp/gnc-tokenizer.cpp +++ b/gnucash/import-export/csv-imp/gnc-tokenizer.cpp @@ -125,3 +125,25 @@ GncTokenizer::get_tokens() { return m_tokenized_contents; } + + + +using StrVec = std::vector; +#include "gnc-datetime.hpp" + +void +gnc_filter_locales (StrVec& candidate_locales, const StrVec dates) +{ + StrVec new_candidate_locales; + new_candidate_locales.reserve (candidate_locales.size()); + + for (const auto& date : dates) + { + new_candidate_locales.clear (); + for (const auto& locale : candidate_locales) + try { GncDate (date, locale); new_candidate_locales.push_back (locale); } + catch (const std::exception&) {}; + + std::swap (candidate_locales, new_candidate_locales); + } +} diff --git a/gnucash/import-export/csv-imp/test/test-tokenizer.cpp b/gnucash/import-export/csv-imp/test/test-tokenizer.cpp index aad18520e0a..ff02e8d8300 100644 --- a/gnucash/import-export/csv-imp/test/test-tokenizer.cpp +++ b/gnucash/import-export/csv-imp/test/test-tokenizer.cpp @@ -245,7 +245,35 @@ static tokenize_fw_test_data fixed_width [] = { { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL } }, }; +#include // time_t +#include "gnc-locale-utils.hpp" + +static void test_filter_locales () +{ + std::vector dates; + + for (auto i = 0; i < 500; ++i) + dates.push_back ("09/22/2021"); + + auto locales = gnc_get_available_locales (); + std::cout << locales.size() << " locales available. Testing " + << dates.size() << " dates.\n"; + + auto start = clock(); + gnc_filter_locales (locales, dates); + auto end = clock(); + + double duration_sec = double(end-start)/CLOCKS_PER_SEC; + + std::cout << locales.size() << " locales left, checked in " + << duration_sec << " seconds:\n"; + for (auto locale : locales) + std::cout << ' ' << locale; + std::cout << '\n'; +} + TEST_F (GncTokenizerTest, tokenize_fw) { test_gnc_tokenize_helper (fixed_width); + test_filter_locales (); } diff --git a/libgnucash/engine/gnc-datetime.cpp b/libgnucash/engine/gnc-datetime.cpp index 24afbd0bfc5..0112de29072 100644 --- a/libgnucash/engine/gnc-datetime.cpp +++ b/libgnucash/engine/gnc-datetime.cpp @@ -552,10 +552,11 @@ locale_to_formatter_and_calendar (const std::string locale_str) if (!tuple) { auto locale = icu::Locale::createCanonical (locale_str.c_str()); - std::shared_ptr formatter(icu::DateFormat::createDateInstance(icu::DateFormat::kDefault, locale)); + std::shared_ptr formatter(icu::DateFormat::createDateInstance(icu::DateFormat::kShort, locale)); if (formatter == nullptr) throw std::invalid_argument ("Cannot parse string"); + formatter->setLenient (false); UErrorCode status = U_ZERO_ERROR; std::shared_ptr calendar(icu::Calendar::createInstance(locale, status)); if (U_FAILURE(status)) @@ -573,7 +574,7 @@ GncDateImpl::GncDateImpl(const std::string str, const std::string locale_str) : /* Temporarily initialized to today, will be used and adjusted in the code below */ m_greg(boost::gregorian::day_clock::local_day()) { - std::cout << locale_str << '|' << str << ": "; + // std::cout << locale_str << '|' << str << ": "; auto [formatter, calendar] = locale_to_formatter_and_calendar (locale_str); icu::UnicodeString input = icu::UnicodeString::fromUTF8(str); @@ -582,7 +583,7 @@ GncDateImpl::GncDateImpl(const std::string str, const std::string locale_str) : UDate date = formatter->parse(input, parsePos); if (parsePos.getErrorIndex() != -1) { - std::cout << "cannot parse " << std::endl; + // std::cout << "cannot parse " << std::endl; throw std::invalid_argument ("Cannot parse string"); } @@ -598,7 +599,7 @@ GncDateImpl::GncDateImpl(const std::string str, const std::string locale_str) : if (U_FAILURE(status)) throw std::invalid_argument ("Cannot parse string"); - std::cout << day << '/' << month << '/' << year << std::endl; + // std::cout << day << '/' << month << '/' << year << std::endl; m_greg = Date(year, month, day); } From 602e519ff903c2dad68f04f890700907460dbb79 Mon Sep 17 00:00:00 2001 From: Christopher Lam Date: Sun, 8 Sep 2024 10:44:45 +0800 Subject: [PATCH 6/6] [attempt2] parse both kShort and kMedium... [hack] --- libgnucash/engine/gnc-datetime.cpp | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/libgnucash/engine/gnc-datetime.cpp b/libgnucash/engine/gnc-datetime.cpp index 0112de29072..89ff2869648 100644 --- a/libgnucash/engine/gnc-datetime.cpp +++ b/libgnucash/engine/gnc-datetime.cpp @@ -544,25 +544,29 @@ GncDateTimeImpl::timestamp() using DateFormatPtr = std::shared_ptr; using CalendarPtr = std::shared_ptr; -static std::tuple +static std::tuple locale_to_formatter_and_calendar (const std::string locale_str) { - static std::map>> cache; + static std::unordered_map>> cache; auto& tuple = cache[locale_str]; if (!tuple) { auto locale = icu::Locale::createCanonical (locale_str.c_str()); - std::shared_ptr formatter(icu::DateFormat::createDateInstance(icu::DateFormat::kShort, locale)); - if (formatter == nullptr) + std::shared_ptr formatter_short (icu::DateFormat::createDateInstance(icu::DateFormat::kShort, locale)); + if (formatter_short == nullptr) throw std::invalid_argument ("Cannot parse string"); + formatter_short->setLenient (false); + + std::shared_ptr formatter_med (icu::DateFormat::createDateInstance(icu::DateFormat::kMedium, locale)); + formatter_med->setLenient (false); - formatter->setLenient (false); UErrorCode status = U_ZERO_ERROR; std::shared_ptr calendar(icu::Calendar::createInstance(locale, status)); if (U_FAILURE(status)) throw std::invalid_argument ("Cannot parse string"); - tuple = std::make_tuple(std::move(formatter), std::move(calendar)); + tuple = std::make_tuple + (std::move(formatter_short), std::move(formatter_med), std::move(calendar)); } return *tuple; @@ -576,16 +580,17 @@ GncDateImpl::GncDateImpl(const std::string str, const std::string locale_str) : { // std::cout << locale_str << '|' << str << ": "; - auto [formatter, calendar] = locale_to_formatter_and_calendar (locale_str); + auto [formatter_short, formatter_med, calendar] = locale_to_formatter_and_calendar (locale_str); icu::UnicodeString input = icu::UnicodeString::fromUTF8(str); icu::ParsePosition parsePos; - UDate date = formatter->parse(input, parsePos); + UDate date = formatter_short->parse(input, parsePos); // 1st attempt + + if (parsePos.getErrorIndex() != -1) + date = formatter_med->parse(input, parsePos); // 2nd attempt + if (parsePos.getErrorIndex() != -1) - { - // std::cout << "cannot parse " << std::endl; throw std::invalid_argument ("Cannot parse string"); - } UErrorCode status = U_ZERO_ERROR; calendar->setTime(date, status);